<?php
// Top N most frequent words and 2-3 word phrases in a text.

// Words that are ignored when counting.
$stopWords = array(
    'и', 'у', 'к', 'с', 'о',
    'от', 'в', 'же', 'из', 'на',
);

// Sample text to analyse; the line breaks inside the literal are part of the value.
$input = 'Эта функция сортирует массив в обратном порядке таким образом, что
сохраняются отношения между ключами и значениями. Сохраняются отношения и
сохраняются отношения и еще сохраняются отношения и опять сохраняются отношения.';
/**
 * Removes words/phrases that are merely a part of a longer phrase.
 *
 * An entry of $words is dropped when some entry of $moreWords contains it as a
 * run of whole words AND both occur the same number of times, i.e. the shorter
 * entry never appears outside of the longer one.
 *
 * @param array $moreWords Longer phrases: phrase => number of occurrences.
 * @param array $words     Shorter words or phrases: text => number of occurrences.
 *
 * @return array $words without the entries absorbed by $moreWords; keys are preserved.
 */
function removeRepetition($moreWords, $words)
{
    foreach ($words as $word => $count) {
        // Padding both sides with spaces turns a plain substring search into a
        // whole-word match, so "на дом" is not found inside "она дома была".
        // It works the same for single words and for multi-word phrases.
        // Byte-wise strpos() is sufficient here: only a yes/no answer is needed.
        $needle = ' ' . $word . ' ';

        foreach ($moreWords as $moreWord => $moreCount) {
            if ($count == $moreCount && strpos(' ' . $moreWord . ' ', $needle) !== false) {
                // foreach iterates over a copy, so removing from $words here is safe.
                unset($words[$word]);
                break;
            }
        }
    }

    return $words;
}
// Normalise the text: lower-case it, turn line breaks into spaces, drop commas
// and blank out the stop words so they never take part in a word or phrase.
$input = mb_strtolower($input, 'UTF-8');
$input = preg_replace('/[\r\n]+/', ' ', $input);
$input = str_replace(',', '', $input);
foreach ($stopWords as $stopWord) {
    // preg_quote() keeps a stop word with regex metacharacters from breaking the
    // pattern; the "u" modifier makes the pattern operate on UTF-8 text.
    $input = preg_replace('/\b' . preg_quote($stopWord, '/') . '\b/u', '', $input);
}

$sentences = preg_split('/\./', $input, 0, PREG_SPLIT_NO_EMPTY);

// Collect every word, every pair of neighbouring words and every triple of
// neighbouring words. Phrases never cross a sentence boundary.
$oneWords = array();
$twoWords = array();
$threeWords = array();
foreach ($sentences as $sentence) {
    $tokens = preg_split('/\s+/u', $sentence, 0, PREG_SPLIT_NO_EMPTY);
    $total = count($tokens);
    for ($i = 0; $i < $total; $i++) {
        $oneWords[] = $tokens[$i];
        if ($i + 1 < $total) {
            $twoWords[] = $tokens[$i] . ' ' . $tokens[$i + 1];
        }
        if ($i + 2 < $total) {
            $threeWords[] = $tokens[$i] . ' ' . $tokens[$i + 1] . ' ' . $tokens[$i + 2];
        }
    }
}

// Count occurrences and keep only what was found more than once.
$keepRepeated = function ($occurrences) {
    return $occurrences > 1;
};
$countWords = array_filter(array_count_values($oneWords), $keepRepeated);
$countTwoWords = array_filter(array_count_values($twoWords), $keepRepeated);
$countThreeWords = array_filter(array_count_values($threeWords), $keepRepeated);

// Drop words and phrases that are only ever seen as part of a longer phrase.
$countWords = removeRepetition($countThreeWords, $countWords);
$countWords = removeRepetition($countTwoWords, $countWords);
$countTwoWords = removeRepetition($countThreeWords, $countTwoWords);

// The three maps cannot share a key (they differ in the number of words), so the
// union operator merges them without renumbering integer-like keys such as "2024",
// which array_merge() would replace with 0, 1, 2...
$top = $countWords + $countTwoWords + $countThreeWords;
arsort($top, SORT_NUMERIC);

// Print the result.
if (count($top) === 0) {
    echo 'Увы, но в данном тексте нет частых слов или словосочетаний встречающихся больше одного раза :(';
} elseif (count($top) === 1) {
    foreach ($top as $phrase => $occurrences) {
        echo 'Самое частое слово/словосочетание: "' . $phrase . '", оно встречается - ' . $occurrences .
            ' раз.';
    }
} else {
    echo "Самые частые слова/словосочетания:\n";
    foreach ($top as $phrase => $occurrences) {
        echo $phrase . " - встречается " . $occurrences . " раз.\n";
    }
}

// What follows in this file is a base64 text payload, not PHP source. Stop the
// parser here so that the payload is kept as inert data instead of being compiled.
__halt_compiler();
PD9waHAKLy/QotC+0L8gTiDRgdCw0LzRi9GFINGH0LDRgdGC0L4g0LLRgdGC0YDQtdGH0LDRjtGJ0LjRhdGB0Y8g0YHQu9C+0LIg0Lgg0YHQu9C+0LLQvtGB0L7Rh9C10YLQsNC90LjQuSDQsiDRgtC10LrRgdGC0LUo0LjQtyAyLTMt0YUg0YHQu9C+0LIpCm1iX2ludGVybmFsX2VuY29kaW5nKCdVdGYtOCcpOwoKJHN0b3BXb3JkcyA9IGFycmF5KCfQuCcsICfRgycsICfQuicsICfRgScsICfQvicsICfQvtGCJywgJ9CyJywgJ9C20LUnLCAn0LjQtycsICfQvdCwJyk7CiRpbnB1dCA9ICfQrdGC0LAg0YTRg9C90LrRhtC40Y8g0YHQvtGA0YLQuNGA0YPQtdGCINC80LDRgdGB0LjQsiDQsiDQvtCx0YDQsNGC0L3QvtC8INC/0L7RgNGP0LTQutC1INGC0LDQutC40Lwg0L7QsdGA0LDQt9C+0LwsINGH0YLQvgrRgdC+0YXRgNCw0L3Rj9GO0YLRgdGPINC+0YLQvdC+0YjQtdC90LjRjyDQvNC10LbQtNGDINC60LvRjtGH0LDQvNC4INC4INC30L3QsNGH0LXQvdC40Y/QvNC4LiDQodC+0YXRgNCw0L3Rj9GO0YLRgdGPINC+0YLQvdC+0YjQtdC90LjRjyDQuArRgdC+0YXRgNCw0L3Rj9GO0YLRgdGPINC+0YLQvdC+0YjQtdC90LjRjyDQuCDQtdGJ0LUg0YHQvtGF0YDQsNC90Y/RjtGC0YHRjyDQvtGC0L3QvtGI0LXQvdC40Y8g0Lgg0L7Qv9GP0YLRjCDRgdC+0YXRgNCw0L3Rj9GO0YLRgdGPINC+0YLQvdC+0YjQtdC90LjRjy4nOwoKLy/QpNGD0L3QutGG0LjRjyDRg9C00LDQu9GP0Y7RidCw0Y8g0YHQu9C+0LLQsCDQuCDRgdC70L7QstC+0YHQvtGH0LXRgtCw0L3QuNGPINGP0LLQu9GP0Y7RidC40LXRgdGPINGH0LDRgdGC0YzRjiDQsdC+0LvRjNGI0LjRhSDRgdC70L7QstC+0YHQvtGH0LXQvdC40LkKZnVuY3Rpb24gcmVtb3ZlUmVwZXRpdGlvbigkbW9yZVdvcmRzLCAkd29yZHMpCnsKICAgIGZvcmVhY2ggKCRtb3JlV29yZHMgYXMgJG1vcmVXb3JkID0+ICRtb3JlQ291bnQpIHsKICAgICAgICBmb3JlYWNoICgkd29yZHMgYXMgJHdvcmQgPT4gJGNvdW50KSB7CiAgICAgICAgICAgIC8v0LXRgdC70Lgg0LLQviDQstGC0L7RgNC+0Lwg0LzQsNGB0YHQuNCy0LUg0YHQu9C+0LLQvtGB0L7Rh9C10YLQsNC90LjRjywg0LjQvdCw0YfQtSDRgdC70L7QstCwCiAgICAgICAgICAgIGlmIChwcmVnX21hdGNoKCcvXHcrIFx3Ky91JywgJHdvcmQpKSB7CiAgICAgICAgICAgICAgICBpZiAoKG1iX3N0cnBvcygkbW9yZVdvcmQsICR3b3JkKSAhPT0gZmFsc2UpICYmICgkY291bnQgPT0gJG1vcmVDb3VudCkpIHsKICAgICAgICAgICAgICAgICAgICB1bnNldCgkd29yZHNbJHdvcmRdKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICRzZXZlcmFsV29yZHMgPSBleHBsb2RlKCcgJywgJG1vcmVXb3JkKTsKICAgICAgICAgICAgICAgICRvbmVXb3JkID0gYXJyYXkoJHdvcmQpOwogICAgICAgICAgICAgICAgJG1hdGNoZXMgPSBhcnJheV9pbnRlcnNlY3QoJG9uZVdvcmQsICRzZXZlcmFsV29yZHMpOwogICAgICAgICAgICAg
ICAgaWYgKChjb3VudCgkbWF0Y2hlcykgIT0gMCkgJiYgKCRtb3JlQ291bnQgPT0gJGNvdW50KSkgewogICAgICAgICAgICAgICAgICAgIHVuc2V0KCR3b3Jkc1skd29yZF0pOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgfQogICAgfQogICAgcmV0dXJuICR3b3JkczsKfQoKLy/Qo9C00LDQu9GP0LXQvCDQu9C40YjQvdC10LUgLSDQv9C10YDQtdC90L7RgdGLLCDQt9Cw0L/Rj9GC0YvQtSwg0YHRgtC+0L8g0YHQu9C+0LLQsC4KJGlucHV0ID0gbWJfc3RydG9sb3dlcigkaW5wdXQpOwokaW5wdXQgPSBwcmVnX3JlcGxhY2UoJy9bXFxyXFxuXS8nLCAnICcsICRpbnB1dCk7CiRpbnB1dCA9IHByZWdfcmVwbGFjZSgnLywvJywgJycsICRpbnB1dCk7CmZvcmVhY2ggKCRzdG9wV29yZHMgYXMgJHdvcmQpIHsKICAgICRpbnB1dCA9IHByZWdfcmVwbGFjZSgiL1xcYiR3b3JkXFxiL3UiLCAnJywgJGlucHV0KTsKfQoKJHNlbnRlbmNlcyA9IHByZWdfc3BsaXQoJy9cLi8nLCAkaW5wdXQsIDAsIFBSRUdfU1BMSVRfTk9fRU1QVFkpOwoKLy/QpNC+0YDQvNC40YDRg9C10Lwg0YHQu9C+0LLQvtGB0L7Rh9C10YLQsNC90LjRjwpmb3JlYWNoICgkc2VudGVuY2VzIGFzICRzZW50ZW5jZSkgewogICAgJHdvcmRzID0gcHJlZ19zcGxpdCgnLyAvJywgJHNlbnRlbmNlLCAwLCBQUkVHX1NQTElUX05PX0VNUFRZKTsKICAgIGZvcmVhY2ggKCR3b3JkcyBhcyAka2V5ID0+ICR3b3JkKSB7CiAgICAgICAgJG9uZVdvcmRzW10gPSAkd29yZDsKICAgICAgICAvL9Cf0YDQtdGA0YvQstCw0LXQvCDRhtC40LrQuyDQvdCwINC/0L7RgdC70LXQtNC90LXQvCDRgdC70L7QstC1LgogICAgICAgIGlmICgka2V5ID09IChjb3VudCgkd29yZHMpIC0gMSkpIHsKICAgICAgICAgICAgYnJlYWs7CiAgICAgICAgfQogICAgICAgICR0d29Xb3Jkc1tdID0gJHdvcmQgLiAnICcgLiAkd29yZHNbJGtleSArIDFdOwogICAgICAgIGlmICgka2V5ICE9PSAwKSB7CiAgICAgICAgICAgICR0aHJlZVdvcmRzW10gPSAkd29yZHNbJGtleSAtIDFdIC4gJyAnIC4gJHdvcmQgLiAnICcgLiAkd29yZHNbJGtleSArIDFdOwogICAgICAgIH0KICAgIH0KfQoKLy/QodGH0LjRgtCw0LXQvCwg0YPQtNCw0LvRj9C10Lwg0LLRgdC1INGH0YLQviDQsdGL0LvQviDQvdCw0LnQtNC10L3QviAxINGA0LDQtyDQuCDRgdC+0YDRgtC40YDRg9C10LwKJGNvdW50V29yZHMgPSBhcnJheV9kaWZmKGFycmF5X2NvdW50X3ZhbHVlcygkb25lV29yZHMpLCBhcnJheSgxKSk7CiRjb3VudFR3b1dvcmRzID0gYXJyYXlfZGlmZihhcnJheV9jb3VudF92YWx1ZXMoJHR3b1dvcmRzKSwgYXJyYXkoMSkpOwokY291bnRUaHJlZVdvcmRzID0gYXJyYXlfZGlmZihhcnJheV9jb3VudF92YWx1ZXMoJHRocmVlV29yZHMpLCBhcnJheSgxKSk7CmFyc29ydCgkY291bnRXb3JkcywgU09SVF9OVU1FUklDKTsKYXJzb3J0KCRjb3VudFR3b1dvcmRzLCBTT1JUX05VTUVSSUMpOwphcnNvcnQoJGNvdW50VGhyZWVXb3Jk
cywgU09SVF9OVU1FUklDKTsKCi8v0KPQtNCw0LvRj9C10Lwg0YHQu9C+0LLQsCDQuCDRgdC70L7QstC+0YHQvtGH0LXQsNC90LjRjyDRj9Cy0LvRj9GO0YnQuNC10YHRjyDRh9Cw0YHRgtGMINC00YDRg9Cz0LjRhQokY291bnRXb3JkcyA9IHJlbW92ZVJlcGV0aXRpb24oJGNvdW50VGhyZWVXb3JkcywgJGNvdW50V29yZHMpOwokY291bnRXb3JkcyA9IHJlbW92ZVJlcGV0aXRpb24oJGNvdW50VHdvV29yZHMsICRjb3VudFdvcmRzKTsKJGNvdW50VHdvV29yZHMgPSByZW1vdmVSZXBldGl0aW9uKCRjb3VudFRocmVlV29yZHMsICRjb3VudFR3b1dvcmRzKTsKCiR0b3AgPSBhcnJheV9tZXJnZSgkY291bnRXb3JkcywgJGNvdW50VHdvV29yZHMsICRjb3VudFRocmVlV29yZHMpOwoKLy/QktGL0LLQvtC00LjQvCDRgNC10LfRg9C70YzRgtCw0YIg0L3QsNGI0LXQuSDQvNCw0LPQuNC4CmlmICgkdG9wID09ICcnKSB7CiAgICBlY2hvICfQo9Cy0YssINC90L4g0LIg0LTQsNC90L3QvtC8INGC0LXQutGB0YLQtSDQvdC10YIg0YfQsNGB0YLRi9GFINGB0LvQvtCyINC40LvQuCDRgdC70L7QstC+0YHQvtGH0LXRgtCw0L3QuNC5INCy0YHRgtGA0LXRh9Cw0Y7RidC40YXRgdGPINCx0L7Qu9GM0YjQtSDQvtC00L3QvtCz0L4g0YDQsNC30LAgOignOwp9IGVsc2VpZiAoY291bnQoJHRvcCkgPT0gMSkgewogICAgZm9yZWFjaCAoJHRvcCBhcyAkd29yZHMgPT4gJGNvdW50KSB7CiAgICAgICAgZWNobyAn0KHQsNC80L7QtSDRh9Cw0YHRgtC+0LUg0YHQu9C+0LLQvi/RgdC70L7QstC+0YHQvtGH0LXRgtCw0L3QuNC1OiAiJyAuICR3b3JkcyAuICciLCDQvtC90L4g0LLRgdGC0YDQtdGH0LDQtdGC0YHRjyAtICcgLiAkY291bnQgLgogICAgICAgICAgICAnINGA0LDQty4nOwogICAgfQp9IGVsc2UgewogICAgYXJzb3J0KCR0b3ApOwogICAgZWNobyAi0KHQsNC80YvQtSDRh9Cw0YHRgtGL0LUg0YHQu9C+0LLQsC/RgdC70L7QstC+0YHQvtGH0LXRgtCw0L3QuNGPOlxuIjsKICAgIGZvcmVhY2ggKCR0b3AgYXMgJHdvcmRzID0+ICRjb3VudCkgewogICAgICAgIGVjaG8gJHdvcmRzIC4gIiAtINCy0YHRgtGA0LXRh9Cw0LXRgtGB0Y8gIiAuICRjb3VudCAuICIg0YDQsNC3LlxuIjsKICAgIH0KfQ==