fork(1) download
  1. <?php
  2.  
  3. /**
  4.  * Simple implementation, feel free to use something more complicated or robust
  5.  * if your situation requires it:
  6.  * http://stackoverflow.com/questions/3542818/remove-accents-without-using-iconv/
  7.  */
  8. function stripAccents($p_sSubject) {
  9. $sSubject = (string) $p_sSubject;
  10.  
  11. $sSubject = str_replace('æ', 'ae', $sSubject);
  12. $sSubject = str_replace('Æ', 'AE', $sSubject);
  13.  
  14. $sSubject = strtr(
  15. utf8_decode($sSubject)
  16. , utf8_decode('àáâãäåçèéêëìíîïñòóôõöøùúûüýÿÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝ')
  17. , 'aaaaaaceeeeiiiinoooooouuuuyyAAAAAACEEEEIIIINOOOOOOUUUUY'
  18. );
  19.  
  20.  
  21. return $sSubject;
  22. }
  23.  
  24. function emphasiseWord($p_sSubject, $p_sSearchTerm){
  25. /*
  26.   * The part in the regular expression that read "\p{L}\p{M}" makes sure we
  27.   * also get all the multibyte characters.
  28.   * You can learn more about unicode and regular expressions at:
  29.   * http://w...content-available-to-author-only...s.info/unicode.html
  30.   */
  31. $aSubjects = preg_split('#([^a-z0-9\p{L}\p{M}]+)#iu', $p_sSubject, null, PREG_SPLIT_DELIM_CAPTURE);
  32.  
  33. foreach($aSubjects as $t_iKey => $t_sSubject){
  34. $sSubject = stripAccents($t_sSubject);
  35.  
  36. if(stripos($sSubject, $p_sSearchTerm) !== false || mb_stripos($t_sSubject, $p_sSearchTerm) !== false){
  37. $aSubjects[$t_iKey] = '<<<' . $t_sSubject . '>>>';
  38. }
  39. }
  40.  
  41. $sSubject = implode('', $aSubjects);
  42.  
  43. return $sSubject;
  44. }
  45.  
  46.  
  47. // Test
  48. $aTest = array(
  49. 'goo' => 'I love Google to make my searches, but I`m starting to worry about privacy.'
  50. , 'peo' => 'people, People, PEOPLE, peOple, people!, people., people?, "people, people" péo'
  51. , 'péo' => 'people, People, PEOPLE, peOple, people!, people., people?, "people, people" péo'
  52. , 'gen' => '"gente", "inteligente", "VAGENS", and "Gente" ...vocês da física que passam o dia protegendo...'
  53. , 'voce' => '...vocês da física que passam o dia protegendo...'
  54. , 'o' => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway'
  55. , 'ø' => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway'
  56. , 'ae' => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway'
  57. , 'Æ' => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway'
  58. );
  59.  
  60. $sContent = '';
  61. foreach($aTest as $t_sSearchTerm => $t_sSubject){
  62. $sContent .= $t_sSearchTerm . "\n\t" . emphasiseWord($t_sSubject, $t_sSearchTerm) . "\n";
  63. }
  64.  
  65. echo $sContent;
  66.  
  67. #EOF
Success #stdin #stdout 0.02s 20520KB
stdin
Standard input is empty
stdout
goo
	I love <<<Google>>> to make my searches, but I`m starting to worry about privacy.
peo
	<<<people>>>, <<<People>>>, <<<PEOPLE>>>, <<<peOple>>>, <<<people>>>!, <<<people>>>., <<<people>>>?, "<<<people>>>, <<<people>>>" <<<péo>>>
péo
	people, People, PEOPLE, peOple, people!, people., people?, "people, people" <<<péo>>>
gen
	"<<<gente>>>", "<<<inteligente>>>", "<<<VAGENS>>>", and "<<<Gente>>>" ...vocês da física que passam o dia <<<protegendo>>>...
voce
	...<<<vocês>>> da física que passam o dia protegendo...
o
	Characters like æ,<<<ø>>>,å,Æ,<<<Ø>>> and Å are used in Denmark, Sweden and <<<Norway>>>
ø
	Characters like æ,<<<ø>>>,å,Æ,Ø and Å are used in Denmark, Sweden and Norway
ae
	Characters like <<<æ>>>,ø,å,<<<Æ>>>,Ø and Å are used in Denmark, Sweden and Norway
Æ
	Characters like æ,ø,å,<<<Æ>>>,Ø and Å are used in Denmark, Sweden and Norway