fork(4) download
  1. <?php
  2.  
  3. function sentence_split($text) {
  4. $before_regexes = array('/(?:[”’"\'»])\s*\Z/u',
  5. '/(?:(?:[\'\"„][\.!?…][\'\"”]\s)|(?:[^\.]\s[A-Z]\.\s)|(?:\b(?:St|Gen|Hon|Prof|Dr|Mr|Ms|Mrs|[JS]r|Col|Maj|Brig|Sgt|Capt|Cmnd|Sen|Rev|Rep|Revd)\.\s)|(?:\b(?:St|Gen|Hon|Prof|Dr|Mr|Ms|Mrs|[JS]r|Col|Maj|Brig|Sgt|Capt|Cmnd|Sen|Rev|Rep|Revd)\.\s[A-Z]\.\s)|(?:\bApr\.\s)|(?:\bAug\.\s)|(?:\bBros\.\s)|(?:\bCo\.\s)|(?:\bCorp\.\s)|(?:\bDec\.\s)|(?:\bDist\.\s)|(?:\bFeb\.\s)|(?:\bInc\.\s)|(?:\bJan\.\s)|(?:\bJul\.\s)|(?:\bJun\.\s)|(?:\bMar\.\s)|(?:\bNov\.\s)|(?:\bOct\.\s)|(?:\bPh\.?D\.\s)|(?:\bSept?\.\s)|(?:\b\p{Lu}\.\p{Lu}\.\s)|(?:\b\p{Lu}\.\s\p{Lu}\.\s)|(?:\bcf\.\s)|(?:\be\.g\.\s)|(?:\besp\.\s)|(?:\bet\b\s\bal\.\s)|(?:\bvs\.\s)|(?:\p{Ps}[!?]+\p{Pe} ))\Z/su',
  6. '/(?:(?:[\.\s]\p{L}{1,2}\.\s))\Z/su',
  7. '/(?:(?:[\[\(]*\.\.\.[\]\)]* ))\Z/su',
  8. '/(?:(?:\b(?:pp|[Vv]iz|i\.?\s*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl|Pres|[Dd]ept|min|max|[Gg]ovt|lb|ft|c\.?\s*f|vs|qtd)\.\s))\Z/su',
  9. '/(?:(?:\b[Ee]tc\.\s))\Z/su',
  10. '/(?:(?:[\.!?…]+\p{Pe} )|(?:[\[\(]*…[\]\)]* ))\Z/su',
  11. '/(?:(?:\b\p{L}\.))\Z/su',
  12. '/(?:(?:\b\p{L}\.\s))\Z/su',
  13. '/(?:(?:\b[Ff]igs?\.\s)|(?:\b[nN]o\.\s))\Z/su',
  14. '/(?:(?:[\"”\']\s*))\Z/su',
  15. '/(?:(?:[\.!?…][\x{00BB}\x{2019}\x{201D}\x{203A}\"\'\p{Pe}\x{0002}]*\s)|(?:\r?\n))\Z/su',
  16. '/(?:(?:[\.!?…][\'\"\x{00BB}\x{2019}\x{201D}\x{203A}\p{Pe}\x{0002}]*))\Z/su',
  17. '/(?:(?:\s\p{L}[\.!?…]\s))\Z/su');
  18. $after_regexes = array('/\A(?:\(\p{L})/u',
  19. '/\A(?:)/su',
  20. '/\A(?:[\p{N}\p{Ll}])/su',
  21. '/\A(?:[^\p{Lu}])/su',
  22. '/\A(?:[^\p{Lu}]|I)/su',
  23. '/\A(?:[^p{Lu}])/su',
  24. '/\A(?:\p{Ll})/su',
  25. '/\A(?:\p{L}\.)/su',
  26. '/\A(?:\p{L}\.\s)/su',
  27. '/\A(?:\p{N})/su',
  28. '/\A(?:\s*\p{Ll})/su',
  29. '/\A(?:)/su',
  30. '/\A(?:\p{Lu}[^\p{Lu}])/su',
  31. '/\A(?:\p{Lu}\p{Ll})/su');
  32. $is_sentence_boundary = array(false, false, false, false, false, false, false, false, false, false, false, true, true, true);
  33. $count = 13;
  34.  
  35. $sentences = array();
  36. $sentence = '';
  37. $before = '';
  38. $after = substr($text, 0, 10);
  39. $text = substr($text, 10);
  40.  
  41. while($text != '') {
  42. for($i = 0; $i < $count; $i++) {
  43. if(preg_match($before_regexes[$i], $before) && preg_match($after_regexes[$i], $after)) {
  44. if($is_sentence_boundary[$i]) {
  45. array_push($sentences, $sentence);
  46. $sentence = '';
  47. }
  48. break;
  49. }
  50. }
  51.  
  52. $first_from_text = $text[0];
  53. $text = substr($text, 1);
  54. $first_from_after = $after[0];
  55. $after = substr($after, 1);
  56. $before .= $first_from_after;
  57. $sentence .= $first_from_after;
  58. $after .= $first_from_text;
  59. }
  60.  
  61. if($sentence != '' && $after != '') {
  62. array_push($sentences, $sentence.$after);
  63. }
  64.  
  65. return $sentences;
  66. }
  67.  
  68. $text = "In his book The Symposium, Plato wrote “Those who are halves of a man whole pursue males, and being slices, so to speak, of the male, love men throughout their boyhood, and take pleasure in physical contact with men” (qtd. in Isay 11).";
  69. print_r(sentence_split($text));
  70. $text = "Dr. Evelyn Hooker, a heterosexual psychologist";
  71. print_r(sentence_split($text));
  72. $text = "Dobbens reasoned that most parents would not raise their children to be homosexual; “They’re not like ‘My child’s going to be gay!”’ (Dobbens). Dobbens believes that he was born homosexual.";
  73. print_r(sentence_split($text));
  74. $text = "Dobbens reasoned that most parents would not raise their children to be homosexual; “They’re not like ‘My child’s going to be gay!”’(Dobbens). Dobbens believes that he was born homosexual.";
  75. print_r(sentence_split($text));
  76. $text = "In an interview in 1903, he professed his beliefs: “I am… of the firm conviction that homosexuals must not be treated as sick people… Homosexual persons are not sick. They also do not belong in a court of law!” (qtd. in Isay 3).";
  77. print_r(sentence_split($text));
Success #stdin #stdout 0.03s 82944KB
stdin
Standard input is empty
stdout
Array
(
    [0] => In his book The Symposium, Plato wrote “Those who are halves of a man whole pursue males, and being slices, so to speak, of the male, love men throughout their boyhood, and take pleasure in physical contact with men” (qtd. in Isay 11).
)
Array
(
    [0] => Dr. Evelyn Hooker, a heterosexual psychologist
)
Array
(
    [0] => Dobbens reasoned that most parents would not raise their children to be homosexual; “They’re not like ‘My child’s going to be gay!”’ (Dobbens). 
    [1] => Dobbens believes that he was born homosexual.
)
Array
(
    [0] => Dobbens reasoned that most parents would not raise their children to be homosexual; “They’re not like ‘My child’s going to be gay!”’(Dobbens). 
    [1] => Dobbens believes that he was born homosexual.
)
Array
(
    [0] => In an interview in 1903, he professed his beliefs: “I am… of the firm conviction that homosexuals must not be treated as sick people… 
    [1] => Homosexual persons are not sick. 
    [2] => They also do not belong in a court of law!” (qtd. in Isay 3).
)