fork download
  1. <?php
  2.  
  3.  
  4. function splitSentences($text) {
  5. $re = '/# Split sentences on whitespace between them.
  6. (?<= # Begin positive lookbehind.
  7. [.!?] # Either an end of sentence punct,
  8. | [.!?][\'"] # or end of sentence punct and quote.
  9. ) # End positive lookbehind.
  10. (?<! # Begin negative lookbehind.
  11. Mr\. # Skip either "Mr."
  12. | Mrs\. # or "Mrs.",
  13. | Ms\. # or "Ms.",
  14. | Jr\. # or "Jr.",
  15. | Dr\. # or "Dr.",
  16. | Prof\. # or "Prof.",
  17. | Vol\. # or "Vol.",
  18. | A\.D\. # or "A.D.",
  19. | B\.C\. # or "B.C.",
  20. | Sr\. # or "Sr.",
  21. | T\.V\.A\. # or "T.V.A.",
  22. # or... (you get the idea).
  23. ) # End negative lookbehind.
  24. \s+ # Split on whitespace between sentences.
  25. /uix';
  26.  
  27. $sentences = preg_split($re, $text, -1, PREG_SPLIT_NO_EMPTY);
  28. return $sentences;
  29. }
  30.  
  31. $sentences = 'Entertainment media properties. Ã Fairy Tail and Tokyo Ghoul. Entertainment media properties. &Acirc;&nbsp; Fairy Tail and Tokyo Ghoul.';
  32.  
  33. $sentences = splitSentences($sentences);
  34.  
  35. print_r($sentences);
Success #stdin #stdout 0.02s 52472KB
stdin
Standard input is empty
stdout
Array
(
    [0] => Entertainment media properties.
    [1] => Ã Fairy Tail and Tokyo Ghoul.
    [2] => Entertainment media properties.
    [3] => &Acirc;&nbsp; Fairy Tail and Tokyo Ghoul.
)