fork download
  1. <?php
  2.  
  // Sample titles used to exercise the chapter/episode matcher below.
  $strs = [
      'c0',
      'c0-3',
      'c0+3',
      'c0 & 9',
      'c0001, 2, 03',
      'c01-03',
      'c1.0 - 2.0',
      'chapter 2A Hello',
      'chapter 2AHello',
      'chapter 10.4c',
      'chapter 2B',
      'episode 23.000 & 00024',
      'episode 23 & 24',
      'e23 & 24',
      'text c25.6 text',
      '001 & 2 & 5 & 8-20 & 100 text chapter 25.6 text 98',
      'hello 23 & 24',
      'ep 1 - 2',
      'chapter 1 - chapter 2',
      'text chapter 25.6 text',
      'text chapters 23, 24, 25 text',
      'text chapter 23, 25 text',
      'text chapter 23 & 24 & 25 text',
      'text c25.5-30 text',
      'text c99-c102 text',
      'text chapter 1 - 3 text',
      '33 text chapter 1, 2 text 3',
      'text chapters 23, 24, 25, 29, 31, 32 text',
      'c19 & c20',
      'chapter 25.6 & chapter 29',
      'chapter 25+c26',
      'chapter 25 + 26 + 27',
  ];

  // Recognised prefixes; the empty string allows a bare number list with no prefix.
  $terms = ['episode', 'chapter', 'ch', 'ep', 'c', 'e', ''];

  // Longest terms first, so the alternation tries "chapter" before "ch" before "c".
  usort($terms, function ($a, $b) {
      return strlen($b) - strlen($a);
  });

  // Capturing form of the prefix. The branch-reset group (?| ... ) makes every
  // alternative fill the same two groups: 1 = first letter of the term,
  // 2 = the remainder of the term plus an optional plural "s".
  $chapter_main_rx = "\b(?|" . implode("|", array_map(function ($term) {
      if (strlen($term) === 0) {
          return "()()";
      }
      // preg_quote keeps terms literal should one ever contain a regex metacharacter.
      return "(" . preg_quote(substr($term, 0, 1), '~') . ")("
          . preg_quote(substr($term, 1), '~') . "s?)";
  }, $terms)) . ")\s*";

  // Non-capturing form of the same prefix, used where a term is repeated inside
  // a list ("c99-c102", "chapter 1 - chapter 2").
  $chapter_aux_rx = "\b(?:" . implode("|", array_map(function ($term) {
      if (strlen($term) === 0) {
          return "";
      }
      return preg_quote(substr($term, 0, 1), '~') . "(?:"
          . preg_quote(substr($term, 1), '~') . "s?)";
  }, $terms)) . ")\s*";

  // Group 3 = the whole number list, group 4 = a single number (optionally with a
  // decimal part and a one-letter suffix); (?4) re-uses that number sub-pattern
  // after each separator.
  $reg = "~$chapter_main_rx((\d+(?:\.\d+)?(?:[A-Z]\b)?)(?:\s*(?:[,&+-]|and)\s*(?:$chapter_aux_rx)?(?4))*)~ui";

  foreach ($strs as $s) {
      if (preg_match($reg, $s, $m)) {
          // Normalise the matched number list. The first alternative consumes a
          // "<number><letter>-" range start; the second consumes a run of numbers
          // joined by , & + or "and", which buildNumChain() then collapses.
          $p3 = preg_replace_callback(
              "~(\d*(?:\.\d+)?)([A-Z]?)\s*-\s*(?:$chapter_aux_rx)?|(\d+(?:\.\d+)?(?:[A-Z]\b)?)(?:\s*(?:[,&+]|and)\s*(?:$chapter_aux_rx)?(?1))*~ui",
              function ($x) use ($chapter_aux_rx) {
                  // Group 3 is only populated when the list alternative matched.
                  if (isset($x[3]) && strlen($x[3])) {
                      return buildNumChain(
                          preg_split("~\s*(?:[,&+]|and)\s*(?:$chapter_aux_rx)?~ui", $x[0])
                      );
                  }

                  // Range start: "+ 0" strips leading zeros and a ".0" fraction.
                  $number = (isset($x[1]) && strlen($x[1])) ? ($x[1] + 0) : "";
                  // NOTE(review): the letter's ordinal (a=1, b=2, ...) is appended
                  // directly to the number here, so "2A-" becomes "21-" - confirm
                  // this is intended.
                  $letter = (isset($x[2]) && strlen($x[2])) ? ord(strtolower($x[2])) - 96 : "";

                  return $number . $letter . "-";
              },
              $m[3]
          );

          print_r(["original" => $s, "found_match" => trim($m[0]), "converted" => $m[1] . $p3]);
          echo "\n";
      } else {
          echo "No match for '$s'!\n";
      }
  }
  32.  
  /**
   * Collapse a list of number tokens into a compact chain string.
   *
   * Each token is a number, optionally with a decimal part and a single trailing
   * letter ("23", "0001", "10.4c", "2A"). Numbers are normalised with "+ 0"
   * (leading zeros and ".0" fractions disappear). Runs in which each value is
   * exactly one less than the next are written as "first-last"; all other
   * values are joined with " & ". A trailing letter is rendered as " part N"
   * (a = 1, b = 2, ...) only for values that are not part of a run.
   *
   * Tokens that do not match the number pattern are coerced with is_numeric();
   * anything non-numeric counts as 0.
   *
   * @param array $arr List of number tokens (strings).
   * @return string The collapsed chain, e.g. "23-25 & 29 & 31-32"; "" for an empty list.
   */
  function buildNumChain($arr) {
      // Parse every token once so the look-ahead below compares clean numbers
      // instead of doing arithmetic on raw strings such as "3B".
      $nums = [];
      $parts = [];
      foreach (array_values($arr) as $token) {
          $part = "";
          if (preg_match('~^(\d+(?:\.\d+)?)([A-Z]?)$~i', $token, $ms)) {
              $num = $ms[1] + 0;
              if ($ms[2] !== '') {
                  $part = ' part ' . (ord(strtolower($ms[2])) - 96);
              }
          } else {
              $num = is_numeric($token) ? $token + 0 : 0;
          }
          $nums[] = $num;
          $parts[] = $part;
      }

      $count = count($nums);
      $ret = "";
      // Explicit flag instead of testing the last value with empty(): a run that
      // starts at 0 must still be recognised as open (empty(0) is true).
      $inRange = false;

      for ($i = 0; $i < $count; $i++) {
          $val = $nums[$i];
          // Loose comparison on purpose: 23.0 (from "23.000") must equal int 23.
          $nextIsConsecutive = ($i < $count - 1) && $val == $nums[$i + 1] - 1;

          if ($nextIsConsecutive) {
              if (!$inRange) {
                  // First value of a run: emit it and wait for the run's end.
                  $ret .= ($i == 0 ? "" : " & ") . $val;
                  $inRange = true;
              }
          } elseif ($inRange) {
              // Last value of a run.
              $ret .= '-' . $val;
              $inRange = false;
          } else {
              // Stand-alone value; only these keep their " part N" suffix.
              $ret .= ($i == 0 ? "" : " & ") . $val . $parts[$i];
          }
      }

      return $ret;
  }
  60.  
Success #stdin #stdout 0.02s 82624KB
stdin
Standard input is empty
stdout
Array
(
    [original] => c0
    [found_match] => c0
    [converted] => c0
)

Array
(
    [original] => c0-3
    [found_match] => c0-3
    [converted] => c0-3
)

Array
(
    [original] => c0+3
    [found_match] => c0+3
    [converted] => c0 & 3
)

Array
(
    [original] => c0 & 9
    [found_match] => c0 & 9
    [converted] => c0 & 9
)

Array
(
    [original] => c0001, 2, 03
    [found_match] => c0001, 2, 03
    [converted] => c1-3
)

Array
(
    [original] => c01-03
    [found_match] => c01-03
    [converted] => c1-3
)

Array
(
    [original] => c1.0 - 2.0
    [found_match] => c1.0 - 2.0
    [converted] => c1-2
)

Array
(
    [original] => chapter 2A Hello
    [found_match] => chapter 2A
    [converted] => c2 part 1
)

Array
(
    [original] => chapter 2AHello
    [found_match] => chapter 2
    [converted] => c2
)

Array
(
    [original] => chapter 10.4c
    [found_match] => chapter 10.4c
    [converted] => c10.4 part 3
)

Array
(
    [original] => chapter 2B
    [found_match] => chapter 2B
    [converted] => c2 part 2
)

Array
(
    [original] => episode 23.000 & 00024
    [found_match] => episode 23.000 & 00024
    [converted] => e23-24
)

Array
(
    [original] => episode 23 & 24
    [found_match] => episode 23 & 24
    [converted] => e23-24
)

Array
(
    [original] => e23 & 24
    [found_match] => e23 & 24
    [converted] => e23-24
)

Array
(
    [original] => text c25.6 text
    [found_match] => c25.6
    [converted] => c25.6
)

Array
(
    [original] => 001 & 2 & 5 & 8-20 & 100 text chapter 25.6 text 98
    [found_match] => 001 & 2 & 5 & 8-20 & 100
    [converted] => 1-2 & 5 & 8-20 & 100
)

Array
(
    [original] => hello 23 & 24
    [found_match] => 23 & 24
    [converted] => 23-24
)

Array
(
    [original] => ep 1 - 2
    [found_match] => ep 1 - 2
    [converted] => e1-2
)

Array
(
    [original] => chapter 1 - chapter 2
    [found_match] => chapter 1 - chapter 2
    [converted] => c1-2
)

Array
(
    [original] => text chapter 25.6 text
    [found_match] => chapter 25.6
    [converted] => c25.6
)

Array
(
    [original] => text chapters 23, 24, 25 text
    [found_match] => chapters 23, 24, 25
    [converted] => c23-25
)

Array
(
    [original] => text chapter 23, 25 text
    [found_match] => chapter 23, 25
    [converted] => c23 & 25
)

Array
(
    [original] => text chapter 23 & 24 & 25 text
    [found_match] => chapter 23 & 24 & 25
    [converted] => c23-25
)

Array
(
    [original] => text c25.5-30 text
    [found_match] => c25.5-30
    [converted] => c25.5-30
)

Array
(
    [original] => text c99-c102 text
    [found_match] => c99-c102
    [converted] => c99-102
)

Array
(
    [original] => text chapter 1 - 3 text
    [found_match] => chapter 1 - 3
    [converted] => c1-3
)

Array
(
    [original] => 33 text chapter 1, 2 text 3
    [found_match] => 33
    [converted] => 33
)

Array
(
    [original] => text chapters 23, 24, 25, 29, 31, 32 text
    [found_match] => chapters 23, 24, 25, 29, 31, 32
    [converted] => c23-25 & 29 & 31-32
)

Array
(
    [original] => c19 & c20
    [found_match] => c19 & c20
    [converted] => c19-20
)

Array
(
    [original] => chapter 25.6 & chapter 29
    [found_match] => chapter 25.6 & chapter 29
    [converted] => c25.6 & 29
)

Array
(
    [original] => chapter 25+c26
    [found_match] => chapter 25+c26
    [converted] => c25-26
)

Array
(
    [original] => chapter 25 + 26 + 27
    [found_match] => chapter 25 + 26 + 27
    [converted] => c25-27
)