fork(1) download
  1. <?php
  2. /**
  3.  * Checks if a $word is a concatenation of valid $symbols
  4.  * E.g. `$word = 'paw'`, `$symbols = array('p', 'pa', 'aw')` would return
  5.  * true, because `$word = 'p'.'aw'`
  6.  *
  7.  * (Too slow)
  8.  */
  9. function isValidWord($word,&$symbols){
  10. $nodes = array($word);
  11. while (count($nodes)>0){
  12. $node = array_shift($nodes);
  13. $nodeExpansions = array();
  14. $nodeLength = strlen($node);
  15. if (in_array($node,$symbols)) { return true; }
  16. for ($len=$nodeLength-1;$len>0;$len--){
  17. if (in_array(substr($node, 0, $len), $symbols)){
  18. $nodeExpansions[] = substr($node, $len-$nodeLength);
  19. }
  20. }
  21. $nodes = array_merge($nodeExpansions,$nodes);
  22. }
  23. return false;
  24. }
  25.  
  26. /**
  27.  * Recursive implementation proposed by Waleed Khan @ stackoverflow
  28.  *
  29.  * (Even slower)
  30.  */
  31. function search(&$array, $word, $word_so_far = '') {
  32. if ($word === $word_so_far) {
  33. return true;
  34. }
  35.  
  36. foreach ($array as $symbol) {
  37. $new_word = $word_so_far . $symbol;
  38. if (substr($word, 0, strlen($new_word)) === $new_word) {
  39. if (search($array, $word, $new_word)) {
  40. return true;
  41. }
  42. }
  43. }
  44.  
  45. return false;
  46. }
  47.  
  48. $minWordLength = 2;
  49. $maxWordLength = 6;
  50. $validWordsCount = 5;
  51. $invalidWordsCount = 5;
  52. $validSymbols = array(
  53. 'the',
  54. 'of',
  55. 'and',
  56. 'to',
  57. 'a',
  58. 'in',
  59. 'for',
  60. 'is',
  61. 'on',
  62. 'that',
  63. 'by',
  64. 'this',
  65. 'with',
  66. 'i',
  67. 'you',
  68. 'it',
  69. 'not',
  70. 'or',
  71. 'be',
  72. 'are',
  73. 'from',
  74. 'at',
  75. 'as',
  76. 'your',
  77. 'all',
  78. 'have',
  79. 'new',
  80. 'more',
  81. 'an',
  82. 'was',
  83. 'we',
  84. 'will',
  85. 'home',
  86. 'can',
  87. 'us',
  88. 'about',
  89. 'if',
  90. 'page',
  91. 'my',
  92. 'has',
  93. 'search',
  94. 'free',
  95. 'but',
  96. 'our',
  97. 'one',
  98. 'other',
  99. 'do',
  100. 'no',
  101. 'information',
  102. 'time',
  103. 'they',
  104. 'site',
  105. 'he',
  106. 'up',
  107. 'may',
  108. 'what',
  109. 'which',
  110. 'their',
  111. 'news',
  112. 'out',
  113. 'use',
  114. 'any',
  115. 'there',
  116. 'see',
  117. 'only',
  118. 'so',
  119. 'his',
  120. 'when',
  121. 'contact',
  122. 'here',
  123. 'business',
  124. 'who',
  125. 'web',
  126. 'also',
  127. 'now',
  128. 'help',
  129. 'get',
  130. 'pm',
  131. 'view',
  132. 'online',
  133. 'c',
  134. 'e',
  135. 'first',
  136. 'am',
  137. 'been',
  138. 'would',
  139. 'how',
  140. 'were',
  141. 'me',
  142. 's',
  143. 'services',
  144. 'some',
  145. 'these',
  146. 'click',
  147. 'its',
  148. 'like',
  149. 'service',
  150. 'x',
  151. 'than',
  152. 'find',
  153. 'price',
  154. );
  155. //careful here with potential concatenations of invalid symbols that make symbols from the valid symbols list!
  156. $invalidSymbols = array ('b*','c(d','e.f','efg..','hij-','spam`','maps_?','kk=','++t+a','___zz');
  157.  
  158. echo "minWordLength: {$minWordLength}\n";
  159. echo "maxWordLength: {$maxWordLength}\n";
  160. echo "validWordsCount: {$validWordsCount}\n";
  161. echo "invalidWordsCount: {$invalidWordsCount}\n";
  162.  
  163. $validSymbolsMaxIndex = count($validSymbols)-1;
  164. echo "validSymbolsCount: ".($validSymbolsMaxIndex+1)."\n";
  165. echo "invalidSymbolsCount: ".count($invalidSymbols)."}\n";
  166. $validWords = array();
  167. for ($i=0;$i<$validWordsCount;$i++){
  168. $word = '';
  169. $wordLength = mt_rand($minWordLength,$maxWordLength);
  170. for ($j=0;$j<$wordLength;$j++){
  171. $word.=$validSymbols[mt_rand(0,$validSymbolsMaxIndex)];
  172. }
  173. $validWords[] = $word;
  174. }
  175.  
  176. $invalidSymbolsMaxIndex = count($invalidSymbols)-1;
  177. $invalidWords = array();
  178. for ($i=0;$i<$invalidWordsCount;$i++){
  179. $word = '';
  180. $wordLength = mt_rand($minWordLength,$maxWordLength);
  181. $invalidSymbolCount = 0;
  182. for ($j=$wordLength-1;$j>=0;$j--){
  183. $word.=(($invalid = mt_rand(0,1))===0)?
  184. $validSymbols[mt_rand(0,$validSymbolsMaxIndex)] :
  185. $invalidSymbols[mt_rand(0,$invalidSymbolsMaxIndex)];
  186. $invalidSymbolCount+=$invalid;
  187. }
  188. if ($invalidSymbolCount===0){
  189. $word.=$invalidSymbols[mt_rand(0,$invalidSymbolsMaxIndex)];
  190. } else {
  191. $word.=(($invalid = mt_rand(0,1))===0)?
  192. $validSymbols[mt_rand(0,$validSymbolsMaxIndex)] :
  193. $invalidSymbols[mt_rand(0,$invalidSymbolsMaxIndex)];
  194. }
  195. $invalidWords[] = $word;
  196. }
  197.  
  198. // isValidWord($word,$validSymbols)
  199. $start = microtime(true);
  200. foreach($validWords as $word){
  201. if(!isValidWord($word,$validSymbols)){
  202. echo "Error: Allegedly valid word `{$word}` got flagged as invalid by isValidWord()\n";
  203. break;
  204. }
  205. }
  206.  
  207. foreach($invalidWords as $word){
  208. if(isValidWord($word,$validSymbols)){
  209. echo "Error: Allegedly invalid word `{$word}` got flagged as valid by isValidWord()\n";
  210. break;
  211. }
  212. }
  213. $elapsedTime = microtime(true) - $start;
  214. echo "Exec. time using isValidWord(\$word,\$validSymbols): {$elapsedTime} seconds\n";
  215.  
  216. // search($validSymbols,$word)
  217. $start = microtime(true);
  218. foreach($validWords as $word){
  219. if(!search($validSymbols,$word)){
  220. echo "Error: Allegedly valid word `{$word}` got flagged as invalid by search()\n";
  221. break;
  222. }
  223. }
  224.  
  225. foreach($invalidWords as $word){
  226. if(search($validSymbols,$word)){
  227. echo "Error: Allegedly invalid word `{$word}` got flagged as valid by search()\n";
  228. break;
  229. }
  230. }
  231. $elapsedTime = microtime(true) - $start;
  232. echo "Exec. time using search(\$validSymbols,\$word): {$elapsedTime} seconds\n";
  233. /*
  234.  * Regex implementation proposed by Waleed Khan @ stackoverflow
  235.  *
  236.  * (Fast enough, unfortunately it won't compile for big $validSymbol dictionaries (~10000 symbols): Regex too long)
  237.  */
  238. // preg_match('/^(preg_quote($validSymbols[0])|...|preg_quote($validSymbols[count($validSymbols)-1]))+$/',$word)
  239. $start = microtime(true);
  240. $regex = '/^(';
  241. foreach ($validSymbols as $symbol){
  242. $regex .= preg_quote($symbol,'/').'|';
  243. }
  244. $regex = rtrim($regex,'|');
  245. $regex.=')+$/';
  246. foreach($validWords as $word){
  247. if(preg_match($regex,$word)!==1){
  248. echo "Error: Allegedly valid word `{$word}` got flagged as invalid by preg_match()\n";
  249. break;
  250. }
  251. }
  252.  
  253. foreach($invalidWords as $word){
  254. if(preg_match($regex,$word)!==0){
  255. echo "Error: Allegedly invalid word `{$word}` did not get flagged as invalid by preg_match()\n";
  256. break;
  257. }
  258. }
  259. $elapsedTime = microtime(true) - $start;
  260. echo "Exec. time using preg_match('/^(preg_quote(\$validSymbols[0])|...|preg_quote(\$validSymbols[count(\$validSymbols)-1]))+\$/',\$word): {$elapsedTime} seconds\n";
  261.  
Success #stdin #stdout 0.02s 20568KB
stdin
Standard input is empty
stdout
minWordLength: 2
maxWordLength: 6
validWordsCount: 5
invalidWordsCount: 5
validSymbolsCount: 101
invalidSymbolsCount: 10}
Exec. time using isValidWord($word,$validSymbols): 0.0033400058746338 seconds
Exec. time using search($validSymbols,$word): 0.0017709732055664 seconds
Exec. time using preg_match('/^(preg_quote($validSymbols[0])|...|preg_quote($validSymbols[count($validSymbols)-1]))+$/',$word): 0.00032901763916016 seconds