fork download
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3.  
  4.  
  5.  
  6.  
  7. def CountOccurencesInText(word,text):
  8. """Number of occurences of word (case insensitive) in text"""
  9. #This does not pass the unittests:
  10. t = text.lower().split(r"\W"+word.lower()+r"\W")
  11. return len(t)-1
  12.  
  13.  
  14. def testCountOccurencesInText():
  15. """ Test the CountOccurencesInText function"""
  16. text="""Georges is my name and I like python. Oh ! your name is georges? And you like Python!
  17. Yes is is true, I like PYTHON
  18. and my name is GEORGES"""
  19. # test with a little text.
  20. assert( 3 == CountOccurencesInText("Georges",text) )
  21. assert( 3 == CountOccurencesInText("GEORGES",text) )
  22. assert( 3 == CountOccurencesInText("georges",text) )
  23. assert( 0 == CountOccurencesInText("george",text) )
  24. assert( 3 == CountOccurencesInText("python",text) )
  25. assert( 3 == CountOccurencesInText("PYTHON",text) )
  26. assert( 2 == CountOccurencesInText("I",text) )
  27. assert( 0 == CountOccurencesInText("n",text) )
  28. assert( 1 == CountOccurencesInText("true",text) )
  29. # regard ' as text:
  30. assert ( 0 == CountOccurencesInText ( "maley", "John O'maley is my friend" ) )
  31. # Test it but with a BIG length file. (we once had a memory error with this...)
  32. text = """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
  33. text += """The quick brown fox jump over the lazy dog.The quick brown Georges jump over the lazy dog."""
  34. text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
  35. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
  36. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
  37. text += """The quick brown fox jump over the lazy dog.The quick brown Georges jump over the lazy dog."""
  38. text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
  39. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
  40. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
  41. text += """The quick brown fox jump over the lazy dog.The quick brown Georges jump over the lazy dog."""
  42. text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
  43. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
  44. text += """The quick brown fox jump over the true lazy dog.The quick brown fox jump over the lazy dog."""
  45. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
  46. text += """ I vsfgsdfg sfdg sdfg sdgh sgh I sfdgsdf"""
  47. text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
  48. assert( 3 == CountOccurencesInText("Georges",text) )
  49. assert( 3 == CountOccurencesInText("GEORGES",text) )
  50. assert( 3 == CountOccurencesInText("georges",text) )
  51. assert( 0 == CountOccurencesInText("george",text) )
  52. assert( 3 == CountOccurencesInText("python",text) )
  53. assert( 3 == CountOccurencesInText("PYTHON",text) )
  54. assert( 2 == CountOccurencesInText("I",text) )
  55. assert( 0 == CountOccurencesInText("n",text) )
  56. assert( 1 == CountOccurencesInText("true",text) )
  57. assert( 0 == CountOccurencesInText("reflexion mirror",
  58. "I am a senior citizen and I live in the Fun-Plex 'Reflexion Mirror' in Sopchoppy, Florida") )
  59. assert( 1 == CountOccurencesInText("'reflexion mirror'",
  60. "I am a senior citizen and I live in the Fun-Plex 'Reflexion Mirror' in Sopchoppy, Florida") )
  61. assert( 1 == CountOccurencesInText("reflexion mirror",
  62. "I am a senior citizen and I live in the Fun-Plex (Reflexion Mirror) in Sopchoppy, Florida") )
  63. assert( 1 == CountOccurencesInText("reflexion mirror",
  64. "Reflexion Mirror\" in Sopchoppy, Florida") )
  65. assert( 1 == CountOccurencesInText("reflexion mirror",
  66. u"I am a senior citizen and I live in the Fun-Plex «Reflexion Mirror» in Sopchoppy, Florida") )
  67. assert( 1 == CountOccurencesInText("reflexion mirror",
  68. u"I am a senior citizen and I live in the Fun-Plex \u201cReflexion Mirror\u201d in Sopchoppy, Florida") )
  69. assert( 1 == CountOccurencesInText("legitimate",
  70. u"who is approved by OILS is completely legitimate: their employees are of legal working age") )
  71. assert( 0 == CountOccurencesInText("legitimate their",
  72. u"who is approved by OILS is completely legitimate: their employees are of legal working age") )
  73. assert( 1 == CountOccurencesInText("get back to me",
  74. u"I hope you will consider this proposal, and get back to me as soon as possible") )
  75. assert( 1 == CountOccurencesInText("skin-care",
  76. u"enable Delavigne and its subsidiaries to create a skin-care monopoly") )
  77. assert( 1 == CountOccurencesInText("skin-care monopoly",
  78. u"enable Delavigne and its subsidiaries to create a skin-care monopoly") )
  79. assert( 0 == CountOccurencesInText("skin-care monopoly in the US",
  80. u"enable Delavigne and its subsidiaries to create a skin-care monopoly") )
  81. assert( 1 == CountOccurencesInText("get back to me",
  82. u"When you know:get back to me") )
  83. assert( 1 == CountOccurencesInText("don't be left" , """emergency alarm warning.
  84. Don't be left unprotected. Order your SSSS3000 today!""" ) )
  85. assert( 1 == CountOccurencesInText("don" , """emergency alarm warning.
  86. Don't be left unprotected. Order your don SSSS3000 today!""" ) )
  87. assert( 1 == CountOccurencesInText("take that as a 'yes'",
  88. "Do I have to take that as a 'yes'?") )
  89. assert( 1 == CountOccurencesInText("don't take that as a 'yes'",
  90. "I don't take that as a 'yes'?") )
  91. assert( 1 == CountOccurencesInText("take that as a 'yes'",
  92. "I don't take that as a 'yes'?") )
  93. assert( 1 == CountOccurencesInText("don't",
  94. "I don't take that as a 'yes'?") )
  95. assert( 1 == CountOccurencesInText("attaching my c.v. to this e-mail",
  96. "I am attaching my c.v. to this e-mail." ))
  97. assert ( 1 == CountOccurencesInText ( "Linguist", "'''Linguist Specialist Found Dead on Laboratory Floor'''" ))
  98. assert ( 1 == CountOccurencesInText ( "Linguist Specialist", "'''Linguist Specialist Found Dead on Laboratory Floor'''" ))
  99. assert ( 1 == CountOccurencesInText ( "Laboratory Floor", "'''Linguist Specialist Found Dead on Laboratory Floor'''" ))
  100. assert ( 1 == CountOccurencesInText ( "Floor", "'''Linguist Specialist Found Dead on Laboratory Floor'''" ))
  101. assert ( 1 == CountOccurencesInText ( "Floor", "''Linguist Specialist Found Dead on Laboratory Floor''" ))
  102. assert ( 1 == CountOccurencesInText ( "Floor", "__Linguist Specialist Found Dead on Laboratory Floor__" ))
  103. assert ( 1 == CountOccurencesInText ( "Floor", "'''''Linguist Specialist Found Dead on Laboratory Floor'''''" ))
  104. assert ( 1 == CountOccurencesInText ( "Linguist", "'''Linguist Specialist Found Dead on Laboratory Floor'''" ))
  105. assert ( 1 == CountOccurencesInText ( "Linguist", "''Linguist Specialist Found Dead on Laboratory Floor''" ))
  106. assert ( 1 == CountOccurencesInText ( "Linguist", "__Linguist Specialist Found Dead on Laboratory Floor__" ))
  107. assert ( 1 == CountOccurencesInText ( "Linguist", "'''''Linguist Specialist Found Dead on Laboratory Floor'''''" ))
  108.  
  109.  
  110.  
# Sample text (a mock suggestion-box letter) that doit() passes to
# CountOccurencesInText as the benchmark input.
SampleTextForBench = """
A Suggestion Box Entry from Bob Carter

Dear Anonymous,

I'm not quite sure I understand the concept of this 'Anonymous' Suggestion Box. If no one reads what we write, then how will anything ever
change?

But in the spirit of good will, I've decided to offer my two cents, and hopefully Kevin won't steal it! (ha, ha). I would really like to
see more varieties of coffee in the coffee machine in the break room. 'Milk and sugar', 'black with sugar', 'extra sugar' and 'cream and su
gar' don't offer much diversity. Also, the selection of drinks seems heavily weighted in favor of 'sugar'. What if we don't want any suga
r?

But all this is beside the point because I quite like sugar, to be honest. In fact, that's my second suggestion: more sugar in the office.
Cakes, candy, insulin, aspartame... I'm not picky. I'll take it by mouth or inject it intravenously, if I have to.

Also, if someone could please fix the lock on the men's room stall, that would be helpful. Yesterday I was doing my business when Icarus ne
arly climbed into my lap.

So, have a great day!

Anonymously,
Bob Carter
"""
  135.  
  136.  
  137.  
  138. def doit():
  139. """Run CountOccurencesInText on a few examples"""
  140. i = 0
  141. for x in xrange(400):
  142. i+= CountOccurencesInText("word" , SampleTextForBench)
  143. i+= CountOccurencesInText("sugar" , SampleTextForBench)
  144. i+= CountOccurencesInText("help" , SampleTextForBench)
  145. i+= CountOccurencesInText("heavily" , SampleTextForBench)
  146. i+= CountOccurencesInText("witfull" , SampleTextForBench)
  147. i+= CountOccurencesInText("dog" , SampleTextForBench)
  148. i+= CountOccurencesInText("almost" , SampleTextForBench)
  149. i+= CountOccurencesInText("insulin" , SampleTextForBench)
  150. i+= CountOccurencesInText("attaching" , SampleTextForBench)
  151. i+= CountOccurencesInText("asma" , SampleTextForBench)
  152. i+= CountOccurencesInText("neither" , SampleTextForBench)
  153. i+= CountOccurencesInText("won't" , SampleTextForBench)
  154. i+= CountOccurencesInText("green" , SampleTextForBench)
  155. i+= CountOccurencesInText("parabole" , SampleTextForBench)
  156. print i
  157.  
  158.  
  159.  
  160.  
  161. #Start the tests
  162. if __name__ == '__main__':
  163. #I need to pass the test:
  164. try:
  165. testCountOccurencesInText()
  166. except:
  167. print "Error !"
  168. raise
  169. print "Tests passed"
  170. #I need to be fast as well:
  171. import profile
  172. profile.run('doit()')
Success #stdin #stdout 0.36s 8228KB
stdin
Standard input is empty
stdout
Tests passed
0
         28004 function calls in 0.146 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     5600    0.012    0.000    0.012    0.000 :0(len)
    11200    0.035    0.000    0.035    0.000 :0(lower)
        1    0.000    0.000    0.000    0.000 :0(setprofile)
     5600    0.016    0.000    0.016    0.000 :0(split)
        1    0.000    0.000    0.146    0.146 <string>:1(<module>)
        1    0.000    0.000    0.146    0.146 profile:0(doit())
        0    0.000             0.000          profile:0(profiler)
        1    0.016    0.016    0.146    0.146 prog.py:138(doit)
     5600    0.067    0.000    0.130    0.000 prog.py:7(CountOccurencesInText)