# Pasted from an ideone.com page (page controls: "fork", "download").
import re

  3. number_words = [ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
  4. number_words_tens =[ "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" ]
  5. number_words_rx = re.compile(r'\b(?:(?:{0})?(?:{1})|(?:{0}))\b'.format("|".join(number_words_tens),"|".join(number_words)))
  6. main_rx = re.compile(r'\s*\d+(?:\s+(?:and\s+)?\d+){2,}')
  7. numbers_1_99 = number_words
  8. numbers_1_99.extend(tens if ones == "zero" else (tens + "-" + ones) # stackoverflow.com/a/8982279/3832970
  9. for tens in "twenty thirty forty fifty sixty seventy eighty ninety".split()
  10. for ones in numbers_1_99[0:10])
  11.  
  12. def text2int(textnum, numwords={}): # stackoverflow.com/a/493788/3832970
  13. units = [
  14. "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
  15. "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
  16. "sixteen", "seventeen", "eighteen", "nineteen",
  17. ]
  18. tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
  19. numwords["and"] = (1, 0)
  20. for idx, word in enumerate(units):
  21. numwords[word] = (1, idx)
  22. for idx, word in enumerate(tens):
  23. numwords[word] = (1, idx * 10)
  24. current = result = 0
  25. for word in textnum.split():
  26. if word not in numwords:
  27. raise Exception("Illegal word: " + word)
  28.  
  29. scale, increment = numwords[word]
  30. current = current + increment
  31.  
  32. return result + current
  33. sample1 = "hello my name is sofie my social security number is thirteen zero four five and seventy eighteen seven and forty and I live on mountain street number twelve"
  34. sample1 = number_words_rx.sub(lambda x: str(text2int(x.group())), sample1)
  35. #3 or more numbers occur with only whitespace or "and"
  36. re_results = main_rx.sub('', sample1)
  37. print( re.sub(r'\d{1,2}', lambda x: numbers_1_99[int(x.group())], re_results) )
# Recorded run from the ideone.com page:
#   status: Success (0.02s, 9784KB)
#   stdin:  (empty)
#   stdout:
#     hello my name is sofie my social security number is and I live on mountain street number twelve