fork download
  1. import Data.List (elemIndex)
  2.  
  3. -- Let's call a Token a single word in the input String (such as "three" or "hundred").
  4. type Token = String
  5.  
  6. -- We define a type for a parser from a list of tokens to the value they represent.
  7. type NParse = [Token] -> Int
  8.  
  9. -- Map of literal tokens (0-9, 11-19 and tens) to their names.
  10. literals :: [(Token, Int)]
  11. literals = [
  12. ("zero", 0), ("one", 1), ("two", 2), ("three", 3), ("four", 4), ("five", 5), ("six", 6), ("seven", 7), ("eight", 8), ("nine", 9),
  13. ("eleven", 11), ("twelve", 12), ("thirteen", 13), ("fourteen", 14), ("fifteen", 15), ("sixteen", 16), ("seventeen", 17), ("eighteen", 18), ("nineteen", 19),
  14. ("ten", 10), ("twenty", 20), ("thirty", 30), ("fourty", 40), ("fifty", 50), ("sixty", 60), ("seventy", 70), ("eighty", 80), ("ninety", 90)
  15. ]
  16. -- It'd be nice to have less tokens (using the regularity of the "-teen" and "-ty" suffixes, but this would probably break the nice combinator below.
  17.  
  18. -- Splits the input string into tokens.
  19. -- We do one special transformation: replace dshes by a new token. Such that "fifty-three" becomes "fifty tens three".
  20. prepare :: String -> [Token]
  21. prepare s = words (replace '-' " tens " s)
  22. -- there's probably something in the standard lib for that but anyway...
  23. where replace a b xs = concatMap (\x -> if x == a then b else [x]) xs
  24.  
  25. {- Splits a token list around a given token.
  26. E.g. splitAround [a,b,c,d] b = ([a], [c,d])
  27. If token is not found, returns ([], str).
  28. Pretty sure there should be something similar in the libraries too. -}
  29. splitAround :: (Eq a) => [a] -> a -> ([a], [a])
  30. splitAround str token = case elemIndex token str of
  31. Just i -> (\(l,r) -> (l, tail r)) $ splitAt i str
  32. Nothing -> ([], str)
  33.  
  34. -- Let's do the easy stuff and just parse literals first. We just have to look them up in the literals map.
  35. -- This is a partial function, it'll error-out on unknown values or if more than a single Token is passed.
  36. parseL :: NParse
  37. parseL [] = 0
  38. parseL [tok] = case lookup tok literals of
  39. Just x -> x
  40. Nothing -> error $ "Found unknown literal : " ++ (show tok)
  41. parseL ts = error $ "parseL expects a single token got " ++ (show ts)
  42.  
  43. -- We're going to exploit the fact that the input strings have a tree-like structure like so
  44. -- thousand
  45. -- hundred hundred
  46. -- ten ten ten ten
  47. -- lit lit lit lit lit lit lit lit
  48. -- And recursively parse that tree until we only have literal values.
  49. -- When I parse the tree
  50. -- thousand
  51. -- h1 h2
  52. -- The resulting value is 1000 * h1 + h2.
  53. -- And this works similarly for all levels of the tree.
  54. -- So instead of writing specific parsers for all levels, let's just write a generic one :
  55.  
  56. {- genParse ::
  57. NParse
  58. the sub parser
  59. -> Int
  60. the left part multiplier
  61. -> Token
  62. the boundary token
  63. -> NParse
  64. returns a new parser -}
  65. genParse :: NParse -> Int -> Token -> NParse
  66. genParse delegate mul tok = newParser where
  67. newParser [] = 0
  68. newParser str = case splitAround str tok of
  69. -- Split around the boundary token, sub-parse the left and right parts, and combine them
  70. (l,r) -> (delegate l) * mul + (delegate r)
  71.  
  72. -- And so here's the result:
  73. parseNumber :: String -> Int
  74. parseNumber = parseM . prepare
  75. where
  76. parseT = genParse parseL 1 "tens" -- multiplier is irregular, because the fifty in fifty-three is already multiplied by 10
  77. parseH = genParse parseT 100 "hundred"
  78. parseK = genParse parseH 1000 "thousand"
  79. parseM = genParse parseK 1000000 "million" -- For fun :D
  80.  
  81. test = (parseNumber "five hundred twenty-three thousand six hundred twelve million two thousand one") == 523612002001
  82.  
  83. -- interactive
  84. main = do
  85. input <- getLine
  86. putStrLn $ (show . parseNumber) input
Success #stdin #stdout 0s 6256KB
stdin
three thousand
stdout
3000