import Data.List (elemIndex) -- Let's call a Token a single word in the input String (such as "three" or "hundred"). type Token = String -- We define a type for a parser from a list of tokens to the value they represent. type NParse = [Token] -> Int -- Map of literal tokens (0-9, 11-19 and tens) to their names. literals :: [(Token, Int)] literals = [ ("zero", 0), ("one", 1), ("two", 2), ("three", 3), ("four", 4), ("five", 5), ("six", 6), ("seven", 7), ("eight", 8), ("nine", 9), ("eleven", 11), ("twelve", 12), ("thirteen", 13), ("fourteen", 14), ("fifteen", 15), ("sixteen", 16), ("seventeen", 17), ("eighteen", 18), ("nineteen", 19), ("ten", 10), ("twenty", 20), ("thirty", 30), ("fourty", 40), ("fifty", 50), ("sixty", 60), ("seventy", 70), ("eighty", 80), ("ninety", 90) ] -- It'd be nice to have less tokens (using the regularity of the "-teen" and "-ty" suffixes, but this would probably break the nice combinator below. -- Splits the input string into tokens. -- We do one special transformation: replace dshes by a new token. Such that "fifty-three" becomes "fifty tens three". prepare :: String -> [Token] prepare s = words (replace '-' " tens " s) -- there's probably something in the standard lib for that but anyway... where replace a b xs = concatMap (\x -> if x == a then b else [x]) xs {- Splits a token list around a given token. E.g. splitAround [a,b,c,d] b = ([a], [c,d]) If token is not found, returns ([], str). Pretty sure there should be something similar in the libraries too. -} splitAround :: (Eq a) => [a] -> a -> ([a], [a]) splitAround str token = case elemIndex token str of Just i -> (\(l,r) -> (l, tail r)) $ splitAt i str Nothing -> ([], str) -- Let's do the easy stuff and just parse literals first. We just have to look them up in the literals map. -- This is a partial function, it'll error-out on unknown values or if more than a single Token is passed. parseL :: NParse parseL [] = 0 parseL [tok] = case lookup tok literals of Just x -> x Nothing -> error $ "Found unknown literal : " ++ (show tok) parseL ts = error $ "parseL expects a single token got " ++ (show ts) -- We're going to exploit the fact that the input strings have a tree-like structure like so -- thousand -- hundred hundred -- ten ten ten ten -- lit lit lit lit lit lit lit lit -- And recursively parse that tree until we only have literal values. -- When I parse the tree -- thousand -- h1 h2 -- The resulting value is 1000 * h1 + h2. -- And this works similarly for all levels of the tree. -- So instead of writing specific parsers for all levels, let's just write a generic one : {- genParse :: NParse the sub parser -> Int the left part multiplier -> Token the boundary token -> NParse returns a new parser -} genParse :: NParse -> Int -> Token -> NParse genParse delegate mul tok = newParser where newParser [] = 0 newParser str = case splitAround str tok of -- Split around the boundary token, sub-parse the left and right parts, and combine them (l,r) -> (delegate l) * mul + (delegate r) -- And so here's the result: parseNumber :: String -> Int parseNumber = parseM . prepare where parseT = genParse parseL 1 "tens" -- multiplier is irregular, because the fifty in fifty-three is already multiplied by 10 parseH = genParse parseT 100 "hundred" parseK = genParse parseH 1000 "thousand" parseM = genParse parseK 1000000 "million" -- For fun :D test = (parseNumber "five hundred twenty-three thousand six hundred twelve million two thousand one") == 523612002001 -- interactive main = do input <- getLine putStrLn $ (show . parseNumber) input