import Data.List (elemIndex)
-- Let's call a Token a single word in the input String (such as "three" or "hundred").
-- We define a type for a parser from a list of tokens to the value they represent.
type NParse
= [Token
] -> Int
-- Map of literal tokens (0-9, 11-19 and tens) to their names.
literals
:: [(Token
, Int)]literals = [
("zero", 0), ("one", 1), ("two", 2), ("three", 3), ("four", 4), ("five", 5), ("six", 6), ("seven", 7), ("eight", 8), ("nine", 9),
("eleven", 11), ("twelve", 12), ("thirteen", 13), ("fourteen", 14), ("fifteen", 15), ("sixteen", 16), ("seventeen", 17), ("eighteen", 18), ("nineteen", 19),
("ten", 10), ("twenty", 20), ("thirty", 30), ("fourty", 40), ("fifty", 50), ("sixty", 60), ("seventy", 70), ("eighty", 80), ("ninety", 90)
]
-- It'd be nice to have less tokens (using the regularity of the "-teen" and "-ty" suffixes, but this would probably break the nice combinator below.
-- Splits the input string into tokens.
-- We do one special transformation: replace dshes by a new token. Such that "fifty-three" becomes "fifty tens three".
prepare s
= words (replace
'-' " tens " s
) -- there's probably something in the standard lib for that but anyway...
where replace a b xs
= concatMap (\x
-> if x
== a
then b
else [x
]) xs
{- Splits a token list around a given token.
E.g. splitAround [a,b,c,d] b = ([a], [c,d])
If token is not found, returns ([], str).
Pretty sure there should be something similar in the libraries too. -}
splitAround
:: (Eq a
) => [a
] -> a
-> ([a
], [a
])splitAround str token = case elemIndex token str of
Nothing -> ([], str)
-- Let's do the easy stuff and just parse literals first. We just have to look them up in the literals map.
-- This is a partial function, it'll error-out on unknown values or if more than a single Token is passed.
parseL :: NParse
parseL [] = 0
parseL
[tok
] = case lookup tok literals
of Just x -> x
Nothing
-> error $ "Found unknown literal : " ++ (show tok
)parseL ts
= error $ "parseL expects a single token got " ++ (show ts
)
-- We're going to exploit the fact that the input strings have a tree-like structure like so
-- thousand
-- hundred hundred
-- ten ten ten ten
-- lit lit lit lit lit lit lit lit
-- And recursively parse that tree until we only have literal values.
-- When I parse the tree
-- thousand
-- h1 h2
-- The resulting value is 1000 * h1 + h2.
-- And this works similarly for all levels of the tree.
-- So instead of writing specific parsers for all levels, let's just write a generic one :
{- genParse ::
NParse
the sub parser
-> Int
the left part multiplier
-> Token
the boundary token
-> NParse
returns a new parser -}
genParse
:: NParse
-> Int -> Token
-> NParse
genParse delegate mul tok = newParser where
newParser [] = 0
newParser str = case splitAround str tok of
-- Split around the boundary token, sub-parse the left and right parts, and combine them
(l,r) -> (delegate l) * mul + (delegate r)
-- And so here's the result:
parseNumber = parseM . prepare
where
parseT = genParse parseL 1 "tens" -- multiplier is irregular, because the fifty in fifty-three is already multiplied by 10
parseH = genParse parseT 100 "hundred"
parseK = genParse parseH 1000 "thousand"
parseM = genParse parseK 1000000 "million" -- For fun :D
test = (parseNumber "five hundred twenty-three thousand six hundred twelve million two thousand one") == 523612002001
-- interactive
main = do
aW1wb3J0IERhdGEuTGlzdCAoZWxlbUluZGV4KQoKLS0gTGV0J3MgY2FsbCBhIFRva2VuIGEgc2luZ2xlIHdvcmQgaW4gdGhlIGlucHV0IFN0cmluZyAoc3VjaCBhcyAidGhyZWUiIG9yICJodW5kcmVkIikuCnR5cGUgVG9rZW4gPSBTdHJpbmcKCi0tIFdlIGRlZmluZSBhIHR5cGUgZm9yIGEgcGFyc2VyIGZyb20gYSBsaXN0IG9mIHRva2VucyB0byB0aGUgdmFsdWUgdGhleSByZXByZXNlbnQuCnR5cGUgTlBhcnNlID0gW1Rva2VuXSAtPiBJbnQJCgotLSBNYXAgb2YgbGl0ZXJhbCB0b2tlbnMgKDAtOSwgMTEtMTkgYW5kIHRlbnMpIHRvIHRoZWlyIG5hbWVzLgpsaXRlcmFscyA6OiBbKFRva2VuLCBJbnQpXQpsaXRlcmFscyA9IFsKCQkoInplcm8iLCAwKSwgKCJvbmUiLCAxKSwgKCJ0d28iLCAyKSwgKCJ0aHJlZSIsIDMpLCAoImZvdXIiLCA0KSwgKCJmaXZlIiwgNSksICgic2l4IiwgNiksICgic2V2ZW4iLCA3KSwgKCJlaWdodCIsIDgpLCAoIm5pbmUiLCA5KSwKCQkoImVsZXZlbiIsIDExKSwJKCJ0d2VsdmUiLCAxMiksCSgidGhpcnRlZW4iLCAxMyksICgiZm91cnRlZW4iLCAxNCksICgiZmlmdGVlbiIsIDE1KSwgKCJzaXh0ZWVuIiwgMTYpLCAoInNldmVudGVlbiIsIDE3KSwgKCJlaWdodGVlbiIsIDE4KSwgKCJuaW5ldGVlbiIsIDE5KSwKCQkoInRlbiIsIDEwKSwgKCJ0d2VudHkiLCAyMCksICgidGhpcnR5IiwgMzApLCAoImZvdXJ0eSIsIDQwKSwgKCJmaWZ0eSIsIDUwKSwgKCJzaXh0eSIsIDYwKSwgKCJzZXZlbnR5IiwgNzApLCAoImVpZ2h0eSIsIDgwKSwgKCJuaW5ldHkiLCA5MCkKCV0KLS0gSXQnZCBiZSBuaWNlIHRvIGhhdmUgbGVzcyB0b2tlbnMgKHVzaW5nIHRoZSByZWd1bGFyaXR5IG9mIHRoZSAiLXRlZW4iIGFuZCAiLXR5IiBzdWZmaXhlcywgYnV0IHRoaXMgd291bGQgcHJvYmFibHkgYnJlYWsgdGhlIG5pY2UgY29tYmluYXRvciBiZWxvdy4KCi0tIFNwbGl0cyB0aGUgaW5wdXQgc3RyaW5nIGludG8gdG9rZW5zLgotLSBXZSBkbyBvbmUgc3BlY2lhbCB0cmFuc2Zvcm1hdGlvbjogcmVwbGFjZSBkc2hlcyBieSBhIG5ldyB0b2tlbi4gU3VjaCB0aGF0ICJmaWZ0eS10aHJlZSIgYmVjb21lcyAiZmlmdHkgdGVucyB0aHJlZSIuIApwcmVwYXJlIDo6IFN0cmluZyAtPiBbVG9rZW5dCnByZXBhcmUgcyA9IHdvcmRzIChyZXBsYWNlICctJyAiIHRlbnMgIiBzKQoJLS0gdGhlcmUncyBwcm9iYWJseSBzb21ldGhpbmcgaW4gdGhlIHN0YW5kYXJkIGxpYiBmb3IgdGhhdCBidXQgYW55d2F5Li4uCgl3aGVyZSByZXBsYWNlIGEgYiB4cyA9IGNvbmNhdE1hcCAoXHggLT4gaWYgeCA9PSBhIHRoZW4gYiBlbHNlIFt4XSkgeHMgCgp7LSBTcGxpdHMgYSB0b2tlbiBsaXN0IGFyb3VuZCBhIGdpdmVuIHRva2VuLgoJRS5nLiBzcGxpdEFyb3VuZCBbYSxiLGMsZF0gYiA9IChbYV0sIFtjLGRdKQoJSWYgdG9rZW4gaXMgbm90IGZvdW5kLCByZXR1cm5zIChbXSwgc3RyKS4KCVByZXR0eSBzdXJlIHRoZXJlIHNob3VsZCBiZSBzb21ldGhpbmcgc2ltaWxhciBpbiB0aGUgbGlicmFyaWVzIHRvby4gLX0Kc3BsaXRBcm91bmQgOjogKEVxIGEpID0+IFthXSAtPiBhIC0+IChbYV0sIFthXSkKc3BsaXRBcm91bmQgc3RyIHRva2VuID0gY2FzZSBlbGVtSW5kZXggdG9rZW4gc3RyIG9mIAoJCUp1c3QgaSAtPiAoXChsLHIpIC0+IChsLCB0YWlsIHIpKSAkIHNwbGl0QXQgaSBzdHIKCQlOb3RoaW5nIC0+IChbXSwgc3RyKQoKLS0gTGV0J3MgZG8gdGhlIGVhc3kgc3R1ZmYgYW5kIGp1c3QgcGFyc2UgbGl0ZXJhbHMgZmlyc3QuIFdlIGp1c3QgaGF2ZSB0byBsb29rIHRoZW0gdXAgaW4gdGhlIGxpdGVyYWxzIG1hcC4KLS0gVGhpcyBpcyBhIHBhcnRpYWwgZnVuY3Rpb24sIGl0J2xsIGVycm9yLW91dCBvbiB1bmtub3duIHZhbHVlcyBvciBpZiBtb3JlIHRoYW4gYSBzaW5nbGUgVG9rZW4gaXMgcGFzc2VkLgpwYXJzZUwgOjogTlBhcnNlCnBhcnNlTCBbXSA9IDAKcGFyc2VMIFt0b2tdID0gY2FzZSBsb29rdXAgdG9rIGxpdGVyYWxzIG9mIAoJSnVzdCB4IC0+IHgKCU5vdGhpbmcgLT4gZXJyb3IgJCAiRm91bmQgdW5rbm93biBsaXRlcmFsIDogIiArKyAoc2hvdyB0b2spCnBhcnNlTCB0cyA9IGVycm9yICQgInBhcnNlTCBleHBlY3RzIGEgc2luZ2xlIHRva2VuIGdvdCAiICsrIChzaG93IHRzKQoKLS0gV2UncmUgZ29pbmcgdG8gZXhwbG9pdCB0aGUgZmFjdCB0aGF0IHRoZSBpbnB1dCBzdHJpbmdzIGhhdmUgYSB0cmVlLWxpa2Ugc3RydWN0dXJlIGxpa2Ugc28KLS0JCQkJCSAgdGhvdXNhbmQKLS0JCQlodW5kcmVkCQkJCWh1bmRyZWQKLS0JCXRlbiAgICAgICB0ZW4JCXRlbgkJCXRlbgotLSAgICBsaXQgICBsaXQgbGl0ICBsaXQgICBsaXQgIGxpdCAgICBsaXQgIGxpdAotLSBBbmQgcmVjdXJzaXZlbHkgcGFyc2UgdGhhdCB0cmVlIHVudGlsIHdlIG9ubHkgaGF2ZSBsaXRlcmFsIHZhbHVlcy4KLS0gV2hlbiBJIHBhcnNlIHRoZSB0cmVlCi0tICAgICAgIHRob3VzYW5kCi0tICAgICBoMSAgICAgICBoMgotLSBUaGUgcmVzdWx0aW5nIHZhbHVlIGlzIDEwMDAgKiBoMSArIGgyLgotLSBBbmQgdGhpcyB3b3JrcyBzaW1pbGFybHkgZm9yIGFsbCBsZXZlbHMgb2YgdGhlIHRyZWUuCi0tIFNvIGluc3RlYWQgb2Ygd3JpdGluZyBzcGVjaWZpYyBwYXJzZXJzIGZvciBhbGwgbGV2ZWxzLCBsZXQncyBqdXN0IHdyaXRlIGEgZ2VuZXJpYyBvbmUgOgoKey0gZ2VuUGFyc2UgOjogCglOUGFyc2UKCQl0aGUgc3ViIHBhcnNlcgoJLT4gSW50CgkJdGhlIGxlZnQgcGFydCBtdWx0aXBsaWVyCgktPiBUb2tlbgoJCXRoZSBib3VuZGFyeSB0b2tlbiAKCS0+IE5QYXJzZQkKCQlyZXR1cm5zIGEgbmV3IHBhcnNlciAtfQkKZ2VuUGFyc2UgOjogTlBhcnNlIC0+IEludCAtPiBUb2tlbiAtPiBOUGFyc2UJCmdlblBhcnNlIGRlbGVnYXRlIG11bCB0b2sgPSBuZXdQYXJzZXIgd2hlcmUKCW5ld1BhcnNlciBbXSA9IDAKCW5ld1BhcnNlciBzdHIgPSBjYXNlIHNwbGl0QXJvdW5kIHN0ciB0b2sgb2YKCQktLSBTcGxpdCBhcm91bmQgdGhlIGJvdW5kYXJ5IHRva2VuLCBzdWItcGFyc2UgdGhlIGxlZnQgYW5kIHJpZ2h0IHBhcnRzLCBhbmQgY29tYmluZSB0aGVtCgkJKGwscikgLT4gKGRlbGVnYXRlIGwpICogbXVsICsgKGRlbGVnYXRlIHIpCQoKLS0gQW5kIHNvIGhlcmUncyB0aGUgcmVzdWx0OiAKcGFyc2VOdW1iZXIgOjogU3RyaW5nIC0+IEludApwYXJzZU51bWJlciA9IHBhcnNlTSAuIHByZXBhcmUKCXdoZXJlCglwYXJzZVQgPSBnZW5QYXJzZSAJcGFyc2VMCTEgCQkidGVucyIJCSAtLSBtdWx0aXBsaWVyIGlzIGlycmVndWxhciwgYmVjYXVzZSB0aGUgZmlmdHkgaW4gZmlmdHktdGhyZWUgaXMgYWxyZWFkeSBtdWx0aXBsaWVkIGJ5IDEwCglwYXJzZUggPSBnZW5QYXJzZSAJcGFyc2VUCTEwMCAJImh1bmRyZWQiCglwYXJzZUsgPSBnZW5QYXJzZSAJcGFyc2VICTEwMDAgCSJ0aG91c2FuZCIKCXBhcnNlTSA9IGdlblBhcnNlIAlwYXJzZUsJMTAwMDAwMAkibWlsbGlvbiIgLS0gRm9yIGZ1biA6RAoJCnRlc3QgPSAocGFyc2VOdW1iZXIgImZpdmUgaHVuZHJlZCB0d2VudHktdGhyZWUgdGhvdXNhbmQgc2l4IGh1bmRyZWQgdHdlbHZlIG1pbGxpb24gdHdvIHRob3VzYW5kIG9uZSIpID09IDUyMzYxMjAwMjAwMQoKLS0gaW50ZXJhY3RpdmUKbWFpbiA9IGRvCglpbnB1dCA8LSBnZXRMaW5lCglwdXRTdHJMbiAkIChzaG93IC4gcGFyc2VOdW1iZXIpIGlucHV0