-- caesarsolver.hs -- this is part one of the optional programming assignment for the -- Stanford Univerity Introduction to Artificial Intelligence class -- it solves a rotating or caesar cypher by using the probabilities -- of three letter trigrams for the english language. -- All 26 rotations are generated, scored and the top 3 most likely -- candidates are displayed. Apologies for the Haskell, it's my first program -- -- source for trigram data is here: http://h...content-available-to-author-only...l.org/~cowan/trigrams -- the trigram data is read from input -- PAC 9th January, 2012 import Data.Char (ord, chr, isUpper, isLower, toLower, isLetter, toUpper) import Data.List (sortBy) import Data.Ord (comparing) import qualified Data.Map as M import System.IO import System.Environment import Control.Monad -- define a datatype for our map of trigram frequencies for clarity type TrigramFrequencies = M.Map String Int -- datatype for a scored string type ScoredString = ( Double, String ) -- the code to break defaultEncodedMsg = "Esp qtcde nzyqpcpynp zy esp ezatn zq Lcetqtntlw Tyepwwtrpynp hld spwo le Olcexzfes Nzwwprp ty estd jplc." -- Shifts a character to the right if positive, left if negative. Wraps around. shift :: Int -> Char -> Char -- Modulus handles the wraparound(shift 1 'z' = 'a') shift n c | isUpper c = chr $ ord 'A' + ((ord c + n - ord 'A') `mod` 26) | isLower c = chr $ ord 'a' + ((ord c + n - ord 'a') `mod` 26) | otherwise = c -- shifts input string by n shiftString:: Int -> String -> String shiftString n = map (shift n) --parse a line from the trigams data file into a tuple of trigram and frequency per 10000 words parseTrigram:: String -> [ (String,Int) ] parseTrigram contents = [ lineToTuple( words x ) | x <- lines(contents), head x /= ';' ] where lineToTuple [ trigram, frequency ] = ( trigram, read frequency :: Int) -- loads the trigram frequency data into a Map for fast lookup -- source for trigram data is here: http://h...content-available-to-author-only...l.org/~cowan/trigrams -- we read it in from stdin -- returns an IO action yielding a Map containing a trigram and it's frequency per 10000 words loadTrigrams:: IO (TrigramFrequencies) loadTrigrams = fmap (M.fromList . parseTrigram) getContents -- preprocess our string for trigram lookup. Converts all non letters to # and the rest to uppercase preprocessText:: String -> String preprocessText text = map processChar text where processChar c = if isLetter c then toUpper c else '#' -- split a string into character trigrams splitIntoTrigrams:: String -> [String] splitIntoTrigrams str = [ take 3 $ drop x str | x <- [ 0 .. (length str - 3) ] ] -- lookup the trigram frequencies per 10000 words. If not found it will return 0 lookupTrigramFrequency:: TrigramFrequencies -> String -> Int lookupTrigramFrequency trigramFrequencies trigram = M.findWithDefault 0 trigram trigramFrequencies -- converts a letter frequency into a probability using laplace smoothing so we don't end up with -- trigrams that were not found borking our calculations (due to multiplying by 0) laplaceSmoothedProbability:: Int -> Int -> Int -> Double laplaceSmoothedProbability frequency totalCategories k = (fromIntegral(frequency) + fromIntegral(k))/(10000.0 + fromIntegral(k)*fromIntegral(totalCategories)) -- returns the log of the probability for a trigram. We use logs so we don't endup dealing with -- really small numbers caused by multiplying small numbers. We can just sum the logs instead. calcLogOfSmoothedProbabilityForTrigram:: TrigramFrequencies -> String -> Double calcLogOfSmoothedProbabilityForTrigram trigramFrequencies trigram = log ( laplaceSmoothedProbability (lookupTrigramFrequency trigramFrequencies trigram) (M.size trigramFrequencies) 1 ) -- calculates the score for a string based on the probabilities of all the trigrams it contains scoreString:: TrigramFrequencies -> String -> ScoredString scoreString trigramFrequencies str = ( sum $ map ( calcLogOfSmoothedProbabilityForTrigram trigramFrequencies ) ( splitIntoTrigrams $ preprocessText str ) , str ) -- calculates the scores for all 26 rotations of the input string and returns them in descending order of score scoreAll:: TrigramFrequencies -> String -> [ ScoredString ] scoreAll trigramFrequencies str = reverse $ sortBy (comparing fst) $ map (scoreString trigramFrequencies) [ shiftString n str | n <- [1 .. 26] ] -- displays the string along with its score displayScoredString:: (Double, String ) -> IO () displayScoredString (score, str) = do putStrLn str putStr "score: " putStrLn $ show score putStrLn "" -- parse arguments, if no supplied use default parseArgs:: [String] -> String parseArgs args = if null args then defaultEncodedMsg else head args -- entry point main = do trigramFrequencies <- loadTrigrams args <- getArgs let msg = parseArgs args putStrLn "Caesar Cypher Solver (ai-class)\n" putStrLn "encoded message:" putStrLn msg putStrLn "\nThe top 3 candidates based on trigram letter probabilities for english language are:\n" let top3 = take 3 $ scoreAll trigramFrequencies msg mapM_ displayScoredString top3