2007-04-13

Norvig's spelling corrector in Haskell

A pretty literal translation of http://www.norvig.com/spell-correct.html in Haskell.
*Main> sc <- getCorrector 
*Main> sc "speling"
spelling
I didn't bother to use ByteString so it's slow.
import Prelude hiding (words)
import Data.Char
import Data.Ord
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List

words = List.words . map (toLower . (\c -> if isAlpha c then c else ' ')) 

train = List.foldl' (\dict f -> Map.insertWith' (+) f 1 dict) Map.empty 

edits1 word =
    let n = length word in
    Set.fromList $    [take i word ++ drop (i+1) word | i <- [0..n-1]]                                  -- deletion
                   ++ [take i word ++ [word!!(i+1)] ++ [word!!i] ++ drop (i+2) word | i <- [0..n-2]]    -- transposition
                   ++ [take i word ++ [c] ++ drop (i+1) word | i <- [0..n-1] , c <- ['a'..'z'] ]          -- alteration
                   ++ [take i word ++ [c] ++ drop i word | i <- [0..n-1] , c <- ['a'..'z'] ]              -- insertion

known_edits2 nwords word  = Set.fromList [e2 | e1 <- Set.elems (edits1 word)
                                             , e2 <- Set.elems (edits1 e1) 
                                             , e2 `Map.member` nwords ]

known nwords = Set.intersection (Map.keysSet nwords)

correct nwords word = let candidates = fromJust $ List.find (not . Set.null) [ known nwords (Set.singleton word)
                                                                             , known nwords (edits1 word) 
                                                                             , known_edits2 nwords word
                                                                             , Set.singleton word ]
               in List.maximumBy (comparing (\c -> Map.findWithDefault 1 c nwords)) (Set.elems candidates)

getCorrector = do
  nWORDS <- fmap (train . words) (readFile "big.txt")
  return  (putStrLn . correct nWORDS)