A pretty literal translation of
http://www.norvig.com/spell-correct.html in Haskell.
*Main> sc <- getCorrector
*Main> sc "speling"
spelling
I didn't bother to use
ByteString so it's slow.
import Prelude hiding (words)
import Data.Char
import Data.Ord
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
words = List.words . map (toLower . (\c -> if isAlpha c then c else ' '))
train = List.foldl' (\dict f -> Map.insertWith' (+) f 1 dict) Map.empty
edits1 word =
let n = length word in
Set.fromList $ [take i word ++ drop (i+1) word | i <- [0..n-1]] -- deletion
++ [take i word ++ [word!!(i+1)] ++ [word!!i] ++ drop (i+2) word | i <- [0..n-2]] -- transposition
++ [take i word ++ [c] ++ drop (i+1) word | i <- [0..n-1] , c <- ['a'..'z'] ] -- alteration
++ [take i word ++ [c] ++ drop i word | i <- [0..n-1] , c <- ['a'..'z'] ] -- insertion
known_edits2 nwords word = Set.fromList [e2 | e1 <- Set.elems (edits1 word)
, e2 <- Set.elems (edits1 e1)
, e2 `Map.member` nwords ]
known nwords = Set.intersection (Map.keysSet nwords)
correct nwords word = let candidates = fromJust $ List.find (not . Set.null) [ known nwords (Set.singleton word)
, known nwords (edits1 word)
, known_edits2 nwords word
, Set.singleton word ]
in List.maximumBy (comparing (\c -> Map.findWithDefault 1 c nwords)) (Set.elems candidates)
getCorrector = do
nWORDS <- fmap (train . words) (readFile "big.txt")
return (putStrLn . correct nWORDS)