-------------------------------------------------------------- -- Parsing arithmetic expressions using parser combinators -- -------------------------------------------------------------- module AE_Parser where import Char infixr 6 &&& infixr 4 ||| -- Tokentyp a, Resultattyp b type Parser a b = [a] -> [(b,[a])] -- basic parsers result :: b -> Parser a b result x s = [(x,s)] zero :: Parser a b zero s = [] item :: Parser a a item [] = [] -- failure item (a:as) = [(a,as)] checkChar :: Char -> Parser Char Char checkChar c [] = [] checkChar c (a:as) | c == a = [(c,as)] | otherwise = [] -- ------------------------------------------------------------ -- parser combinator bind -- ------------------------------------------------------------ bind :: Parser a b -> (b-> Parser a c) -> Parser a c bind p f s = concat [f v inp | (v,inp) <- p s] -- ------------------------------------------------------------ -- Examples with bind: -- ------------------------------------------------------------ check :: Parser Char Char check = item &&& checkChar -- bind item checkChar -- bind as infix operator (&&&) = bind -- simple combinator parsers sat :: (a -> Bool) -> Parser a a sat p = item &&& (\ x -> if p x then result x else zero) -- checkChar c = sat (\ y -> y == c) digit :: Parser Char Char digit = sat isDigit lower :: Parser Char Char lower = sat (\ x -> x >= 'a' && x <= 'z') upper :: Parser Char Char upper = sat (\ x -> x >= 'A' && x <= 'Z') twolower :: Parser Char String twolower = lower &&& \x -> lower &&& \y -> result [x,y] -- ------------------------------------------------------------ -- parser combinator plus -- ------------------------------------------------------------ plus :: Parser a b -> Parser a b -> Parser a b plus p q s = p s ++ q s -- plus as infix operator (|||) = plus -- ------------------------------------------------------------ -- Examples with plus: -- ------------------------------------------------------------ letter :: Parser Char Char letter = lower ||| upper alphanum :: Parser Char Char alphanum = letter ||| digit -- ------------------------------------------------------------ -- more combinators: -- ------------------------------------------------------------ many :: Parser a b -> Parser a [b] many p = (p &&& \x -> (many p) &&& \xs -> result (x:xs)) ||| result [] option :: Parser a b -> Parser a [b] option p = (p &&& \x -> result [x]) ||| result [] tok :: Eq a => [a] -> Parser a [a] tok s cs = loop s cs where loop [] cs = [(s,cs)] loop (s:ss) (c:cs) | s==c = loop ss cs loop _ _ = [] ---------------------------------------------------------- -- Translation of Grammar Rules into Parser Functions -- ---------------------------------------------------------- -- E -> TH expr :: Parser Char [Int] expr = term &&& \lt -> hexpr &&& \le -> result (1:lt++le) -- H -> +TH | eps hexpr = (checkChar '+' &&& \ _ -> --(tok "+") &&& \c -> term &&& \lt -> hexpr &&& \le -> result (2:lt++le)) ||| result [3] -- T -> FG term = factor &&& \lf -> hterm &&& \lt -> result (4:lf++lt) -- G -> *FG | eps hterm = (checkChar '*' &&& \ _ -> --(tok "*") &&& \c -> factor &&& \lf -> hterm &&& \lt -> result (5:lf++lt)) ||| result [6] -- F -> (E) | a factor = (checkChar '(' &&& \ _ -> --(tok "(") &&& \c -> expr &&& \le -> checkChar ')' &&& \ _ -> --(tok ")") &&& \c -> result (7:le)) ||| (checkChar 'a' &&& \ _ -> -- tok "a" &&& \ c -> result [8]) parseRD :: String -> [Int] parseRD s = case expr s of ((l,""):xs) -> l _ -> error "syntax error in expression" -------------------------------------------------------------- -- integration of look ahead sets improves error detection -- -------------------------------------------------------------- lacontrol :: [String] -> String -> String lacontrol la [] | elem [] la = [] | otherwise = error ("Syntax error: expected: "++ show la ++" found: eof") lacontrol la inp@(c:cs) | elem [c] la = inp | otherwise = error ("Syntax error: expected: "++ show la ++" found: "++ show inp) -- combine parsers using look ahead information lachoice :: (Show a,Eq a) => [[[a]]] -> [Parser a b] -> Parser a b lachoice las parsers inp | (null inp) && (elem inp all_las) || not (null inp) && (elem [head inp] all_las) = findParser las parsers inp | otherwise = error ("Syntax error: expected: "++ show all_las ++" found: "++ show inp) where all_las = concat las findParser :: (Show a, Eq a) => [[[a]]] -> [Parser a b] -> Parser a b findParser (la:las) (p:ps) [] | elem [] la = p [] | otherwise = findParser las ps [] findParser (la:las) (p:ps) inp@(c:cs) | elem [c] la = p inp | otherwise = findParser las ps inp findParser [] ps inp = error "No Parser found - this case is not possible!" ----------------------------------------------------- -- extend parser functions by control information -- ----------------------------------------------------- -- E -> TH, Look-ahead: ["(","a"] expr2 :: Parser Char [Int] expr2 = (term2 &&& \lt -> hexpr2 &&& \le -> result (1:lt++le)) .(lacontrol ["(","a"]) -- H -> +TH | eps, Look-ahead: ["+"] | [")",""] hexpr2 = lachoice [["+"],[")",""]] [(tok "+") &&& \c -> term2 &&& \lt -> hexpr2 &&& \le -> result (2:lt++le),result [3]] -- T -> FG, Look-ahead: ["(","a"] term2 = (factor2 &&& \lf -> hterm2 &&& \lt -> result (4:lf++lt)).(lacontrol ["(","a"]) -- G -> *FG | eps, Look-ahead: ["*"] | ["+",")",""] hterm2 = lachoice [["*"],["+",")",""]] [ ((tok "*") &&& \c -> factor2 &&& \lf -> hterm2 &&& \lt -> result (5:lf++lt)), result [6]] -- F -> (E) | a, Look-ahead: ["("] | ["a"] factor2 = lachoice [["("], ["a"]] [((tok "(") &&& \c -> expr2 &&& \le -> (tok ")") &&& \c -> result (7:le)), (tok "a" &&& \ c -> result [8])] parseRDla :: String -> [Int] parseRDla s = case expr2 s of ((l,""):xs) -> l ((l,cs):xs) -> error ("Syntax error: expected: eof found:" ++show cs) _ -> error "Syntax error: closing bracket missing" -- ----------------------------------------------------- -- change result type of parser functions -- produce abstract syntax tree ----------------------------------------------------- data Expr = Plus Expr Expr | Mult Expr Expr | Factor | Empty deriving Show empty :: Expr -> Bool empty Empty = True empty _ = False -- E -> TH, Look-ahead: ["(","a"] expr3 :: Parser Char Expr expr3 = (term3 &&& \lt -> hexpr3 &&& \le -> result (if empty le then lt else Plus lt le)) .(lacontrol ["(","a"]) -- H -> +TH | eps, Look-ahead: ["+"] | [")",""] hexpr3 = lachoice [["+"],[")",""]] [((tok "+") &&& \c -> term3 &&& \lt -> hexpr3 &&& \le -> result (if empty le then lt else Plus lt le)), result Empty] -- T -> FG, Look-ahead: ["(","a"] term3 = (factor3 &&& \lf -> hterm3 &&& \lt -> result (if empty lt then lf else Mult lf lt)) .(lacontrol ["(","a"]) -- G -> *FG | eps, Look-ahead: ["*"] | ["+",")",""] hterm3 = lachoice [["*"],["+",")",""]] [ ((tok "*") &&& \c -> factor3 &&& \lf -> hterm3 &&& \lt -> result (if empty lt then lf else Mult lf lt)), result Empty] -- F -> (E) | a, Look-ahead: ["("] | ["a"] factor3 = lachoice [["("], ["a"]] [((tok "(") &&& \c -> expr3 &&& \le -> (tok ")") &&& \c -> result le), (tok "a" &&& \ c -> result Factor)] parseRDla3 :: String -> Expr parseRDla3 s = case expr3 s of ((l,""):xs) -> l ((l,cs):xs) -> error ("Syntax error: expected: eof found:" ++show cs) _ -> error "Syntax error: closing bracket missing" --