-------------------------------------------------------------- -- Parsing arithmetic expressions using parser combinators -- -------------------------------------------------------------- module AE_Parser where import Char infixr 6 &&& infixr 4 ||| -- Tokentyp a, Resultattyp b type Parser a b = [a] -> [(b,[a])] -- basic parsers result :: b -> Parser a b result x s = [(x,s)] zero :: Parser a b zero s = [] item :: Parser a a item [] = [] -- failure item (a:as) = [(a,as)] checkChar :: Char -> Parser Char Char checkChar c [] = [] checkChar c (a:as) | c == a = [(c,as)] | otherwise = [] -- ------------------------------------------------------------ -- parser combinator bind -- ------------------------------------------------------------ bind :: Parser a b -> (b-> Parser a c) -> Parser a c bind p f s = concat [f v inp | (v,inp) <- p s] -- ------------------------------------------------------------ -- Examples with bind: -- ------------------------------------------------------------ check :: Parser Char Char check = item &&& checkChar -- bind item checkChar -- bind as infix operator (&&&) = bind -- simple combinator parsers sat :: (a -> Bool) -> Parser a a sat p = item &&& (\ x -> if p x then result x else zero) -- checkChar c = sat (\ y -> y == c) digit :: Parser Char Char digit = sat isDigit lower :: Parser Char Char lower = sat (\ x -> x >= 'a' && x <= 'z') upper :: Parser Char Char upper = sat (\ x -> x >= 'A' && x <= 'Z') twolower :: Parser Char String twolower = lower &&& \x -> lower &&& \y -> result [x,y] -- ------------------------------------------------------------ -- parser combinator plus -- ------------------------------------------------------------ plus :: Parser a b -> Parser a b -> Parser a b plus p q s = p s ++ q s -- plus as infix operator (|||) = plus -- ------------------------------------------------------------ -- Examples with plus: -- ------------------------------------------------------------ letter :: Parser Char Char letter = lower ||| upper alphanum :: Parser Char Char alphanum = letter ||| digit ---------------------------------------------------------- -- Translation of Grammar Rules into Parser Functions -- ---------------------------------------------------------- -- E -> TH expr :: Parser Char [Int] expr = term &&& \lt -> hexpr &&& \le -> result (1:lt++le) -- H -> +TH | eps hexpr = (checkChar '+' &&& \ _ -> --(tok "+") &&& \c -> term &&& \lt -> hexpr &&& \le -> result (2:lt++le)) ||| result [3] -- T -> FG term = factor &&& \lf -> hterm &&& \lt -> result (4:lf++lt) -- G -> *FG | eps hterm = (checkChar '*' &&& \ _ -> --(tok "*") &&& \c -> factor &&& \lf -> hterm &&& \lt -> result (5:lf++lt)) ||| result [6] -- F -> (E) | a factor = (checkChar '(' &&& \ _ -> --(tok "(") &&& \c -> expr &&& \le -> checkChar ')' &&& \ _ -> --(tok ")") &&& \c -> result (7:le)) ||| (checkChar 'a' &&& \ _ -> -- tok "a" &&& \ c -> result [8]) parseRD :: String -> [Int] parseRD s = case expr s of ((l,""):xs) -> l _ -> error "syntax error in expression" -- ------------------------------------------------------------ -- more combinators: -- ------------------------------------------------------------ many :: Parser a b -> Parser a [b] many p = (p &&& \x -> (many p) &&& \xs -> result (x:xs)) ||| result [] option :: Parser a b -> Parser a [b] option p = (p &&& \x -> result [x]) ||| result [] tok :: Eq a => [a] -> Parser a [a] tok s cs = loop s cs where loop [] cs = [(s,cs)] loop (s:ss) (c:cs) | s==c = loop ss cs loop _ _ = [] -------------------------------------------------------------- -- integration of look ahead sets improves error detection -- -------------------------------------------------------------- lacontrol :: [String] -> String -> String lacontrol la [] | elem [] la = [] | otherwise = error ("Syntax error: expected: "++ show la ++" found: eof") lacontrol la inp@(c:cs) | elem [c] la = inp | otherwise = error ("Syntax error: expected: "++ show la ++" found: "++ show inp) -- combine parsers using look ahead information lachoice :: (Show a,Eq a) => [[[a]]] -> [Parser a b] -> Parser a b lachoice las parsers inp | (null inp) && (elem inp all_las) || not (null inp) && (elem [head inp] all_las) = findParser las parsers inp | otherwise = error ("Syntax error: expected: "++ show all_las ++" found: "++ show inp) where all_las = concat las findParser :: (Show a, Eq a) => [[[a]]] -> [Parser a b] -> Parser a b findParser (la:las) (p:ps) [] | elem [] la = p [] | otherwise = findParser las ps [] findParser (la:las) (p:ps) inp@(c:cs) | elem [c] la = p inp | otherwise = findParser las ps inp findParser [] ps inp = error "No Parser found - this case is not possible!" ----------------------------------------------------- -- extend parser functions by control information -- ----------------------------------------------------- -- E -> TH, Look-ahead: ["(","a"] expr2 :: Parser Char [Int] expr2 = (term2 &&& \lt -> hexpr2 &&& \le -> result (1:lt++le)) .(lacontrol ["(","a"]) -- H -> +TH | eps, Look-ahead: ["+"] | [")",""] hexpr2 = lachoice [["+"],[")",""]] [(tok "+") &&& \c -> term2 &&& \lt -> hexpr2 &&& \le -> result (2:lt++le),result [3]] -- T -> FG, Look-ahead: ["(","a"] term2 = (factor2 &&& \lf -> hterm2 &&& \lt -> result (4:lf++lt)).(lacontrol ["(","a"]) -- G -> *FG | eps, Look-ahead: ["*"] | ["+",")",""] hterm2 = lachoice [["*"],["+",")",""]] [ ((tok "*") &&& \c -> factor2 &&& \lf -> hterm2 &&& \lt -> result (5:lf++lt)), result [6]] -- F -> (E) | a, Look-ahead: ["("] | ["a"] factor2 = lachoice [["("], ["a"]] [((tok "(") &&& \c -> expr2 &&& \le -> (tok ")") &&& \c -> result (7:le)), (tok "a" &&& \ c -> result [8])] parseRDla :: String -> [Int] parseRDla s = case expr2 s of ((l,""):xs) -> l ((l,cs):xs) -> error ("Syntax error: expected: eof found:" ++show cs) _ -> error "Syntax error: closing bracket missing" -- ----------------------------------------------------- -- change result type of parser functions -- produce abstract syntax tree ----------------------------------------------------- -- 1st wrong approach, -- Problem: associativity is changed from left to right data Expr = Plus Expr Expr | Mult Expr Expr | Factor | Empty deriving Show empty :: Expr -> Bool empty Empty = True empty _ = False -- E -> TH, Look-ahead: ["(","a"] expr3 :: Parser Char Expr expr3 = (term3 &&& \lt -> hexpr3 &&& \le -> result (if empty le then lt else Plus lt le)) .(lacontrol ["(","a"]) -- H -> +TH | eps, Look-ahead: ["+"] | [")",""] hexpr3 = lachoice [["+"],[")",""]] [((tok "+") &&& \c -> term3 &&& \lt -> hexpr3 &&& \le -> result (if empty le then lt else Plus lt le)), result Empty] -- T -> FG, Look-ahead: ["(","a"] term3 = (factor3 &&& \lf -> hterm3 &&& \lt -> result (if empty lt then lf else Mult lf lt)) .(lacontrol ["(","a"]) -- G -> *FG | eps, Look-ahead: ["*"] | ["+",")",""] hterm3 = lachoice [["*"],["+",")",""]] [ ((tok "*") &&& \c -> factor3 &&& \lf -> hterm3 &&& \lt -> result (if empty lt then lf else Mult lf lt)), result Empty] -- F -> (E) | a, Look-ahead: ["("] | ["a"] factor3 = lachoice [["("], ["a"]] [((tok "(") &&& \c -> expr3 &&& \le -> (tok ")") &&& \c -> result le), (tok "a" &&& \ c -> result Factor)] parseRDla3 :: String -> Expr parseRDla3 s = case expr3 s of ((l,""):xs) -> l ((l,cs):xs) -> error ("Syntax error: expected: eof found:" ++show cs) _ -> error "Syntax error: closing bracket missing" -- 2nd approach: -- left associativity is kept -- auxiliary function buildExpr :: (Expr -> Expr -> Expr) -> [Expr] -> Expr buildExpr op [] = Empty buildExpr op [e] = e buildExpr op (e1:e2:es) = buildExpr op ((op e1 e2):es) -- E -> TH, Look-ahead: ["(","a"] expr4 :: Parser Char Expr expr4 = (term4 &&& \lt -> hexpr4 &&& \les -> result (buildExpr Plus (lt:les))) .(lacontrol ["(","a"]) -- H -> +TH | eps, Look-ahead: ["+"] | [")",""] hexpr4 :: Parser Char [Expr] hexpr4 = lachoice [["+"],[")",""]] [((tok "+") &&& \c -> term4 &&& \lt -> hexpr4 &&& \les -> result (lt:les)), result []] -- T -> FG, Look-ahead: ["(","a"] term4 :: Parser Char Expr term4 = (factor4 &&& \lf -> hterm4 &&& \lts -> result (buildExpr Mult (lf:lts))) .(lacontrol ["(","a"]) -- G -> *FG | eps, Look-ahead: ["*"] | ["+",")",""] hterm4 :: Parser Char [Expr] hterm4 = lachoice [["*"],["+",")",""]] [ ((tok "*") &&& \c -> factor4 &&& \lf -> hterm4 &&& \lts -> result (lf:lts)), result []] -- F -> (E) | a, Look-ahead: ["("] | ["a"] factor4 :: Parser Char Expr factor4 = lachoice [["("], ["a"]] [((tok "(") &&& \c -> expr4 &&& \le -> (tok ")") &&& \c -> result le), (tok "a" &&& \ c -> result Factor)] parseRDla4 :: String -> Expr parseRDla4 s = case expr4 s of ((l,""):xs) -> l ((l,cs):xs) -> error ("Syntax error: expected: eof found:" ++show cs) _ -> error "Syntax error: closing bracket missing" --