{ -- module XMLSX_ where import Char import Numeric } -- der Scanner arbeitet monadisch und mit start codes , weil die -- Klassen CharData und Name nicht ohne Kontext zu unterscheiden sind. %wrapper "monad" -- $char -> ALLE! $char = $printable $digit = 0-9 $namechar = [a-zA-Z0-9_\.\-:] -- spezielle whitespaces @w = [\ \n\t\r]+ @name = [a-z_:] $namechar* @noTR = ~[\<\&\]] @chRef = ((\& \# $digit+ ) | ( \& \# "x" [0-9A-Fa-f]+ )) \; @eRef = "&" @name ";" @CData = @w* ( [^\]] | \] [^\]] | \] \] [^\>] )-- Aufgabe 8.3 a') @noCommentEnd = @w* ([^\-] | \- [^\-] ) xml :- <0> "" { tok $ \s l -> Comment (trim 4 s) } <0> "" { tok $ \s l -> CData (trim 9 s) } <0> \< \? "xml" @w "version" @w? \= @w? \" 1 \. 0 \" @w? \? \> { tok $ \s l -> XMLDecl } <0> " DocTypeDecl (drop 10 s)) `andBegin` prolog } @w SYSTEM @w \" [^\"]* \" { tok $ \s l -> ExtDecl ( tail $ dropWhile noQuote $ init s) } @w* ">" { (tok $ \s l -> TagClose) `andBegin` 0} @w* "/>" { (tok $ \s l -> TagEmptyClose) `andBegin` 0} <0> " TagEnd (drop 2 s) ) `andBegin` tag } <0> "<" @name { (tok $ \s l -> TagStart (tail s)) `andBegin` tag } @w+ @name @w? \= @w? \" ([^\<\&\"] | @chRef | @eRef)* \" { tok $ \s l -> Attribute (extractAtt s) } <0> @chRef { tok $ \s l -> CharRef (readNum (init (drop 2 s))) } <0> @eRef { tok $ \s l -> EntityRef (init (tail s)) } <0> ((@w)? (@noTR | \] @noTR | \]\] [^\>])+ )+ { tok $ \s l -> CharData s} <0,tag,prolog> \n+ ; <0> @w { tok $ \s l -> WhitePlus } { -- Token-Definition data Token = Comment String | CData String | ExtDecl String | XMLDecl -- immer: :-) | DocTypeDecl String | CharData String | TagStart String | TagEnd String | TagClose | TagEmptyClose | Attribute (String, String) | CharRef Int | EntityRef String | WhitePlus | EOF deriving Show instance Eq Token where (==) EOF EOF = True (==) _ _ = False -- bei Bedarf selbst schreiben! readNum :: String -> Int readNum ('x':hex) = fst (head (readHex hex)) readNum dec = read dec extractAtt :: String -> (String, String) extractAtt s = (name, value) where name = takeWhile (not . isSpace) (takeWhile (/= '=' ) s') value= takeWhile noQuote (tail (dropWhile noQuote s)) -- fehlte: führende Leerzeichen von s abschneiden (stets vorhanden) s' = dropWhile isSpace s noQuote :: Char -> Bool noQuote = (/=) '"' trim :: Int -> String -> String trim n s = drop n s' where s' = reverse (drop 3 (reverse s)) ---------------------------------------------- -- monadischer Scanner mit Zustaenden... Hilfsfunktionen alexEOF :: Alex Token alexEOF = return EOF alexScanTokens str = case (runAlex str $ collect []) of Left msg -> error msg Right tks -> tks collect :: [Token] -> Alex [Token] collect tks = do tok <- monadScan if tok == EOF then return (reverse tks) else collect (tok:tks) -- from monad wrapper, error message displays offending input monadScan = do inp <- alexGetInput sc <- alexGetStartCode case alexScan inp sc of AlexEOF -> alexEOF AlexError inp' -> alexError $ "lexical error reading " ++ show (inp) ++ "..." AlexSkip inp' len -> do alexSetInput inp' monadScan AlexToken inp' len action -> do alexSetInput inp' action inp len -- from monad wrapper, doznwork: -- token :: (String -> Int -> token) -> AlexAction token -- token t input len = return (t input len) tok :: (String -> Int -> Token) -> AlexInput -> Int -> Alex Token tok f (pos,_,str) l = return (f (take l str) l) }