-- Bestimmung der look-ahead Mengen 
-- fuer Kontextfreie Grammatiken

{-----------------------------------------
   Definition kontextfreier Grammatiken
------------------------------------------}


terminals :: [Char]
terminals = ['a'..'z'] ++ ['0'..'9'] ++ "()+-*"

nonterminals :: [Char]
nonterminals = ['A'..'Z']

-- CFG = (Nonterminals,Terminals,StartSymbol,Produktionen)
type CFG = (String, String, Char, Rules)
type Rules = [(Char,String)]
type Nonterminal = Char
type Terminal = Char

isCFG (ns,as,start,ps) = subseteq ns nonterminals && subseteq as terminals
                         && elem start ns && valid ps 
    where valid ps = subseteq (lhs ps) ns && subseteq (rhs ps) (ns++as)

-- lhs bildet Liste aller linken Regelseiten
lhs :: Rules -> String
lhs =  foldr ((:).fst) [] 
            
-- rhs bildet Liste aller rechten Regelseiten
rhs :: Rules -> String
rhs = foldr ((++).snd) []

-- rules bestimmt Regeln zu Grammatik
rules :: CFG -> Rules
rules (ns,cs,s,ps) = ps


-- Bestimmung des Startsymbols
startsymbol :: CFG -> Nonterminal
startsymbol (ns,cs,s,ps) = s

-- Bestimmung der Liste der Nonterminale
nt :: CFG -> [Nonterminal]
nt (ns,cs,s,ps) = ns



-- Ausgabe einer CFG
showCFG :: CFG -> IO()
showCFG (ns,as,start,ps) 
 = putStr ("Nonterminale:  " ++ ns ++
      '\n':"Terminale:     " ++ as  ++
      '\n':"Startsymbol:   " ++ start :
      '\n':"Produktionen:  " ++ (showRules ps)) 

showRules        :: Rules -> String
showRules []     =  ['\n']
showRules (p:ps) = let (lhs,rhs) = p in
                   ('\n':lhs: " -> " ++ rhs) ++ showRules ps

-- Beispiel CFG
gae  = ("ETFHG","a()+*",'E',rgae)
rgae = [('E',"TH"),('H',"+TH"),('H',""),('T',"FG"),('G',"*FG"),('G',""),
        ('F',"(E)"),('F',"a")] 

gae2 = ("ETF","a()+*",'E',rgae2)
rgae2 = [('E',"E+T"),('E',"T"),('T',"F*T"),('T',"F"),('F',"(E)"),('F',"a")]


{----------------------------------------------
  Bestimmung der look-ahead Mengen
-----------------------------------------------}

fi :: CFG -> String -> [String] -- first Menge zu Satzform
fo :: CFG -> Char   -> [String] -- follow Menge zu Nonterminal

{- [String] zur Darstellung von epsilon als "", alle einzelnen Buchstaben
   werden entsprechend als einelementiger String dargestellt. -}
{- alte direkte Berechnung von first Mengen -}

fi g cs = removedups (fisave g cs [])
-- Um Linksrekursion abzufangen, wird im zusaetzlichen Argument alc 
-- (initialisiert mit []) protokolliert, von welchen Nonterminalen die
-- first-Symbole schon eingegangen sind.

fisave :: CFG -> String -> String -> [String]
fisave g "" alc = [""]
fisave g@(ns,as,start,ps)  [a] alc
    | elem a as = [[a]]
    | elem a ns = derivefi g a alc
    | otherwise = error "symbol not allowed in fi appl."
fisave g (c:cs) alc = checkForEps (fisave g [c] alc) g cs (c:alc) 

checkForEps :: [String] -> CFG -> String -> String -> [String]
checkForEps fs g "" alc    = fs
checkForEps fs g (c:cs) alc 
          | elem ""  fs = (remove "" fs) ++ 
                          checkForEps (fisave g [c] alc) g cs (c:alc)
          | otherwise   = fs

derivefi :: CFG -> Char -> String -> [String]
derivefi g@(ns,as,s,ps) c alc 
   | elem c alc = []
   | otherwise  = concat [fisave g alpha (c:alc)| (n,alpha) <- ps, n==c]


-- Bestimmung von Follow Mengen fuer Nonterminale

fo g@(ns,cs,start,ps) n  | n==start  = "" : derivefo g ps start [] [n]
                           | elem n ns = derivefo g ps n  [] [n]
                           | otherwise = error "nonterminal required in fo appl."

-- alc : already controlled symbols, zur Behandlung von Rekursion
derivefo :: CFG -> Rules -> Char -> [String] -> String -> [String]
derivefo g []     n fs alc = removedups fs
derivefo g (p:ps) n fs alc
    | elem n (snd p) = derivefo g ps n (fs ++ follows) newalc
    | otherwise      = derivefo g ps n fs alc
    where (follows,newalc) = collectfo g n (snd p) (fst p) alc

collectfo :: CFG -> Char -> String -> Char -> String -> ([String],String)
collectfo g n []     a alc = ([],alc)
collectfo g n (c:cs) a alc
     | n == c    = (fos, newalc) 
     | otherwise = collectfo g n cs a alc
     where fos = (if elem "" fics && not (elem a alc) 
                  then (remove "" fics) ++ focs 
                  else fics) ++ restlist
           (restlist,newalc) = collectfo g n cs a nextalc
           fics = fi g cs
           focs = derivefo g (rules g) a [] (a:alc) 
           nextalc = if elem "" fics && not (elem a alc) then a:alc else alc

-- Bestimmung der look-ahead Mengen
la :: CFG -> (Char,String) -> [String]
la g (a,alpha) = removedups (concat [ fi g (alpha ++ foA) | foA <- fo g a])


-- LL(1) Test fuer kontextfreie Grammatiken
ll1 :: CFG -> Bool
ll1 g = checkAlternatives (map fst ps)  (map (la g) ps)
      where ps = rules g

checkAlternatives :: String -> [[String]] -> Bool
checkAlternatives []     []       = True
checkAlternatives (n:ns) (la:las) = (check n la ns las) 
                                    && checkAlternatives ns las 

check :: Char -> [String] -> String -> [[String]] -> Bool
check n la [] [] = True
check n la (n':ns) (la':las) 
    | n == n' = (disjoint la la') && check n la ns las
    | otherwise = check n la ns las


-- Anzeige der La-Mengen

showLa :: CFG -> IO ()
showLa g = putStr (showRulesLa (rules g))
  where 
    showRulesLa        :: Rules -> String
    showRulesLa []     =  ['\n']
    showRulesLa (p:ps) = let (lhs,rhs) = p 
                             n = 10 - length rhs
                             blanks = replicate n ' '
                         in
                           ('\n':lhs: " -> " ++ rhs ++ blanks ++ 
                             concatMap (\ cs -> if null cs then " Eps " else cs) 
                                       (la g p)) 
                             ++ showRulesLa ps

----------------------------------------------
-- TD-Analyseautomat fuer LL(1)-Grammatik
----------------------------------------------}

action :: CFG -> String -> String -> Action
data Action = Rule String Int | Pop | Error | Accept
                deriving Show
action g@(ns,as,start,ps) [x]  [a] 
    | elem a ns           = findRule g a ps [x] 1
    | x == a && elem x as = Pop
    | otherwise           = Error
action g@(ns,as,start,ps) "" [a] = findRule g a ps "" 1 
action g "" ""  = Accept

findRule :: CFG -> Char -> Rules -> String -> Int -> Action
findRule g a (p:ps) x i 
    | fst p == a  && elem x (la g p) = Rule (snd p) i
    | otherwise                      = findRule g a ps x (i+1)
findRule g a [] x i = Error


type TDA = (String, String, [Int]) 

parseTDA :: CFG -> String -> [Int]
parseTDA g@(ns,as,start,ps) w = run g (w,[start],[])
    
run :: CFG -> TDA -> [Int]
run g (a:w,n:alpha,is) = case (action g [a] [n]) of  
                            Pop            -> run g (w,alpha,is)
                            (Rule beta i)  -> run g (a:w,beta++alpha,i:is)
                            x -> []
run g ("",n:alpha,is)  = case (action g "" [n]) of  
                            (Rule beta i) -> run g ("",beta++alpha,i:is)
                            x -> []
run g ("","",is) = reverse is
run g tda = []


step :: CFG -> TDA -> TDA
step g tda@(a:w,n:alpha,is) = case (action g [a] [n]) of  
                            Pop            -> (w,alpha,is)
                            (Rule beta i)  -> (a:w,beta++alpha,i:is)
                            x -> tda
step g tda@("",n:alpha,is)  = case (action g "" [n]) of  
                            (Rule beta i) ->  ("",beta++alpha,i:is)
                            x -> tda
step g ("","",is) =  ("","",is) 
step g tda = tda


stateSeq g w = iterate (step g) (w,[s],[])
               where s = startsymbol g

showSeq :: [TDA] -> String
showSeq (st:sts) = show st ++ '\n':showSeq sts
showSeq []       = []

showTDASeq g w = putStr (showSeq (stopSeq (stateSeq g w)))

-- cuts a sequence when a fixpoint is reached
stopSeq :: Eq a => [a] -> [a] 
stopSeq (x1:x2:xs) | x1==x2    = [x1]
                   | otherwise = x1:stopSeq (x2:xs)



main = showTDASeq gae "(a*a)"

{--------------------
-- Hilfsfunktionen --
--------------------}

-- entferne Duplikate aus Liste
removedups :: Eq a => [a] -> [a]
removedups []     = []
removedups (x:xs) = x:removedups(remove x xs)


-- entferne Element aus Liste
remove :: Eq a => a -> [a] -> [a]
remove x [] = []
remove x (y:ys) | x==y      = remove x ys
                | otherwise = y: remove x ys




-- subseteq testet, ob jedes Element der ersten Liste 
-- auch in der zweiten Liste vorkommt

subseteq           :: Eq a => [a] -> [a] -> Bool
subseteq [] ys     = True
subseteq (x:xs) [] = False
subseteq (x:xs) ys = elem x ys && subseteq xs ys



-- disjoint :: Eq a => [a] -> [a] -> Bool
disjoint [] ys = True
disjoint xs [] = True
disjoint (x:xs) ys = not (elem x ys) && disjoint xs ys
--