-----------------------------------------
-- Programm "Erstellen eines Indexes"  --
-----------------------------------------

-- einige Hilfsdefinitionen

infixl 9 >.>  -- Deklaration eines linksassoziativen Operators >.>
              -- mit Bindungsst"arke 9

(>.>) :: (a -> b) -> (b -> c) -> (a -> c)
g >.> f = f . g

type Doc   = String
type Wort  = String  -- Achtung: Word vordefiniert als "unsigned int"
type Line  = String

------------------------
-- Die Hauptfunktion  --
------------------------    

makeIndex ::   Doc -> [(Wort,[Int])]
makeIndex =    splitup       --  Doc            -> [Line] 
          >.>  numLines      --  [Line]         -> [(Int,Line)]
          >.>  allNumWords   --  [(Int,Line)]   -> [(Int,Wort)]
          >.>  sortList      --  [(Int,Wort)]   -> [(Wort,Int)]
          >.>  amalgamate    --  [(Wort,Int)]   -> [(Wort,[Int])]
          >.>  shorten       --  [(Wort,[Int])]   -> [(Wort,[Int])]


-----------------------------------------------------
-- Teilschritt 1: Aufteilen des Textes in Zeilen   --
-----------------------------------------------------

splitup :: Doc -> [Line] 
splitup =  lines

---------------------------------------------
-- Teilschritt 2: Nummerierung der Zeilen  --
---------------------------------------------

numLines :: [Line] -> [(Int,Line)]
numLines listls = zip [1 .. length listls] listls



-----------------------------------------------
-- Teilschritt 3: Zeilennummern zu W"ortern  --
-----------------------------------------------

allNumWords :: [(Int,Line)] -> [(Int,Wort)]
allNumWords  = foldr (++) [] . map numWords

numWords :: (Int,Line) -> [(Int,Wort)]
numWords    (nr, line)  = map (\ wd -> (nr,wd)) (splitWords line)


splitWords :: Line   -> [Wort]
splitWords =  words

-------------------------------------------------------------
-- Teilschritt 4: Sortieren nach Worten und Zeilennummern  --
-------------------------------------------------------------

sortList :: [(Int,Wort)] -> [(Wort,Int)] 
sortList =  map (\ (x,y) -> (y,x)) . genQSort comparePair 

comparePair                 :: (Int,Wort) -> (Int,Wort) -> Bool
comparePair (n1,w1) (n2,w2) =  (w1 < w2) || (w1 == w2 && n1 <= n2)  

genQSort :: (a -> a -> Bool) -> [a]    -> [a]
genQSort  cp  []      = []
genQSort  cp  (x:xs)  = genQSort cp [y | y <- xs, cp y x] ++
                        x: genQSort cp [y | y <- xs, not(cp y x)]


---------------------------------------------------------------
-- Teilschritt 5: Verschmelzen der Zeilenangaben zu Listen   --
---------------------------------------------------------------

amalgamate :: [(Wort,Int)] -> [(Wort,[Int])]
amalgamate =  makeLists >.> concatLists >.> removeDups

makeLists :: [(Wort,Int)] -> [(Wort,[Int])]
makeLists =  map (\ (w,n) -> (w,[n])) 

concatLists             :: [(Wort,[Int])] -> [(Wort,[Int])]
concatLists []          =  []
concatLists [pair]      =  [pair]
concatLists ((w1,l1):(w2,l2):ps) 
            | w1 == w2  =  concatLists ((w1,l1++l2):ps) 
            | otherwise =  (w1,l1) : concatLists ((w2,l2):ps)

removeDups :: [(Wort,[Int])] -> [(Wort,[Int])]
removeDups =  map (\ (w,l) -> (w, rmDups l))
              where rmDups          :: [Int] -> [Int]
                    rmDups []       =  []
                    rmDups [x]      =  [x]
                    rmDups (x:y:ys) 
			| x==y      =  rmDups (y:ys)
                        | otherwise =  x : rmDups (y:ys)       


--------------------------------------------------------
-- Teilschritt 6: Selektieren der Schluesselwoerter   --
--------------------------------------------------------

shorten :: [(Wort,[Int])] -> [(Wort,[Int])] 
shorten =  filter (\ (w,l) -> length w >= 5)


-------------------
-- Beispieltext  --
-------------------


text = "O Tannenbaum O Tannenbaum" ++ ['\n'] ++
       "Wie gruen sind deine Blaetter" ++ ['\n'] ++
       "Du gruenst nicht nur zur Sommerzeit" ++ ['\n'] ++
       "Nein auch im Winter wenn es schneit" ++ ['\n'] ++
       "O Tannenbaum O Tannenbaum" ++ ['\n'] ++
       "Wie gruen sind deine Blaetter" 
       

-- Pretty Printer fuer Index 
ppIndex       :: String -> IO ()
ppIndex text =  putStr (unlines (map makeLine index))
     where 
     	index     = makeIndex text
	maxlength = foldr1 max (map (\ (w,_) -> length w) index) + 3
	makeLine (w,is) = w ++ (replicate (maxlength-length w) ' ') 
				++ show is 
				
