----------------------------------------- -- Programm "Erstellen eines Indexes" -- ----------------------------------------- -- einige Hilfsdefinitionen infixl 9 >.> -- Deklaration eines linksassoziativen Operators >.> -- mit Bindungsst"arke 9 (>.>) :: (a -> b) -> (b -> c) -> (a -> c) g >.> f = f . g type Doc = String type Wort = String -- Achtung: Word vordefiniert als "unsigned int" type Line = String ------------------------ -- Die Hauptfunktion -- ------------------------ makeIndex :: Doc -> [(Wort,[Int])] makeIndex = splitup -- Doc -> [Line] >.> numLines -- [Line] -> [(Int,Line)] >.> allNumWords -- [(Int,Line)] -> [(Int,Wort)] >.> sortList -- [(Int,Wort)] -> [(Wort,Int)] >.> amalgamate -- [(Wort,Int)] -> [(Wort,[Int])] >.> shorten -- [(Wort,[Int])] -> [(Wort,[Int])] ----------------------------------------------------- -- Teilschritt 1: Aufteilen des Textes in Zeilen -- ----------------------------------------------------- splitup :: Doc -> [Line] splitup = lines --------------------------------------------- -- Teilschritt 2: Nummerierung der Zeilen -- --------------------------------------------- numLines :: [Line] -> [(Int,Line)] numLines listls = zip [1 .. length listls] listls ----------------------------------------------- -- Teilschritt 3: Zeilennummern zu W"ortern -- ----------------------------------------------- allNumWords :: [(Int,Line)] -> [(Int,Wort)] allNumWords = foldr (++) [] . map numWords numWords :: (Int,Line) -> [(Int,Wort)] numWords (nr, line) = map (\ wd -> (nr,wd)) (splitWords line) splitWords :: Line -> [Wort] splitWords = words ------------------------------------------------------------- -- Teilschritt 4: Sortieren nach Worten und Zeilennummern -- ------------------------------------------------------------- sortList :: [(Int,Wort)] -> [(Wort,Int)] sortList = map (\ (x,y) -> (y,x)) . genQSort comparePair comparePair :: (Int,Wort) -> (Int,Wort) -> Bool comparePair (n1,w1) (n2,w2) = (w1 < w2) || (w1 == w2 && n1 <= n2) genQSort :: (a -> a -> Bool) -> [a] -> [a] genQSort cp [] = [] genQSort cp (x:xs) = genQSort cp [y | y <- xs, cp y x] ++ x: genQSort cp [y | y <- xs, not(cp y x)] --------------------------------------------------------------- -- Teilschritt 5: Verschmelzen der Zeilenangaben zu Listen -- --------------------------------------------------------------- amalgamate :: [(Wort,Int)] -> [(Wort,[Int])] amalgamate = makeLists >.> concatLists >.> removeDups makeLists :: [(Wort,Int)] -> [(Wort,[Int])] makeLists = map (\ (w,n) -> (w,[n])) concatLists :: [(Wort,[Int])] -> [(Wort,[Int])] concatLists [] = [] concatLists [pair] = [pair] concatLists ((w1,l1):(w2,l2):ps) | w1 == w2 = concatLists ((w1,l1++l2):ps) | otherwise = (w1,l1) : concatLists ((w2,l2):ps) removeDups :: [(Wort,[Int])] -> [(Wort,[Int])] removeDups = map (\ (w,l) -> (w, rmDups l)) where rmDups :: [Int] -> [Int] rmDups [] = [] rmDups [x] = [x] rmDups (x:y:ys) | x==y = rmDups (y:ys) | otherwise = x : rmDups (y:ys) -------------------------------------------------------- -- Teilschritt 6: Selektieren der Schluesselwoerter -- -------------------------------------------------------- shorten :: [(Wort,[Int])] -> [(Wort,[Int])] shorten = filter (\ (w,l) -> length w >= 5) ------------------- -- Beispieltext -- ------------------- text = "O Tannenbaum O Tannenbaum" ++ ['\n'] ++ "Wie gruen sind deine Blaetter" ++ ['\n'] ++ "Du gruenst nicht nur zur Sommerzeit" ++ ['\n'] ++ "Nein auch im Winter wenn es schneit" ++ ['\n'] ++ "O Tannenbaum O Tannenbaum" ++ ['\n'] ++ "Wie gruen sind deine Blaetter" -- Pretty Printer fuer Index ppIndex :: String -> IO () ppIndex text = putStr (unlines (map makeLine index)) where index = makeIndex text maxlength = foldr1 max (map (\ (w,_) -> length w) index) + 3 makeLine (w,is) = w ++ (replicate (maxlength-length w) ' ') ++ show is