{-# LANGUAGE ScopedTypeVariables #-}
module Main where
import Data.List
import Debug.Trace
import Control.Parallel.Eden
import Control.Parallel.Eden.Auxiliary
import Control.Monad
import System.Environment
-- gentlemans algorithm uses a torus to multiply 2 matrices
-- see chapter 4 in http://www.ics.uci.edu/~bic/messengers/papers/ICPP05.pdf for a description of the algorithm.
gentlemanMul :: forall a . (Num a,Trans a)
=> (([a] -> [a]
-> ([a],[a],a)) -- ^worker function (applied to each torus element)
-> [[a]] -- ^torus input (left rotate)
-> [[a]] -- ^torus input (up rotate)
-> [[a]]) -- ^torus result
-> [[a]] -> [[a]] -> [[a]]
gentlemanMul torus ass bss = torus f ass' bss' where
-- function on torus elements
f :: [a] -> [a] -> ([a],[a],a)
f fromR fromD = (take n fromR, take n fromD, out) where
out = foldl1 (+) $ zipWith (*) fromR fromD
n = (length bss -1)
-- pre-rotation
ass' = zipWith leftRotIXs [0..] ass
bss' = transpose $ zipWith leftRotIXs [0..] (transpose bss)
leftRotIXs i xs = xs2 ++ xs1 where
(xs1,xs2) = splitAt i xs
-- sequential torus with stream rotation in 2 directions: initial inputs for each torus element and each direction are given. The worker function is used on each torus element to transform a stream of inputs from each direction to a stream of outputs to each direction. The heads of the streams are the initial elements. The tail of the streams are the worker function results of the torus neighbors. Each torus input has to be of the same size in both dimensions, if not the smallest input will be taken as reference for the size of the torus.
torus :: forall lr ur res .
([lr] -> [ur] -> ([lr],[ur],res)) -- ^worker function (applied to each torus element)
-> [[lr]] -- ^torus input (left rotate)
-> [[ur]] -- ^torus input (up rotate)
-> [[res]] -- ^torus result
torus f lrInit urInit = res
where
initInpss = zipWith zip lrInit urInit
lazyInpss = zipWith zip fromR' fromD'
(toL,toU,res)
= unzip3 $
map unzip3 $
lazy2ZipWith (lazy2ZipWith wf) initInpss lazyInpss
wf (lrInit,urInit) ~(lrLazy,urLazy) = f (lrInit : lrLazy) (urInit : urLazy) --haskell blocks on the second tuple without the lazy pattern
-- 2 matrices of outgoing streams are rotated each one direction to build the 4 matrices of incoming streams
fromD' :: [[[ur]]]
fromD' = upRotXss toU
fromR' :: [[[lr]]]
fromR' = leftRotXss toL
leftRotXs :: [a] -> [a]
upRotXss :: [[a]] -> [[a]]
leftRotXss :: [[a]] -> [[a]]
leftRotXs [] = []
leftRotXs (x:xs) = xs ++ [x]
upRotXss = leftRotXs
leftRotXss = map leftRotXs
-- fetch and release the components of tuples
fetch2 :: (Trans a, Trans b)
=> (RD a,RD b)
-> (a,b)
fetch2 (a,b) = runPA $
do a' <- fetchPA a
b' <- fetchPA b
return (a',b')
release3 :: (Trans a, Trans b, Trans c)
=>(a,b,c)
-> (RD a,RD b,RD c)
release3 (a,b,c) = (release a, release b, release c)
main = do args <- getArgs
if ((length args)<2)
then putStr usage
else do let [version',xres'] = take 2 $ args
version = [torus]!!(read version')
xres = read xres'
genMX x y = unshuffle y [1..(x*y)]
inp = (genMX xres xres)
res = gentlemanMul version inp inp
print inp
print res
usage = "I need 2 parameters: \n"++
"1. version (0 sequentiell),\n"++
"2. resolution X\n"