fork(2) download
  1. {-# GHC_OPTIONS -O2 #-}
  2. {-# LANGUAGE BangPatterns #-}
  3.  
  4. -- import Data.List (foldl')
  5.  
  6. main = do
  7. a <- getLine ; b <- getLine ;
  8. print $ test2 (read a) (read b)
  9.  
  10. {- using
  11.   main = do [a,b]<- fmap read getLine ; print $ test a b
  12.  
  13.   SHOOTS time and memory consumption UP by full 10% - 15% for some reason
  14.   but that's besides the point
  15.  
  16. ----- I'm trying to encode simple nested loop: -----
  17.  
  18.  test m n = let d = m+n-3 ; c = 0 ; pairs = [] in -- calculate some value and also
  19.   for i in [0..m] -- selectively collect few results
  20.   for j in [0..n] -- into a short buffer
  21.   c += (j%3)==0 ? 2 : 1 -- increment counter
  22.   if (i+j == d) append (i,j) AtEndOf pairs -- grow buffer at end, conditionally,
  23.   return (c,pairs) -- by some rarily-succeeding test
  24.  
  25. ----- thus only the final value of c is defined
  26. ----- but buffer may be consumed lazily, one-by-one, while being built
  27.  
  28. -- so we have here two values calculated in one go, in parallel -
  29. -- where one is built from the top down (buffer),
  30. -- and the other from the bottom up (counter)
  31. -}
  32.  
  33. test m n = let d = m + n - 3 in -- build a VERY short test buffer
  34. cross (sum,concat) $ unzip
  35. [ (c, [(i,j) | (i+j) == d ]) -- sample "rare" test
  36. | i<- [0..m], j<-[0..n],
  37. let c = if (rem j 3) == 0 then 2 else 1 ]
  38.  
  39. cross (f,g) (x,y) = (f x,g y)
  40.  
  41. {-
  42. > test 1000 1000 => (1336335,[(997,1000),(998,999),(999,998),(1000,997)])
  43.   => time: 0.56s memory: *** 45,728 kB ***
  44. > test 2000 2000 => (5338668,[(1997,2000),(1998,1999),(1999,1998),(2000,1997)])
  45.   => time: 2.51s memory: *** 222,848 kB ***
  46.  
  47. ;; nested loop in CLISP as control case
  48. ;; takes constant space, as it should: -- http://ideone.com/ziw12 --
  49.  
  50. (test 1000 1000) => Run time: 0.336949 sec. Space: 5832 Bytes
  51. (test 2000 2000) => Run time: 1.37579 sec. Space: 5832 Bytes
  52.  
  53.  
  54.   so clearly much interim storage is used unnecessarily in Haskell
  55.  
  56.   using FOLD or fused auxiliary functions does not help
  57.   (not even FOLDL' works,
  58.   which can only build whole buffer at once, in reversed order anyway)
  59.   unless we use BANG_PATTERNS which are not even a part of the base language (!!!?!)
  60. -}
  61.  
  62. -- here's unzip definition fused into it:
  63.  
  64. test1 m n = cross (sum,id) $ foldr f ([0],[]) -- foldr f (0,[])
  65. [ (c, [(i,j) | (i+j) == d ])
  66. | i<- [0..m], j<-[0..n],
  67. let c = if (rem j 3) == 0 then 2 else 1 ]
  68. where d = m + n - 3
  69. f (c1,h1) ~(c,h) = case h1 of []-> (c1:c,h) ; (x:_)->(c1:c,x:h) -- 0.32s-21.2MB / 1.24s-53.9MB
  70. -- f (c1,h1) ~(c,h) = (c1:c, case h1 of []-> h ; (x:_)-> x:h) -- 0.62s-63.1MB / 2.52s-245.4MB
  71. -- f (c1,h1) ~(c,h) = case h1 of []-> (c1+c,h) ; (x:_)->(c1+c,x:h) -- **0.77s-42.7MB STACK_OVER**
  72. -- f (c1,h1) ~(c,h) = case h1 of []-> (c+c1,h) ; (x:_)->(c+c1,x:h) -- **0.90s-48.8MB STACK_OVER**
  73. -- f (c1,h1) ~(c,h) = (c+c1, case h1 of []-> h ; (x:_)-> x:h) -- **0.94s-61.1MB STACK_OVER**
  74.  
  75. -- so here clearly the buffer is recognized as being built lazily,
  76. -- and the space is wasted in the interim list to be reduced by 'sum',
  77. -- but when we try to fuse the definition of sum into it (as evident above)
  78. -- STACK OVERFLOW follows (!?!??)
  79.  
  80. -- there shouldn't be any unnecessary interim storage used,
  81. -- explicit lists nor implicit stack frames;
  82. -- still memory leak's there in the best case, even whe we fuse
  83. -- the foldr definition itself, unless we use BANG PATTERNS:
  84.  
  85. test2 m n = f 0 0 0
  86. where d = m + n - 3
  87. f i j !c -- !accumulate the counter
  88. | j > n && i >= m = (c,[])
  89. | j > n = f (i+1) 0 c
  90. | True = let (c',h') = f i (j+1) (c + if (rem j 3) == 0 then 2 else 1)
  91. in if (i+j) == d then (c', (i,j):h') else (c',h')
  92.  
  93. -- (without bang pattern it's a stack overflow!)
  94. -- NB with (c', if (i+j) == d then (i,j):h' else h') there's still a leak there
  95.  
  96. {- test2: now constant, near-zero space
  97.  
  98. 1000,1000 => (1336335,[(997,1000),(998,999),(999,998),(1000,997)])
  99.   time: 0.19s memory: 4764 kB
  100.  
  101. 2000,2000 => (5338668,[(1997,2000),(1998,1999),(1999,1998),(2000,1997)])
  102.   time: 0.76s memory: 4764 kB
  103. -}
  104.  
  105. test3 m n = f 0 0 0 [] -- accum_build the list,
  106. where d = m + n - 3 -- but in reversed order, and unnecessarily strictly
  107. f i j !c !h
  108. | j > n && i >= m = (c,h)
  109. | j > n = f (i+1) 0 c h
  110. | True = f i (j+1) (c + if (rem j 3) == 0 then 2 else 1)
  111. (if (i+j) == d then (i,j):h else h)
  112.  
  113. -- for 1000,1000/2000,2000: time: 0.09s/0.37s memory: 3740 kB
  114. -- twice faster, 7/9 the memory footprint
  115.  
  116.  
  117. -- here's foldl reformulation of test3
  118. test4 m n = foldl f (0,[])
  119. [ (c, [(i,j) | (i+j) == d ])
  120. | i<- [0..m], j<-[0..n],
  121. let c = if (rem j 3) == 0 then 2 else 1 ]
  122. where d = m + n - 3
  123. f (!c,!h) (c1,h1) = case h1 of []->(c+c1, h) ; (x:_)->(c+c1, x:h)
  124.  
  125. -- for 1000,1000/2000,2000: time: 0.12s/0.45s memory: 3740 kB
  126. -- no bang-pats -> stack overflow
  127. -- FOLDL' instead of FOLDL, w/out the bang -> 0.47s-74.4MB STACK OVERFLOW
stdin
1000
1000
compilation info
[1 of 1] Compiling Main             ( prog.hs, prog.o )
Linking prog ...
stdout
(1336335,[(997,1000),(998,999),(999,998),(1000,997)])