fork download
  1. {-# GHC_OPTIONS -O2 #-}
  2. {-# LANGUAGE BangPatterns #-}
  3.  
  4. import Data.List (foldl')
  5.  
  6. main = do
  7. a <- getLine ; b <- getLine ;
  8. print $ test2 (read a) (read b)
  9.  
  10.  
  11. testR m n = foldr f (0,[]) -- testR 1000 1000 -- causes stack overflow
  12. [ (c, [(i,j) | (i+j) == d ])
  13. | i<- [0..m], j<-[0..n],
  14. let c = if (rem j 3) == 0 then 2 else 1 ]
  15. where d = m + n - 3
  16. f (!c1, []) (!c, h) = (c1+c,h)
  17. f (!c1, (!x :_)) (!c, h) = (c1+c,x:h)
  18.  
  19.  
  20.  
  21. {- using
  22. main = do [a,b]<- fmap read getLine ; print $ test a b
  23.  
  24. SHOOTS time and memory consumption UP by full 10% - 15% for some reason
  25. but that's besides the point
  26.  
  27. ----- I'm trying to encode simple nested loop: -----
  28.  
  29. test m n = let d = m+n-3 ; c = 0 ; pairs = [] in -- calculate some value and also
  30. for i in [0..m] -- selectively collect few results
  31. for j in [0..n] -- into a short buffer
  32. c += (j%3)==0 ? 2 : 1 -- increment counter
  33. if (i+j == d) append (i,j) AtEndOf pairs -- grow buffer at end, conditionally,
  34. return (c,pairs) -- by some rarily-succeeding test
  35.  
  36. ----- thus only the final value of c is defined
  37. ----- but buffer may be consumed lazily, one-by-one, while being built
  38.  
  39. -- so we have here two values calculated in one go, in parallel -
  40. -- where one is built from the top down (buffer),
  41. -- and the other from the bottom up (counter)
  42. -}
  43.  
  44. test m n = let d = m + n - 3 in -- build a VERY short test buffer
  45. cross (sum, concat) $ unzip
  46. [ (c, [(i,j) | (i+j) == d ]) -- sample "rare" test
  47. | i<- [0..m], j<-[0..n],
  48. let c = if (rem j 3) == 0 then 2 else 1 ]
  49.  
  50. cross (f,g) (x,y) = (f x,g y)
  51. -- cross (f,g) x = (f $ fst x, g $ snd x)
  52.  
  53. sum' xs = foldl' (\ !a b -> a+b) 0 xs
  54.  
  55. {-
  56. > test 1000 1000 => (1336335,[(997,1000),(998,999),(999,998),(1000,997)])
  57.   => time: 0.56s memory: *** 45,728 kB ***
  58. > test 2000 2000 => (5338668,[(1997,2000),(1998,1999),(1999,1998),(2000,1997)])
  59.   => time: 2.51s memory: *** 222,848 kB ***
  60.  
  61. ;; nested loop in CLISP as control case
  62. ;; takes constant space, as it should: -- http://ideone.com/ziw12 --
  63.  
  64. (test 1000 1000) => Run time: 0.336949 sec. Space: 5832 Bytes
  65. (test 2000 2000) => Run time: 1.37579 sec. Space: 5832 Bytes
  66.  
  67.  
  68.   so clearly much interim storage is used unnecessarily in Haskell
  69.  
  70.   using FOLD or fused auxiliary functions does not help
  71.   (not even FOLDL' works,
  72.   which can only build whole buffer at once, in reversed order anyway)
  73.   unless we use BANG_PATTERNS which are not even a part of the base language (!!!?!)
  74. -}
  75.  
  76. -- here's unzip definition fused into it:
  77.  
  78. test1 m n = cross (sum,id) $ foldr f ([0],[]) -- foldr f (0,[])
  79. [ (c, [(i,j) | (i+j) == d ])
  80. | i<- [0..m], j<-[0..n],
  81. let c = if (rem j 3) == 0 then 2 else 1 ]
  82. where d = m + n - 3
  83. f (c1,h1) ~(c,h) = case h1 of []-> (c1:c,h) ; (x:_)->(c1:c,x:h) -- 0.32s-21.2MB / 1.24s-53.9MB
  84. -- f (c1,h1) ~(c,h) = (c1:c, case h1 of []-> h ; (x:_)-> x:h) -- 0.62s-63.1MB / 2.52s-245.4MB
  85. -- f (c1,h1) ~(c,h) = case h1 of []-> (c1+c,h) ; (x:_)->(c1+c,x:h) -- **0.77s-42.7MB STACK_OVER**
  86. -- f (c1,h1) ~(c,h) = case h1 of []-> (c+c1,h) ; (x:_)->(c+c1,x:h) -- **0.90s-48.8MB STACK_OVER**
  87. -- f (c1,h1) ~(c,h) = (c+c1, case h1 of []-> h ; (x:_)-> x:h) -- **0.94s-61.1MB STACK_OVER**
  88.  
  89. -- so here clearly the buffer is recognized as being built lazily,
  90. -- and the space is wasted in the interim list to be reduced by 'sum',
  91. -- but when we try to fuse the definition of sum into it (as evident above)
  92. -- STACK OVERFLOW follows (!?!??)
  93.  
  94. -- there shouldn't be any unnecessary interim storage used,
  95. -- explicit lists nor implicit stack frames;
  96. -- still memory leak's there in the best case, even whe we fuse
  97. -- the foldr definition itself, unless we use BANG PATTERNS:
  98.  
  99. test2 m n = f 0 0 0
  100. where d = m + n - 3
  101. f i j !c -- !accumulate the counter
  102. | j > n && i >= m = (c,[])
  103. | j > n = f (i+1) 0 c
  104. | True = let (c',h') = f i (j+1) (c + if (rem j 3) == 0 then 2 else 1)
  105. in if (i+j) == d then (c', (i,j):h') else (c',h')
  106.  
  107. -- (without bang pattern it's a stack overflow!)
  108. -- NB with (c', if (i+j) == d then (i,j):h' else h') there's still a leak there
  109.  
  110. {- test2: now constant, near-zero space
  111.  
  112. 1000,1000 => (1336335,[(997,1000),(998,999),(999,998),(1000,997)])
  113.   time: 0.19s memory: 4764 kB
  114.  
  115. 2000,2000 => (5338668,[(1997,2000),(1998,1999),(1999,1998),(2000,1997)])
  116.   time: 0.76s memory: 4764 kB
  117. -}
  118.  
  119. test3 m n = f 0 0 0 [] -- accum_build the list,
  120. where d = m + n - 3 -- but in reversed order, and unnecessarily strictly
  121. f i j !c !h
  122. | j > n && i >= m = (c,h)
  123. | j > n = f (i+1) 0 c h
  124. | True = f i (j+1) (c + if (rem j 3) == 0 then 2 else 1)
  125. (if (i+j) == d then (i,j):h else h)
  126.  
  127. -- for 1000,1000/2000,2000: time: 0.09s/0.37s memory: 3740 kB
  128. -- twice faster, 7/9 the memory footprint
  129.  
  130.  
  131. -- here's foldl reformulation of test3
  132. test4 m n = foldl f (0,[])
  133. [ (c, [(i,j) | (i+j) == d ])
  134. | i<- [0..m], j<-[0..n],
  135. let c = if (rem j 3) == 0 then 2 else 1 ]
  136. where d = m + n - 3
  137. f (!c,!h) (c1,h1) = case h1 of []->(c+c1, h) ; (x:_)->(c+c1, x:h)
  138.  
  139. -- for 1000,1000/2000,2000: time: 0.12s/0.45s memory: 3740 kB
  140. -- no bang-pats -> stack overflow
  141. -- FOLDL' instead of FOLDL, w/out the bang -> 0.47s-74.4MB STACK OVERFLOW
Success #stdin #stdout 0.21s 4744KB
stdin
1000
1000
stdout
(1336335,[(997,1000),(998,999),(999,998),(1000,997)])