fork download
  1. -- CountingPagedSoE.hs
  2. -- from http://w...content-available-to-author-only...l.org/haskellwiki/Prime_numbers#Using_ST_Array
  3. -- but with very many improvements including a very fast count function
  4. -- applied as an answer to: http://stackoverflow.com/a/20884433/549617
  5.  
  6. {-# OPTIONS -O3 -rtsopts #-} -- -fllvm ide.com doesn't support LLVM
  7.  
  8. import Data.Word
  9. import Data.Bits
  10. import Control.Monad
  11. import Control.Monad.ST
  12. import Data.Array.ST (runSTUArray)
  13. import Data.Array.Unboxed
  14. import Data.Array.Base
  15.  
  16. pgSZBTS = (2^18) * 8 :: Int -- size of L2 data cache
  17.  
  18. type PrimeType = Word32
  19. type Chunk = UArray PrimeType Bool
  20.  
  21. -- makes a new page chunk and culls it
  22. -- if the base primes list provided is empty then
  23. -- it uses the current array as source (for zero page base primes)
  24. mkChnk :: Word32 -> [Chunk] -> Chunk
  25. mkChnk low bschnks = runSTUArray $ do
  26. let nxt = (fromIntegral low) + (fromIntegral pgSZBTS)
  27. buf <- nxt `seq` newArray (fromIntegral low, fromIntegral nxt - 1) True
  28. let cull ~(p:ps) =
  29. let bp = (fromIntegral p)
  30. i = (bp - 3) `shiftR` 1
  31. s = bp * (i + 1) + i in
  32. let cullp j = do
  33. if j >= pgSZBTS then cull ps
  34. else do
  35. unsafeWrite buf j False
  36. cullp (j + (fromIntegral p)) in
  37. when (s < nxt) $ do
  38. let strt = do
  39. if s >= low then fromIntegral (s - low)
  40. else do
  41. let b = (low - s) `rem` bp
  42. if b == 0 then 0 else fromIntegral (bp - b)
  43. cullp strt
  44. case bschnks of
  45. [] -> do bsbf <- unsafeFreezeSTUArray buf
  46. cull (listChnkPrms [bsbf])
  47. _ -> cull $ listChnkPrms bschnks
  48. return buf
  49.  
  50. -- creates a page chunk list starting at the lw value
  51. chnksList :: Word32 -> [Chunk]
  52. chnksList lw =
  53. mkChnk lw basePrmChnks : chnksList (lw + fromIntegral pgSZBTS)
  54.  
  55. -- converts a page chunk list to a list of primes
  56. listChnkPrms :: [Chunk] -> [PrimeType]
  57. listChnkPrms [] = []
  58. listChnkPrms ~(hdchnk@(UArray lw _ rng _):tlchnks) =
  59. let nxtp i =
  60. if i >= rng then [] else
  61. if unsafeAt hdchnk i then
  62. (case ((lw + fromIntegral i) `shiftL` 1) + 3 of
  63. np -> np) : nxtp (i + 1)
  64. else nxtp (i + 1) in
  65. (hdchnk `seq` lw `seq` nxtp 0) ++ listChnkPrms tlchnks
  66.  
  67. -- the base page chunk list used to cull the higher order pages,
  68. -- note that it has special treatment for the zero page.
  69. -- It is more space efficient to store this as chunks rather than
  70. -- as a list of primes or even a list of deltas (gaps), with the
  71. -- extra processing to convert as needed not too much.
  72. basePrmChnks :: [Chunk]
  73. basePrmChnks = mkChnk 0 [] : chnksList (fromIntegral pgSZBTS)
  74.  
  75. -- the full list of primes could be accessed with the following function.
  76. primes :: () -> [PrimeType]
  77. primes () = 2 : (listChnkPrms $ chnksList 0)
  78.  
  79. -- a quite fast prime counting up to the given limit using
  80. -- chunk processing to avoid innermost list processing loops.
  81. countPrimesTo :: PrimeType -> Int
  82. countPrimesTo limit =
  83. let lmtb = (limit - 3) `div` 2 in
  84. let sumChnks acc chnks@(chnk@(UArray lo hi rng _):chnks') =
  85. let cnt :: UArray PrimeType Word32 -> Int
  86. cnt bfw =
  87. case if lmtb < hi then fromIntegral (lmtb - lo) else rng of
  88. crng -> case crng `shiftR` 5 of
  89. rngw ->
  90. let cnt' i ac =
  91. ac `seq` if i >= rngw then
  92. if (i `shiftL` 5) >= rng then ac else
  93. case (-2) `shiftL` fromIntegral (lmtb .&. 31) of
  94. msk -> msk `seq`
  95. case (unsafeAt bfw rngw) .&.
  96. (complement msk) of
  97. bts -> bts `seq` case popCount bts of
  98. c -> c `seq` case ac + c of nac -> nac
  99. else case ac + (popCount $ unsafeAt bfw i) of
  100. nacc -> nacc `seq` cnt' (i + 1) (nacc)
  101. in cnt' 0 0 in
  102. acc `seq` case runST $ do -- make UArray _ Bool into a UArray _ Word32
  103. stbuf <- unsafeThawSTUArray chnk
  104. stbufw <- castSTUArray stbuf
  105. bufw <- unsafeFreezeSTUArray stbufw
  106. return $ cnt bufw of
  107. c -> c `seq` case acc + c of
  108. nacc -> nacc `seq` if hi >= lmtb then nacc
  109. else sumChnks nacc chnks' in
  110. if limit < 2 then 0 else if limit < 3 then 1 else
  111. lmtb `seq` sumChnks 1 (chnksList 0)
  112.  
  113. main = do
  114. x <- read `fmap` getLine -- 1mln 2mln 10mln 100mln 1000mln
  115. -- 0.02 0.03 0.06 0.45 4.60 seconds
  116. -- 7328 7328 8352 8352 9424 Kilobytes
  117. -- this takes 14.34 seconds and 9424 Kilobytes to 3 billion,
  118. -- and 9.12 seconds for 3 billion on an i7-2700K (3.5 GHz)
  119. -- The above ratio of about 1.6 is much better than usual due to
  120. -- the extremely low memory use of the page segmented algorithm.
  121. -- It seems thaat the Windows Native Code Generator (NCG) from GHC
  122. -- is particularly poor, as the Linux 32-bit version takes
  123. -- less than two thirds of the time for exactly the same program...
  124. print $ countPrimesTo x
  125. -- print $ length $ takeWhile ((>=) x) $ primes () -- the slow way to do this
Success #stdin #stdout 14.31s 9424KB
stdin
3000000000
stdout
144449537