-- CountingPagedSoE.hs
-- from http://w...content-available-to-author-only...l.org/haskellwiki/Prime_numbers#Using_ST_Array
-- but with very many improvements including a very fast count function
-- applied as an answer to: http://stackoverflow.com/a/20884433/549617
{-# OPTIONS -O3 -rtsopts #-} -- -fllvm ide.com doesn't support LLVM
import Data.Word
import Data.Bits
import Data.Array.ST (runSTUArray)
import Data.Array.Unboxed
import Data.Array.Base
pgSZBTS
= (2^18) * 8 :: Int -- size of L2 data cache
type PrimeType = Word32
type Chunk
= UArray PrimeType
Bool
-- makes a new page chunk and culls it
-- if the base primes list provided is empty then
-- it uses the current array as source (for zero page base primes)
mkChnk :: Word32 -> [Chunk] -> Chunk
mkChnk low bschnks = runSTUArray $ do
let cull ~(p:ps) =
i = (bp - 3) `shiftR` 1
s = bp * (i + 1) + i in
let cullp j = do
if j >= pgSZBTS then cull ps
else do
unsafeWrite buf j False
when (s < nxt) $ do
let strt = do
else do
let b
= (low
- s
) `
rem` bp
cullp strt
case bschnks of
[] -> do bsbf <- unsafeFreezeSTUArray buf
cull (listChnkPrms [bsbf])
_ -> cull $ listChnkPrms bschnks
-- creates a page chunk list starting at the lw value
chnksList :: Word32 -> [Chunk]
chnksList lw =
mkChnk lw basePrmChnks : chnksList
(lw
+ fromIntegral pgSZBTS
)
-- converts a page chunk list to a list of primes
listChnkPrms :: [Chunk] -> [PrimeType]
listChnkPrms [] = []
listChnkPrms ~(hdchnk@(UArray lw _ rng _):tlchnks) =
let nxtp i =
if i >= rng then [] else
if unsafeAt hdchnk i then
np -> np) : nxtp (i + 1)
else nxtp (i + 1) in
(hdchnk `
seq` lw `
seq` nxtp
0) ++ listChnkPrms tlchnks
-- the base page chunk list used to cull the higher order pages,
-- note that it has special treatment for the zero page.
-- It is more space efficient to store this as chunks rather than
-- as a list of primes or even a list of deltas (gaps), with the
-- extra processing to convert as needed not too much.
basePrmChnks :: [Chunk]
basePrmChnks
= mkChnk
0 [] : chnksList
(fromIntegral pgSZBTS
)
-- the full list of primes could be accessed with the following function.
primes :: () -> [PrimeType]
primes () = 2 : (listChnkPrms $ chnksList 0)
-- a quite fast prime counting up to the given limit using
-- chunk processing to avoid innermost list processing loops.
countPrimesTo
:: PrimeType
-> IntcountPrimesTo limit =
let lmtb
= (limit
- 3) `
div`
2 in let sumChnks acc chnks@(chnk@(UArray lo hi rng _):chnks') =
let cnt :: UArray PrimeType Word32 -> Int
cnt bfw =
case if lmtb < hi then fromIntegral (lmtb - lo) else rng of
crng -> case crng `shiftR` 5 of
rngw ->
let cnt' i ac =
ac `
seq`
if i
>= rngw
then if (i `shiftL` 5) >= rng then ac else
case (unsafeAt bfw rngw) .&.
(complement msk) of
bts
-> bts `
seq`
case popCount bts
of c
-> c `
seq`
case ac
+ c
of nac
-> nac
else case ac + (popCount $ unsafeAt bfw i) of
nacc
-> nacc `
seq` cnt
' (i + 1) (nacc) in cnt' 0 0 in
acc `
seq`
case runST
$ do -- make UArray _ Bool into a UArray _ Word32 stbuf <- unsafeThawSTUArray chnk
stbufw <- castSTUArray stbuf
bufw <- unsafeFreezeSTUArray stbufw
c
-> c `
seq`
case acc
+ c
of nacc
-> nacc `
seq`
if hi
>= lmtb
then nacc
else sumChnks nacc chnks' in
if limit < 2 then 0 else if limit < 3 then 1 else
lmtb `seq` sumChnks 1 (chnksList 0)
main = do
x <- read `fmap` getLine -- 1mln 2mln 10mln 100mln 1000mln
-- 0.02 0.03 0.06 0.45 4.60 seconds
-- 7328 7328 8352 8352 9424 Kilobytes
-- this takes 14.34 seconds and 9424 Kilobytes to 3 billion,
-- and 9.12 seconds for 3 billion on an i7-2700K (3.5 GHz)
-- The above ratio of about 1.6 is much better than usual due to
-- the extremely low memory use of the page segmented algorithm.
-- It seems thaat the Windows Native Code Generator (NCG) from GHC
-- is particularly poor, as the Linux 32-bit version takes
-- less than two thirds of the time for exactly the same program...
print $ countPrimesTo x
-- print $ length $ takeWhile ((>=) x) $ primes () -- the slow way to do this