-- CountingPagedSoE.hs -- from http://w...content-available-to-author-only...l.org/haskellwiki/Prime_numbers#Using_ST_Array -- but with very many improvements including a very fast count function -- applied as an answer to: http://stackoverflow.com/a/20884433/549617 {-# OPTIONS -O3 -rtsopts #-} -- -fllvm ide.com doesn't support LLVM import Data.Word import Data.Bits import Control.Monad import Control.Monad.ST import Data.Array.ST (runSTUArray) import Data.Array.Unboxed import Data.Array.Base pgSZBTS = (2^18) * 8 :: Int -- size of L2 data cache type PrimeType = Word32 type Chunk = UArray PrimeType Bool -- makes a new page chunk and culls it -- if the base primes list provided is empty then -- it uses the current array as source (for zero page base primes) mkChnk :: Word32 -> [Chunk] -> Chunk mkChnk low bschnks = runSTUArray $ do let nxt = (fromIntegral low) + (fromIntegral pgSZBTS) buf <- nxt `seq` newArray (fromIntegral low, fromIntegral nxt - 1) True let cull ~(p:ps) = let bp = (fromIntegral p) i = (bp - 3) `shiftR` 1 s = bp * (i + 1) + i in let cullp j = do if j >= pgSZBTS then cull ps else do unsafeWrite buf j False cullp (j + (fromIntegral p)) in when (s < nxt) $ do let strt = do if s >= low then fromIntegral (s - low) else do let b = (low - s) `rem` bp if b == 0 then 0 else fromIntegral (bp - b) cullp strt case bschnks of [] -> do bsbf <- unsafeFreezeSTUArray buf cull (listChnkPrms [bsbf]) _ -> cull $ listChnkPrms bschnks return buf -- creates a page chunk list starting at the lw value chnksList :: Word32 -> [Chunk] chnksList lw = mkChnk lw basePrmChnks : chnksList (lw + fromIntegral pgSZBTS) -- converts a page chunk list to a list of primes listChnkPrms :: [Chunk] -> [PrimeType] listChnkPrms [] = [] listChnkPrms ~(hdchnk@(UArray lw _ rng _):tlchnks) = let nxtp i = if i >= rng then [] else if unsafeAt hdchnk i then (case ((lw + fromIntegral i) `shiftL` 1) + 3 of np -> np) : nxtp (i + 1) else nxtp (i + 1) in (hdchnk `seq` lw `seq` nxtp 0) ++ listChnkPrms tlchnks -- the base page chunk list used to cull the higher order pages, -- note that it has special treatment for the zero page. -- It is more space efficient to store this as chunks rather than -- as a list of primes or even a list of deltas (gaps), with the -- extra processing to convert as needed not too much. basePrmChnks :: [Chunk] basePrmChnks = mkChnk 0 [] : chnksList (fromIntegral pgSZBTS) -- the full list of primes could be accessed with the following function. primes :: () -> [PrimeType] primes () = 2 : (listChnkPrms $ chnksList 0) -- a quite fast prime counting up to the given limit using -- chunk processing to avoid innermost list processing loops. countPrimesTo :: PrimeType -> Int countPrimesTo limit = let lmtb = (limit - 3) `div` 2 in let sumChnks acc chnks@(chnk@(UArray lo hi rng _):chnks') = let cnt :: UArray PrimeType Word32 -> Int cnt bfw = case if lmtb < hi then fromIntegral (lmtb - lo) else rng of crng -> case crng `shiftR` 5 of rngw -> let cnt' i ac = ac `seq` if i >= rngw then if (i `shiftL` 5) >= rng then ac else case (-2) `shiftL` fromIntegral (lmtb .&. 31) of msk -> msk `seq` case (unsafeAt bfw rngw) .&. (complement msk) of bts -> bts `seq` case popCount bts of c -> c `seq` case ac + c of nac -> nac else case ac + (popCount $ unsafeAt bfw i) of nacc -> nacc `seq` cnt' (i + 1) (nacc) in cnt' 0 0 in acc `seq` case runST $ do -- make UArray _ Bool into a UArray _ Word32 stbuf <- unsafeThawSTUArray chnk stbufw <- castSTUArray stbuf bufw <- unsafeFreezeSTUArray stbufw return $ cnt bufw of c -> c `seq` case acc + c of nacc -> nacc `seq` if hi >= lmtb then nacc else sumChnks nacc chnks' in if limit < 2 then 0 else if limit < 3 then 1 else lmtb `seq` sumChnks 1 (chnksList 0) main = do x <- read `fmap` getLine -- 1mln 2mln 10mln 100mln 1000mln -- 0.02 0.03 0.06 0.45 4.60 seconds -- 7328 7328 8352 8352 9424 Kilobytes -- this takes 14.34 seconds and 9424 Kilobytes to 3 billion, -- and 9.12 seconds for 3 billion on an i7-2700K (3.5 GHz) -- The above ratio of about 1.6 is much better than usual due to -- the extremely low memory use of the page segmented algorithm. -- It seems thaat the Windows Native Code Generator (NCG) from GHC -- is particularly poor, as the Linux 32-bit version takes -- less than two thirds of the time for exactly the same program... print $ countPrimesTo x -- print $ length $ takeWhile ((>=) x) $ primes () -- the slow way to do this