fork download
  1. //This work improves on the ideas of Jon Harrop as of
  2. //http://f...content-available-to-author-only...t.ca/2010/02/sieve-of-eratosthenes.html
  3. //which fails in terms of high memory use (saves all generated primes instead of
  4. //just the primes to the square root of the highest candidate prime, and continuously
  5. //regenerating buffer arrays of ever increasing huge size irrespective of the CPU cache sizes)
  6. //as well not including a generating sequence and not having the optimizations to
  7. //only deal with odd numbers let alone using wheel factorization
  8.  
  9. // Segmented functional Sieve of Eratosthenes not using mutable state other than the
  10. // mutable buffer array contents; sieving odds in a packed bit array
  11. // from http://stackoverflow.com/questions/4629734/the-sieve-of-eratosthenes-in-f/17820204#17820204
  12. // by GordonBGood
  13. // cloned from http://i...content-available-to-author-only...e.com/l8zupF
  14.  
  15. (*let primesJH() =
  16. let primes =
  17. let a = ResizeArray[2]
  18. let grow() =
  19. let p0 = a.[a.Count-1]+1
  20. let b = Array.create p0 true
  21. for di in a do
  22. let rec loop i =
  23. if i<b.Length then
  24. b.[i] <- false
  25. loop(i+di)
  26. let i0 = p0/di*di
  27. loop(if i0<p0 then i0+di-p0 else i0-p0)
  28. for i=0 to b.Length-1 do
  29. if b.[i] then a.Add(p0+i)
  30. fun n ->
  31. while n >= a.Count do
  32. grow()
  33. a.[n]
  34. Seq.initInfinite id |> Seq.map primes
  35.  
  36. primesJH() |> Seq.nth 10000000 *)
  37.  
  38. //#time
  39. let primesAPF() =
  40. let BUFSZ = 1<<<17 in let b = Array.create (BUFSZ>>>5) 0xFFFFFFFFu in let BUFRNG = uint32 BUFSZ<<<1
  41. let bLUT = [| for i in 0..31 -> 1u<<<i |] //simple Look Up Tables for bit operations on uint32's
  42. let rbLUT = bLUT |> Array.map ((^^^) 0xFFFFFFFFu)
  43. let inline cull p s low sz (buf:uint32[]) =
  44. let inline clrbit i (buf:uint32[]) = let w,b = i>>>5,rbLUT.[i&&&0x1F] in buf.[w] <- buf.[w]&&&b
  45. let rec cull2 i = if i < sz then (clrbit i buf; cull2 (i+int p))
  46. cull2 (if s>=low then int(s-low)>>>1
  47. else let r=((low-s)>>>1)%p in if r=0u then 0 else int(p-r))
  48. let inline testbit i (buf:uint32[]) = let w,b = i>>>5,bLUT.[i&&&0x1F] in (buf.[w]&&&b)<>0u
  49. let baseprimes = //memoization is by the little BitArray
  50. let LBSZ = (65537-3)>>>1 in let lb = Array.create ((LBSZ+31)>>>5) 0xFFFFFFFFu //size rounded up
  51. let rec mkpi i = if testbit i lb then i else mkpi (i+1)
  52. let inline maybecull p = //only cull once per new found prime
  53. let s = p*p in if s < 65536u && testbit (int(s-3u)>>>1) lb then cull p s 3u LBSZ lb
  54. Seq.unfold (fun i->let pi=mkpi i in let p=3u+(uint32 pi<<<1) in maybecull p;Some(p,pi+1)) 0 |> Seq.cache
  55. let inline cullpg low = //cull composites from whole buffer page for efficiency
  56. let max = low+BUFRNG-1u in let max = if max<low then 0xFFFFFFFFu else max
  57. baseprimes |> Seq.takeWhile (fun p-> //force side effect of culling to limit of buffer
  58. let s=p*p in if p>65536u || s>=max then false else true)|>Seq.iter (fun p->let s=p*p in cull p s low BUFSZ b)
  59. let rec mkpi i low =
  60. if i>=BUFSZ then (let nlow=low+BUFRNG in Array.fill b 0 b.Length 0xFFFFFFFFu; cullpg nlow;mkpi 0 nlow)
  61. else (if testbit i b then (i,low) else mkpi (i+1) low)
  62. let oddprimes = cullpg 3u; Seq.unfold (fun (i,lw) -> //force cull the first buffer page then doit
  63. let rtn=mkpi i lw in let ni,nlw = fst rtn,snd rtn in let p=nlw+(uint32 ni<<<1)
  64. (if p<lw then failwith "Enumerated primes past 32-bit range!!!"); Some(p,(ni+1,nlw))) (0,3u)
  65. seq { yield 2u; yield! oddprimes }
  66.  
  67. primesAPF() |> Seq.nth 1000000 |> printfn "%A"
Success #stdin #stdout 0.64s 12624KB
stdin
10K     0.20               (should've been 0.00; 0.2 is the constant overhead; SUBTRACT IT FROM ALL TIMINGS!)
100K    0.23s - 12.7MB     (should've been 0.06)
1 mln   0.63s - 12.7MB                              0.43
2 mln   1.07s - 12.6MB                              0.87  n^1.02
4 mln   1.98s - 12.6MB                              1.78  n^1.03
6 mln   2.95s - 12.6MB                              2.75  n^1.07 
8 mln   3.95s - 12.7MB                              3.75  n^1.08

10mln:  4.94s - 12.6MB   5.25-12.7   179,424,691     
        5.01s - 12.6MB   4.67-12.7    = 4.97s       4.77  n^1.08
15mln:  7.72s - 12.7MB   8.04-12.6   275,604,547     
        7.21s - 12.7MB   7.41-12.6    = 7.60s       7.40  n^1.08   
20mln:  9.93s - 12.7MB                              9.73  n^0.95  10m->n^1.03
stdout
15485867u