; loglog
(define range
(case-lambda
((stop) (range 0 stop (if (negative? stop) -1 1)))
((start stop) (range start stop (if (< start stop) 1 -1)))
((start stop step)
(let ((le? (if (negative? step) >= <=)))
(let loop ((x start) (xs (list)))
(if (le? stop x) (reverse xs)
(loop (+ x step) (cons x xs))))))
(else (error 'range "too many arguments"))))
(define (fold-left op base xs)
(if (null? xs)
base
(fold-left op (op base (car xs)) (cdr xs))))
(define rand ; knuth random number generator with shuffle box
(let* ((a 69069) (c 1234567) (m 4294967296) (k 32) ; 32-bit
(seed 19380110) ; Happy Birthday DEK!
(next (lambda ()
(set! seed (modulo (+ (* a seed) c) m)) seed))
(init (lambda (seed) (let ((box (make-vector k)))
(do ((j 0 (+ j 1))) ((= j k) box)
(vector-set! box j (next))))))
(box (init seed)))
(lambda args
(when (pair? args)
(set! seed (modulo (car args) m)) (set! box (init seed)))
(let* ((j (quotient (* k seed) m)) (n (vector-ref box j)))
(set! seed (next)) (vector-set! box j seed) (/ n m)))))
(define (randint . args)
(let ((lo (if (pair? (cdr args)) (car args) 0))
(hi (if (pair? (cdr args)) (cadr args) (car args))))
(inexact->exact (+ lo (floor (* (rand) (- hi lo)))))))
(define (shuffle x)
(do ((v (list->vector x)) (n (length x) (- n 1)))
((zero? n) (vector->list v))
(let* ((r (randint n)) (t (vector-ref v r)))
(vector-set! v r (vector-ref v (- n 1)))
(vector-set! v (- n 1) t))))
(define (sum xs) (apply + xs))
(define (average xs) (/ (sum xs) (length xs)))
(define (zerobits n)
(vector-ref (vector 32 0 1 26 2 23 27 0 3 16 24 30 28 11 0 13
4 7 17 0 25 22 31 15 29 10 12 6 0 21 14 9 5 20 8 19 18)
(modulo (bitwise-and n (- n)) 37)))
(define pearson ; 32-bit pearson hash
(let ((t (list->vector (shuffle (range 256)))))
(lambda (str)
(let ((cs (map char->integer (string->list str))))
(define (init c j) (vector-ref t (modulo (+ c j) 256)))
(define (next n h) (vector-ref t (bitwise-xor n h)))
(do ((j 0 (+ j 1))
(h 0 (+ (fold-left next (init (car cs) j) (cdr cs)) (* 256 h))))
((= j 4) h))))))
(define (read-word)
(let loop ((c (read-char)) (cs (list)))
(cond ((eof-object? c)
(if (null? cs) c
(list->string (reverse cs))))
((char-alphabetic? c)
(loop (read-char) (cons c cs)))
((pair? cs) (list->string (reverse cs)))
(else (loop (read-char) cs)))))
(define (count k)
(let* ((2^k (expt 2 k)) (counts (make-vector 2^k -1)))
(do ((word (read-word) (read-word)))
((eof-object? word) (vector->list counts))
(let* ((hash (pearson word))
(bucket (modulo hash 2^k))
(last-1bit (zerobits (quotient hash 2^k))))
(vector-set! counts bucket
(max (vector-ref counts bucket) (min last-1bit (- 32 k))))))))
(define (loglog k)
(let ((x (exact->inexact (average (count k)))))
(* (expt 2 x) (expt 2 k) 0.79402)))
; there are 141 distinct words in the gettysburg address
(display (loglog 7)) (newline)