; loglog

(define range
  (case-lambda
    ((stop) (range 0 stop (if (negative? stop) -1 1)))
    ((start stop) (range start stop (if (< start stop) 1 -1)))
    ((start stop step)
      (let ((le? (if (negative? step) >= <=)))
        (let loop ((x start) (xs (list)))
          (if (le? stop x) (reverse xs)
            (loop (+ x step) (cons x xs))))))
    (else (error 'range "too many arguments"))))
   
(define (fold-left op base xs)
  (if (null? xs)
    base
    (fold-left op (op base (car xs)) (cdr xs))))

(define rand ; knuth random number generator with shuffle box
  (let* ((a 69069) (c 1234567) (m 4294967296) (k 32) ; 32-bit
         (seed 19380110) ; Happy Birthday DEK!
         (next (lambda ()
           (set! seed (modulo (+ (* a seed) c) m)) seed))
         (init (lambda (seed) (let ((box (make-vector k)))
           (do ((j 0 (+ j 1))) ((= j k) box)
             (vector-set! box j (next))))))
         (box (init seed)))
    (lambda args
      (when (pair? args)
        (set! seed (modulo (car args) m)) (set! box (init seed)))
      (let* ((j (quotient (* k seed) m)) (n (vector-ref box j)))
        (set! seed (next)) (vector-set! box j seed) (/ n m)))))

(define (randint . args)
  (let ((lo (if (pair? (cdr args)) (car args) 0))
        (hi (if (pair? (cdr args)) (cadr args) (car args))))
    (inexact->exact (+ lo (floor (* (rand) (- hi lo)))))))

(define (shuffle x)
  (do ((v (list->vector x)) (n (length x) (- n 1)))
      ((zero? n) (vector->list v))
    (let* ((r (randint n)) (t (vector-ref v r)))
      (vector-set! v r (vector-ref v (- n 1)))
      (vector-set! v (- n 1) t))))

(define (sum xs) (apply + xs))

(define (average xs) (/ (sum xs) (length xs)))

(define (zerobits n)
  (vector-ref (vector 32 0 1 26 2 23 27 0 3 16 24 30 28 11 0 13
      4 7 17 0 25 22 31 15 29 10 12 6 0 21 14 9 5 20 8 19 18)
    (modulo (bitwise-and n (- n)) 37)))

(define pearson ; 32-bit pearson hash
  (let ((t (list->vector (shuffle (range 256)))))
    (lambda (str)
      (let ((cs (map char->integer (string->list str))))
        (define (init c j) (vector-ref t (modulo (+ c j) 256)))
        (define (next n h) (vector-ref t (bitwise-xor n h)))
        (do ((j 0 (+ j 1))
             (h 0 (+ (fold-left next (init (car cs) j) (cdr cs)) (* 256 h))))
            ((= j 4) h))))))

(define (read-word)
  (let loop ((c (read-char)) (cs (list)))
    (cond ((eof-object? c)
            (if (null? cs) c
              (list->string (reverse cs))))
          ((char-alphabetic? c)
            (loop (read-char) (cons c cs)))
          ((pair? cs) (list->string (reverse cs)))
          (else (loop (read-char) cs)))))

(define (count k)
  (let* ((2^k (expt 2 k)) (counts (make-vector 2^k -1)))
    (do ((word (read-word) (read-word)))
        ((eof-object? word) (vector->list counts))
      (let* ((hash (pearson word))
             (bucket (modulo hash 2^k))
             (last-1bit (zerobits (quotient hash 2^k))))
        (vector-set! counts bucket
          (max (vector-ref counts bucket) (min last-1bit (- 32 k))))))))

(define (loglog k)
  (let ((x (exact->inexact (average (count k)))))
    (* (expt 2 x) (expt 2 k) 0.79402)))

; there are 141 distinct words in the gettysburg address
(display (loglog 7)) (newline)