; heavy hitters, the britney spears algorithm
(define (take n xs)
(let loop ((n n) (xs xs) (ys '()))
(if (or (zero? n) (null? xs))
(reverse ys)
(loop (- n 1) (cdr xs)
(cons (car xs) ys)))))
(define (hashtable-pairs ht)
(call-with-values
(lambda () (hashtable-entries ht))
(lambda (keys values)
(map cons (vector->list keys) (vector->list values)))))
(define (read-word)
(let loop ((c (read-char)) (cs '()))
(cond ((eof-object? c)
(if (null? cs) c
(list->string (reverse cs))))
((char-alphabetic? c)
(loop (read-char) (cons (char-downcase c) cs)))
((pair? cs) (list->string (reverse cs)))
(else (loop (read-char) cs)))))
(define (word-freq n file-name)
(define (freq-gt? a b) (> (cdr a) (cdr b)))
(with-input-from-file file-name (lambda ()
(let ((freqs (make-hashtable string-hash string=?)))
(do ((word (read-word) (read-word)))
((eof-object? word) (map car (take n
(sort freq-gt? (hashtable-pairs freqs)))))
(hashtable-update! freqs word add1 0))))))
(define (misra-gries n file-name)
(define (freq-gt? a b) (> (cdr a) (cdr b)))
(let ((keys (make-hashtable string-hash string=?)))
(with-input-from-file file-name (lambda ()
(do ((word (read-word) (read-word)))
((eof-object? word) (map car (take n
(sort freq-gt? (hashtable-pairs keys)))))
(if (or (hashtable-contains? keys word)
(< (hashtable-size keys) n))
(hashtable-update! keys word add1 0)
(vector-for-each
(lambda (word)
(hashtable-update! keys word sub1 1)
(when (zero? (hashtable-ref keys word 0))
(hashtable-delete! keys word)))
(hashtable-keys keys))))))))
(define (space-saving n file-name)
(define (freq-gt? a b) (> (cdr a) (cdr b)))
(let ((keys (make-hashtable string-hash string=?)))
(with-input-from-file file-name (lambda ()
(do ((word (read-word) (read-word)))
((eof-object? word) (map car (take n
(sort freq-gt? (hashtable-pairs keys)))))
(if (or (hashtable-contains? keys word)
(< (hashtable-size keys) n))
(hashtable-update! keys word add1 0)
(let ((kv-pairs (hashtable-pairs keys)))
(let loop ((min-k (caar kv-pairs))
(min-v (cdar kv-pairs))
(kv-pairs (cdr kv-pairs)))
(if (pair? kv-pairs)
(if (< (cdar kv-pairs) min-v)
(loop (caar kv-pairs)
(cdar kv-pairs)
(cdr kv-pairs))
(loop min-k min-v (cdr kv-pairs)))
(begin
(hashtable-delete! keys min-k)
(hashtable-update! keys word
add1 min-v))))))))))