; heavy hitters, the britney spears algorithm

(define (take n xs)
  (let loop ((n n) (xs xs) (ys '()))
    (if (or (zero? n) (null? xs))
        (reverse ys)
        (loop (- n 1) (cdr xs)
              (cons (car xs) ys)))))

(define (hashtable-pairs ht)
  (call-with-values
    (lambda () (hashtable-entries ht))
    (lambda (keys values)
      (map cons (vector->list keys) (vector->list values)))))

(define (read-word)
  (let loop ((c (read-char)) (cs '()))
    (cond ((eof-object? c)
            (if (null? cs) c
              (list->string (reverse cs))))
          ((char-alphabetic? c)
            (loop (read-char) (cons (char-downcase c) cs)))
          ((pair? cs) (list->string (reverse cs)))
          (else (loop (read-char) cs)))))

(define (word-freq n file-name)
  (define (freq-gt? a b) (> (cdr a) (cdr b)))
  (with-input-from-file file-name (lambda ()
    (let ((freqs (make-hashtable string-hash string=?)))
      (do ((word (read-word) (read-word)))
          ((eof-object? word) (map car (take n
            (sort freq-gt? (hashtable-pairs freqs)))))
        (hashtable-update! freqs word add1 0))))))

(define (misra-gries n file-name)
  (define (freq-gt? a b) (> (cdr a) (cdr b)))
  (let ((keys (make-hashtable string-hash string=?)))
    (with-input-from-file file-name (lambda ()
      (do ((word (read-word) (read-word)))
          ((eof-object? word) (map car (take n
            (sort freq-gt? (hashtable-pairs keys)))))
        (if (or (hashtable-contains? keys word)
                (< (hashtable-size keys) n))
            (hashtable-update! keys word add1 0)
            (vector-for-each
              (lambda (word)
                (hashtable-update! keys word sub1 1)
                (when (zero? (hashtable-ref keys word 0))
                  (hashtable-delete! keys word)))
              (hashtable-keys keys))))))))

(define (space-saving n file-name)
  (define (freq-gt? a b) (> (cdr a) (cdr b)))
  (let ((keys (make-hashtable string-hash string=?)))
    (with-input-from-file file-name (lambda ()
      (do ((word (read-word) (read-word)))
          ((eof-object? word) (map car (take n
            (sort freq-gt? (hashtable-pairs keys)))))
        (if (or (hashtable-contains? keys word)
                (< (hashtable-size keys) n))
            (hashtable-update! keys word add1 0)
            (let ((kv-pairs (hashtable-pairs keys)))
              (let loop ((min-k (caar kv-pairs))
                         (min-v (cdar kv-pairs))
                         (kv-pairs (cdr kv-pairs)))
                (if (pair? kv-pairs)
                    (if (< (cdar kv-pairs) min-v)
                        (loop (caar kv-pairs)
                              (cdar kv-pairs)
                              (cdr kv-pairs))
                        (loop min-k min-v (cdr kv-pairs)))
                    (begin
                      (hashtable-delete! keys min-k)
                      (hashtable-update! keys word
                        add1 min-v))))))))))