; external sorting
(define (read-line . port)
(define (eat p c)
(if (and (not (eof-object? (peek-char p)))
(char=? (peek-char p) c))
(read-char p)))
(let ((p (if (null? port) (current-input-port) (car port))))
(let loop ((c (read-char p)) (line '()))
(cond ((eof-object? c) (if (null? line) c (list->string (reverse line))))
((char=? #\newline c) (eat p #\return) (list->string (reverse line)))
((char=? #\return c) (eat p #\newline) (list->string (reverse line)))
(else (loop (read-char p) (cons c line)))))))
(define (read-lines max-lines)
(let loop ((x (read-line)) (xs (list)) (k max-lines))
(if (or (zero? k) (eof-object? x))
(reverse xs)
(loop (read-line) (cons x xs) (- k 1)))))
(define (temp num)
(string-append
"./external-sort."
(number->string (get-process-id))
"."
(number->string num)))
(define max-lines 10000)
(define pq-empty '())
(define pq-empty? null?)
(define (pq-first pq)
(if (null? pq)
(error 'pq-first "can't extract minimum from null queue")
(car pq)))
(define (pq-merge lt? p1 p2)
(cond ((null? p1) p2)
((null? p2) p1)
((lt? (car p2) (car p1))
(cons (car p2) (cons p1 (cdr p2))))
(else (cons (car p1) (cons p2 (cdr p1))))))
(define (pq-insert lt? x pq)
(pq-merge lt? (list x) pq))
(define (pq-merge-pairs lt? ps)
(cond ((null? ps) '())
((null? (cdr ps)) (car ps))
(else (pq-merge lt? (pq-merge lt? (car ps) (cadr ps))
(pq-merge-pairs lt? (cddr ps))))))
(define (pq-rest lt? pq)
(if (null? pq)
(error 'pq-rest "can't delete minimum from null queue")
(pq-merge-pairs lt? (cdr pq))))
(define (read-into-temps)
(let loop ((file-num 1))
(let ((lines (read-lines max-lines)))
(if (null? lines) (- file-num 1)
(let ((temp-file-name (temp file-num)))
(with-output-to-file temp-file-name
(lambda ()
(do ((lines (sort string<? lines) (cdr lines)))
((null? lines))
(display (car lines)) (newline))))
(loop (+ file-num 1)))))))
(define (write-output num-files)
(define (lt? x y) (string<? (car x) (car y)))
(let loop ((num-files num-files) (pq pq-empty))
(if (positive? num-files)
(loop (- num-files 1)
(let ((p (open-input-file (temp num-files))))
(pq-insert lt? (cons (read-line p) p) pq)))
(let loop ((pq pq))
(when (not (pq-empty? pq))
(let ((node (pq-first pq)))
(display (car node)) (newline)
(let ((next (read-line (cdr node))))
(if (eof-object? next)
(loop (pq-rest lt? pq))
(loop (pq-insert
lt?
(cons next (cdr node))
pq))))))))))
(define (x-sort in-file out-file)
(with-input-from-file in-file (lambda ()
(with-output-to-file out-file (lambda ()
(write-output (read-into-temps)))))))
; (x-sort "bible.txt" "bbeil.txt")