; grep-csv n regex -- read csv-formatted file line-by-line
; from standard input and write to standard output those
; lines for which the n'th field (counting from 1) matches
; the regular expression
; --+----1----+----2----+----3----+----4----+----5----+----6---
; READ-CSV-RECORD [DELIM] [PORT]
(define (read-csv-record . args)
(define (read-csv delim port)
(define (add-field field fields)
(cons (list->string (reverse field)) fields))
(define (start field fields)
(let ((c (read-char port)))
(cond ((eof-object? c) (reverse fields))
((char=? #\return c)
(carriage-return field fields))
((char=? #\newline c) (line-feed field fields))
((char=? #\" c) (quoted-field field fields))
((char=? delim c)
(not-field '() (add-field field fields)))
(else (unquoted-field (cons c field) fields)))))
(define (not-field field fields)
(let ((c (read-char port)))
(cond ((eof-object? c) (cons "" fields))
((char=? #\return c)
(carriage-return '() (add-field field fields)))
((char=? #\newline c)
(line-feed '() (add-field field fields)))
((char=? #\" c) (quoted-field field fields))
((char=? delim c)
(not-field '() (add-field field fields)))
(else (unquoted-field (cons c field) fields)))))
(define (quoted-field field fields)
(let ((c (read-char port)))
(cond ((eof-object? c) (add-field field fields))
((char=? #\" c)
(may-be-doubled-quotes field fields))
(else (quoted-field (cons c field) fields)))))
(define (may-be-doubled-quotes field fields)
(let ((c (read-char port)))
(cond ((eof-object? c) (add-field field fields))
((char=? #\return c)
(carriage-return '() (add-field field fields)))
((char=? #\newline c)
(line-feed '() (add-field field fields)))
((char=? #\" c)
(quoted-field (cons #\" field) fields))
((char=? delim c)
(not-field '() (add-field field fields)))
(else (unquoted-field (cons c field) fields)))))
(define (unquoted-field field fields)
(let ((c (read-char port)))
(cond ((eof-object? c) (add-field field fields))
((char=? #\return c)
(carriage-return '() (add-field field fields)))
((char=? #\newline c)
(line-feed '() (add-field field fields)))
((char=? delim c)
(not-field '() (add-field field fields)))
(else (unquoted-field (cons c field) fields)))))
(define (carriage-return field fields)
(let ((c (peek-char port)))
(cond ((eof-object? c) fields)
((char=? #\newline c) (read-char port) fields)
(else fields))))
(define (line-feed field fields)
(let ((c (peek-char port)))
(cond ((eof-object? c) fields)
((char=? #\return c) (read-char port) fields)
(else fields))))
(if (eof-object? (peek-char port)) (peek-char port)
(reverse (start '() '()))))
(cond ((null? args) (read-csv #\, (current-input-port)))
((and (null? (cdr args)) (char? (car args)))
(read-csv (car args) (current-input-port)))
((and (null? (cdr args)) (port? (car args)))
(read-csv #\, (car args)))
((and (pair? (cdr args)) (null? (cddr args))
(char? (car args)) (port? (cadr args)))
(read-csv (car args) (cadr args)))
(else (read-csv #\, (current-input-port)))))
; FILTER-PORT READER PRED?
(define (filter-port reader pred?)
(lambda args
(let loop ((x (apply reader args)))
(cond ((eof-object? x) x)
((pred? x) x)
(else (loop (apply reader args)))))))
; FOR-EACH-PORT READER PROC [PORT]
(define (for-each-port reader proc . port)
(let ((p (if (null? port) (current-input-port) (car port))))
(let loop ((item (reader p)))
(if (not (eof-object? item))
(begin (proc item) (loop (reader p)))))))
; QUOTE-CSV DELIM STR
(define (quote-csv delim str)
(define (string-find str pat)
(let loop ((i 0))
(cond ((<= (string-length str) i) #f)
((string=? (substring str i (+ i (string-length pat))) pat) i)
(else (loop (+ i 1))))))
(define (string-replace-all str pat repl)
(let ((len-str (string-length str))
(len-pat (string-length pat))
(spot (string-find str pat)))
(if spot
(string-append
(substring str 0 spot)
repl
(string-replace-all (substring (+ spot len-pat) len-str pat repl)))
str)))
(let ((new-str (string-replace-all str "\"" "\"\"")))
(if (or (string-find str (string delim))
(not (string=? str new-str))
(string-find str (string #\return))
(string-find str (string #\newline)))
(string-append "\"" new-str "\"")
str)))
; WRITE-CSV-RECORD REC [DELIM] [PORT]
(define (write-csv-record rec . args)
(define (write-csv delim port)
(do ((rec rec (cdr rec)))
((null? rec) (newline port))
(display (quote-csv delim (car rec)) port)
(if (pair? (cdr rec)) (display delim port))))
(cond ((null? args) (write-csv #\, (current-output-port)))
((and (null? (cdr args)) (char? (car args)))
(write-csv (car args) (current-output-port)))
((and (null? (cdr args)) (port? (car args)))
(write-csv #\, (car args)))
((and (pair? (cdr args)) (null? (cddr args))
(char? (car args)) (port? (cadr args)))
(write-csv (car args) (cadr args)))
(else (write-csv #\, (current-output-port)))))
(define (trex regex text) ; tiny regular expression matcher
; c -- match the literal character c
; . -- (dot) match any single character
; * -- match zero or more of preceding character
; ^ -- match beginning of search string
; $ -- match end of search string
; based on Kernighan/Pike -- The Practice of Programming
; https://p...content-available-to-author-only...s.com/2009/09/11/beautiful-code/
(define (match regex text)
(cond ((null? regex) #t)
((null? text) #f)
((char=? (car regex) #\^)
(match-here (cdr regex) text))
(else (or (match-here regex text)
(match regex (cdr text))))))
(define (match-here regex text)
(cond ((null? regex) #t)
((and (pair? (cdr regex))
(char=? (cadr regex) #\*))
(match-star (car regex) (cddr regex) text))
((and (char=? (car regex) #\$)
(null? (cdr regex)))
(null? text))
((and (pair? text)
(or (char=? (car regex) #\.)
(char=? (car regex) (car text))))
(match-here (cdr regex) (cdr text)))
(else #f)))
(define (match-star c regex text)
(cond ((match-here regex text) #t)
((and (pair? text)
(or (char=? (car text) c)
(char=? c #\.)))
(match-star c regex (cdr text)))
(else #f)))
(match (string->list regex) (string->list text)))
(define (grep-csv n regex)
(for-each-port
(filter-port read-csv-record
(lambda (line) (trex regex (list-ref line (- n 1)))))
(lambda (line) (write-csv-record line))))
(grep-csv 2 "e.*s")