;;;; -*- mode:lisp;coding:utf-8 -*-
;;;;**************************************************************************
;;;;FILE: c-string-reader.lisp
;;;;LANGUAGE: Common-Lisp
;;;;SYSTEM: Common-Lisp
;;;;USER-INTERFACE: NONE
;;;;DESCRIPTION
;;;;
;;;; A C string reader, implementing C string back-slash escapes.
;;;; Also includes a writer to print strings with C back-slash escapes.
;;;;
;;;;AUTHORS
;;;; <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
;;;;MODIFICATIONS
;;;; 2013-05-22 <PJB> Added character-code-reader-macro, factorized
;;;; out c-escaped-character-map.
;;;; Published as http://p...content-available-to-author-only...p.org/display/137262
;;;; 2011-05-21 <PJB> Updated from http://p...content-available-to-author-only...p.org/display/69905 (lost).
;;;;BUGS
;;;;LEGAL
;;;; AGPL3
;;;;
;;;; Copyright Pascal J. Bourguignon 2013 - 2013
;;;;
;;;; This program is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU Affero General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program. If not, see <http://w...content-available-to-author-only...u.org/licenses/>.
;;;;**************************************************************************
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload "babel"))
(defpackage "COM.INFORMATIMAGO.COMMON-LISP.C-STRING"
(:use "COMMON-LISP" "BABEL")
(:export "ENABLE-C-STRING-READER-MACRO"))
(in-package "COM.INFORMATIMAGO.COMMON-LISP.C-STRING")
(defun c-escaped-character-map (escaped-character)
(case escaped-character
((#\' #\" #\? #\\) escaped-character)
((#\newline) -1)
((#\a) 7)
((#\b) 8)
((#\t) 9)
((#\n) 10)
((#\v) 11)
((#\f) 12)
((#\r) 13)
((#\x) :hexa)
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7) :octal)
(otherwise :default)))
(defun char-unicode (character)
(let ((bytes (babel:string-to-octets (string character)
:encoding :utf-32be
:use-bom nil)))
(+ (* 256 (+ (* 256 (+ (* 256 (aref bytes 0))
(aref bytes 1)))
(aref bytes 2)))
(aref bytes 3))))
(defun character-code-reader-macro (stream quotation-mark)
(declare (ignore quotation-mark))
(flet ((encode (ch) (char-unicode ch)))
(let ((ch (read-char stream)))
(if (char/= #\\ ch)
(encode ch)
(let* ((ch (read-char stream))
(code (c-escaped-character-map ch)))
(flet ((read-code (*read-base* base-name)
(let ((code (read stream)))
(if (and (integerp code) (<= 0 code (1- char-code-limit)))
code
(error "Invalid ~A character code: ~A" base-name code)))))
(case code
(:hexa (read-code 16 "hexadecimal"))
(:octal (unread-char ch stream) (read-code 8 "octal"))
(:default ;; In emacs ?\x = ?x
(encode ch))
(otherwise
(if (characterp code)
(encode code)
code)))))))))
(set-macro-character #\? 'character-code-reader-macro t)
;; (set-macro-character #\? 'character-code-reader-macro t)
(defun read-c-string (stream)
"Read a C string from the STREAM
The initial double-quote must have been read already."
(let ((buffer (make-array 80 :element-type 'character
:adjustable t :fill-pointer 0))
(state :in-string)
(start 0))
(flet ((process-token (ch)
(ecase state
((:in-string)
(setf state (case ch
((#\") :out)
((#\\) :escape)
(otherwise (vector-push-extend ch buffer)
:in-string)))
nil)
((:escape)
(setf state :in-string)
(let ((code (c-escaped-character-map ch)))
(case code
(:hexa
(setf state :in-hexa
start (fill-pointer buffer)))
(:octal
(setf state :in-octal
start (fill-pointer buffer))
(vector-push-extend ch buffer))
(:default
(error "Invalid escape character \\~C at position ~D"
ch (fill-pointer buffer)))
(otherwise
(cond
((characterp code) (vector-push-extend code buffer))
((eql -1 code) #|remove it|#)
(t (vector-push-extend (aref #(- - - - - - -
#\bell #\backspace #\tab
#\linefeed #\vt #\page
#\return)
code)
buffer))))))
nil)
((:in-octal)
(flet ((insert-octal ()
(setf (aref buffer start) (code-char (parse-integer buffer :start start :radix 8))
(fill-pointer buffer) (1+ start)
state :in-string)))
(case ch
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
(vector-push-extend ch buffer)
(when (<= 3 (- (fill-pointer buffer) start))
(insert-octal))
nil)
(otherwise
(insert-octal)
:again))))
((:in-hexa)
(case ch
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\e #\f
#\A #\B #\C #\D #\E #\F)
(vector-push-extend ch buffer)
nil)
(otherwise
(if (< start (fill-pointer buffer))
(setf (aref buffer start) (code-char (parse-integer buffer :start start :radix 16))
(fill-pointer buffer) (1+ start))
(error "Invalid hexadecimal digit at position ~A" (fill-pointer buffer)))
(setf state :in-string)
:again))))))
(loop
:for ch = (read-char stream)
:do (loop :while (process-token ch))
:until (eq state :out)
:finally (return buffer)))))
(defun write-c-string (string &optional (stream *standard-output*))
"Prints the string as a C string, with C escape sequences."
(loop
:for ch :across string
:initially (princ "\"" stream)
:do (princ (case ch
((#\bell) "\\a")
((#\backspace) "\\b")
((#\page) "\\f")
((#\newline
#-#.(cl:if (cl:char= #\newline #\linefeed) '(:and) '(:or))
#\linefeed) "\\n")
((#\return) "\\r")
((#\tab) "\\t")
((#\vt) "\\v")
((#\") "\\\"")
((#\\) "\\\\")
(otherwise
(cond ((< (char-code ch) 32)
(format nil "\\~3,'0o" (char-code ch)))
((< (char-code ch) 127)
ch)
((< (char-code ch) #x10000)
(format nil "\\u~4,'0X" (char-code ch)))
(t
(format nil "\\U~8,'0X" (char-code ch))))))
stream)
:finally (princ "\"" stream)))
;; (write-c-string "꧁Été꧂ Летом 🚀")
;; "\uA9C1\u00C9t\u00E9\uA9C2 \uF83D\u041B\u0435\u0442\u043E\u043C \U0001F680"
(defun test/read-c-string ()
(let ((*readtable*
(let ((rt (copy-readtable nil)))
(set-macro-character #\"
(lambda (stream ch)
(declare (ignore ch))
(read-c-string stream))
nil
rt)
rt)))
(read-from-string "\"Hello, bell=\\a, backspace=\\b, page=\\f, newline=\\n, return=\\r, tab=\\t, vt=\\v, \\
\\\"double-quotes\\\", \\'single-quotes\\', question\\?, backslash=\\\\, \\
hexa=\\x3BB, octal=\\101, \\7\\77\\107\\3071\"")))
(defmacro enable-c-string-reader-macro ()
`(eval-when (:compile-toplevel :load-toplevel :execute)
(set-macro-character #\"
(lambda (stream ch)
(declare (ignore ch))
(read-c-string stream))
nil)))
;;;; THE END ;;;;