;Syntax for (some) chemical formulae:
;  <formula> = "" | <element-with-multiplicity> <formula> | <group-with-multiplicity> <formula>
;  <element-with-multiplicity> = <element> <optional-number>
;  <group-with-multiplicity> = <group> <number>
;  <group> = "(" <formula> ")"
;  <element> = "C" | "H" | "O" | "Cl" | "F" | "Na" | "Pb" | "N"   
;  <optional-number> = <number> | ""
;  <number> = "2" | "3" | "4" | ...

(defun empty-clause (continuation input-string)
  (funcall continuation input-string nil))

(defun match-and (clause1 clause2 &key (combinator #'list))
  (lambda (continuation input-string)
    (funcall clause1
             (lambda (remainder1 value1)
               (funcall clause2
                        (lambda (remainder2 value2)
                          (funcall continuation remainder2 (funcall combinator value1 value2)))
                        remainder1))
             input-string)))

(defun match-all (&rest clauses)
  (if clauses
    (match-and (car clauses)
               (apply #'match-all (cdr clauses))
               :combinator #'cons)
    #'empty-clause))

(defun match-any (&rest clauses)
  (lambda (continuation input-string)
    (dolist (clause clauses)
      (funcall clause continuation input-string))))

(defun make-literal-clause (string-to-match value)
  (lambda (continuation input-string)
    (unless (mismatch string-to-match input-string :end2 (min (length string-to-match)
                                                              (length input-string)))
      (funcall continuation (subseq input-string (length string-to-match)) value))))

(defclass chemical-element ()
  ((name
     :initarg :name
     :reader name)
   (chemical-symbol
     :initarg :chemical-symbol
     :reader chemical-symbol)
   (atomic-weight
     :initarg :atomic-weight
     :reader atomic-weight)))

(defvar *elements*
  (list (make-instance 'chemical-element
                       :name "carbon"
                       :chemical-symbol "C"
                       :atomic-weight 12.011)
        (make-instance 'chemical-element
                       :name "hydrogen"
                       :chemical-symbol "H"
                       :atomic-weight 1.008)
        (make-instance 'chemical-element
                       :name "oxygen"
                       :chemical-symbol "O"
                       :atomic-weight 15.999)
        (make-instance 'chemical-element
                       :name "chlorine"
                       :chemical-symbol "Cl"
                       :atomic-weight 35.45)
        (make-instance 'chemical-element
                       :name "fluorine"
                       :chemical-symbol "F"
                       :atomic-weight 18.998403163)
        (make-instance 'chemical-element
                       :name "sodium"
                       :chemical-symbol "Na"
                       :atomic-weight 22.98976928)
        (make-instance 'chemical-element
                       :name "lead"
                       :chemical-symbol "Pb"
                       :atomic-weight 207.2)
        (make-instance 'chemical-element
                       :name "nitrogen"
                       :chemical-symbol "N"
                       :atomic-weight 14.007)))

(setf (symbol-function 'element-clause)
      (apply #'match-any (mapcar
                           (lambda (e)
                             (make-literal-clause (chemical-symbol e) e))
                           *elements*)))

(defun number-clause (continuation input-string)
  (dotimes (i (length input-string))
    (let ((code (char-code (char input-string i))))
      (when (or (< code (char-code #\0))
                (> code (char-code #\9)))
        (return))
      (when (and (= i 0)
                 (= code (char-code #\0)))
        (return))
      (unless (and (= i 0)
                   (= code (char-code #\1)))
        (funcall continuation
                 (subseq input-string (1+ i))
                 (read-from-string (subseq input-string 0 (1+ i))))))))
(setf (symbol-function 'optional-number-clause)
      (match-any #'empty-clause
                 #'number-clause))

(defun element-with-multiplicity-clause (continuation input-string)
  (funcall (match-all #'element-clause #'optional-number-clause)
           (lambda (remainder value)
             (if (cadr value)
               (funcall continuation remainder (cons (car value) (cadr value)))
               (funcall continuation remainder (car value))))
           input-string))

(defun group-clause (continuation input-string)
  (funcall (match-all (make-literal-clause "(" nil)
                      'formula-clause
                      (make-literal-clause ")" nil))
           (lambda (remainder value)
             (funcall continuation remainder (cadr value)))
           input-string))

(defun group-with-multiplicity-clause (continuation input-string)
  (funcall (match-all #'group-clause #'number-clause)
           (lambda (remainder value)
             (funcall continuation remainder (cons (car value) (cadr value))))
           input-string))

(setf (symbol-function 'formula-clause)
      (match-any
        #'empty-clause
        (match-and #'element-with-multiplicity-clause 'formula-clause :combinator #'cons)
        (match-and #'group-with-multiplicity-clause 'formula-clause :combinator #'cons)))

(define-condition parsing-error (error) ())

(defun parse (clause input-string)
  (block parse-block
         (funcall clause
                  (lambda (remainder value)
                    (when (string= remainder "")
                      (return-from parse-block value)))
                  input-string)
         (error 'parsing-error)))

(defun atom-counts-zero ()
  (mapcar (lambda (e) (cons e 0)) *elements*))

(defun atom-counts-of-element (element)
  (mapcar (lambda (e) (if (eq e element)
                        (cons e 1)
                        (cons e 0)))
          *elements*))

(defun mult-atom-counts (n atom-counts)
  (mapcar (lambda (p) (cons (car p) (* n (cdr p))))
          atom-counts))

(defun add-atom-counts (&rest atom-counts)
  (flet ((add (count1 count2)
           (mapcar (lambda (p) (cons (car p)
                                     (+ (cdr p)
                                        (cdr (assoc (car p) count2)))))
                   count1)))
    (reduce #'add atom-counts :initial-value (atom-counts-zero))))

(defun atom-counts (parse-tree)
  (if (consp parse-tree)
    (if (numberp (cdr parse-tree))
      (mult-atom-counts (cdr parse-tree) (atom-counts (car parse-tree)))
      (apply #'add-atom-counts (mapcar #'atom-counts parse-tree)))
    (if parse-tree
      (atom-counts-of-element parse-tree)
      (atom-counts-zero))))

(defun analyze-formula (formula-string)
  (let* ((parse-tree (parse #'formula-clause formula-string))
         (atom-counts (atom-counts parse-tree))
         (atomic-weight (apply #'+ (mapcar (lambda (p)
                                             (* (cdr p)
                                                (atomic-weight (car p))))
                                           atom-counts))))
    (format t "Chemical analysis for ~a: ~%" formula-string)
    (format t "   contains~%")
    (dolist (p atom-counts)
      (when (> (cdr p) 0)
        (format t "      ~a ~a atom~a~%" (cdr p) (name (car p)) (if (> (cdr p) 1) "s" ""))))
    (format t "   has atomic weight ~a~%" atomic-weight)
    (finish-output nil)))

; (analyze-formula "C6H2(NO2)3CH3") ; TNT
(analyze-formula "CCl2F2") ; dichlorodifluoromethane
(analyze-formula "NaHCO3") ; sodium bicarbonate
(analyze-formula "C4H8(OH)2") ; trimethylene glycol monomethyl ether
(analyze-formula "PbCl(NH3)2(COOH)2") ; ?!?