;; -*- mode:lisp;coding:us-ascii -*-
(defpackage "COM.INFORMATIMAGO.BENCHMARK.CONCATENATE-STRINGS"
(:use "COMMON-LISP")
(:export "BENCHMARK-CONCATENATE-STRINGS"))
(in-package "COM.INFORMATIMAGO.BENCHMARK.CONCATENATE-STRINGS")
(defparameter *internal-time-unit*
(coerce (/ internal-time-units-per-second) 'double-float)
"The internal time slice, in seconds, as a DOUBLE-FLOAT.")
(defun get-run-time ()
"
RETURN: The run-time (in seconds).
Run-time is not synchronized with real-time,
since the process is scheduled by the kernel.
"
(* (get-internal-run-time) *internal-time-unit*))
(defun chrono-run-time* (thunk)
"
Call the THUNK and return the run-time spent on it.
The results of THUNK are ignored.
"
(let ((start (get-run-time)))
(funcall thunk)
(- (get-run-time) start)))
(defmacro chrono-run-time (&body body)
`(chrono-run-time* (lambda () ,@body)))
(defun iota (count &optional (start 0) (step 1))
"
RETURN: A list containing the elements
(start start+step ... start+(count-1)*step)
The start and step parameters default to 0 and 1, respectively.
This procedure takes its name from the APL primitive.
EXAMPLE: (iota 5) => (0 1 2 3 4)
(iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4)
"
(loop
:repeat count
:for item = start :then (+ item step)
:collect item))
(defgeneric group-by (sequence n)
(:documentation "Returns a list of subsequences of SEQUENCE of length N,
whose concatenation is equal to SEQUENCE.")
(:method ((sequence vector) n)
(check-type n (integer 1))
(loop
:with length := (length sequence)
:for i :from 0 :by n
:while (< i length)
:collect (subseq sequence i (min length (+ i n)))))
(:method ((sequence list) n)
(check-type n (integer 1))
(loop
:for sub := sequence :then rest
:for rest := (nthcdr #+sbcl #| is idiotic! |# (min (length sub) n)
#-sbcl n
sub)
:while sub
:collect (ldiff sub rest))))
(defun concatenate-strings (list-of-string-designators)
"
LIST-OF-STRING-DESIGNATORS:
EACH element may be either a string-designator or a list of characters,
or a list containing a string-designator or a list of character,
and a start and end position denoting a substring.
RETURN: A string containing the concatenation of the strings
of the LIST-OF-STRINGS.
"
(flet ((slength (string)
(if (stringp string)
(length string)
(- (or (third string) (length (first string)))
(second string)))))
(loop
:with strings = (mapcar
(lambda (item)
(etypecase item
(null "")
((or string symbol character) (string item))
(cons (if (every (function characterp) item)
(coerce item 'string)
(list (etypecase (first item)
(null "")
((or string symbol character) (string (first item)))
(cons (coerce (first item) 'string)))
(second item)
(third item))))))
list-of-string-designators)
:with result = (make-string (reduce (function +) strings :key (function slength)))
:for pos = 0
:then (+ pos (slength string))
:for string :in strings
:do (if (stringp string)
(replace result string :start1 pos)
(replace result (first string) :start1 pos
:start2 (second string) :end2 (third string)))
:finally (return result))))
(defun benchmark-concatenate-strings ()
(let ((headers
'("#strings"
"concat-string"
"w/out-to-str"
"reduce-concat"
"apply-concat")))
(format t "~2%** ~{~A~^, ~}~2%" (list (lisp-implementation-type)
(lisp-implementation-version)
(machine-type)))
(format t "| ~{~A |~}~%" headers)
(format t "|-~%")
(format t "| ~{~*<r> |~}~%" headers)
(dolist (n '(100 1000 5000 10000 50000 100000))
(let* ((strings (map-into (make-list n)
(lambda (i) (format nil "+~R" i))
(iota n)))
(results '())
(times
(list
(chrono-run-time (push (concatenate-strings strings) results))
(chrono-run-time (push (with-output-to-string (out)
(dolist (string strings)
(write-string string out)))
results))
(chrono-run-time (push (if (<= n 10000)
(reduce (lambda (a b)
(concatenate 'string a b))
strings :initial-value "")
;; otherwise it's too slow.
nil)
results))
(chrono-run-time (push (if #-sbcl t #+sbcl (<= n 50000)
(loop ; apply-concat
:for groups := (group-by strings (1- call-arguments-limit))
:then (group-by concats (1- call-arguments-limit))
:for concats := (mapcar (lambda (strings)
(apply (function concatenate) 'string strings))
groups)
:while (rest concats)
:finally (return (first concats)))
;; sbcl cannot support that
;; many arguments on the
;; control stack, which is
;; idiotic, since it has a
;; call-arguments-limit =
;; 4611686018427387903
nil)
results)))))
(setf results (nreverse results))
(format t "| ~8D | ~:{~:[N/A~;~12,6F~] |~}~%"
n (mapcar (function list) results times))
(loop
:with f := (first results)
:with fn := (first (rest headers))
:for s :in (rest results)
:for n :in (rest (rest headers))
:do (assert (or (null s) (string= f s))
(f s)
"Different results between ~A and ~A"
fn n))))))
(benchmark-concatenate-strings)
#|
clall '(load (compile-file "/Users/pjb/src/lisp/encours/benchmark-string-concatenate.lisp"))'
* Benchmark string concatenation
** "Armed Bear Common Lisp", "1.8.0", "X86_64"
| #strings | concat-string | w/out-to-str | reduce-concat | apply-concat |
|----------+---------------+--------------+---------------+--------------|
| <r> | <r> | <r> | <r> | <r> |
| 100 | 0.009000 | 0.000000 | 0.002000 | 0.00100 |
| 1000 | 0.011000 | 0.00100 | 0.097000 | 0.00100 |
| 5000 | 0.046000 | 0.005000 | 1.986000 | 0.004000 |
| 10000 | 0.075000 | 0.007000 | 7.264000 | 0.007000 |
| 50000 | 0.401000 | 0.033000 | N/A | 0.043000 |
| 100000 | 0.822000 | 0.066000 | N/A | 0.109000 |
==> with-output-to-string faster overall.
** "Clozure Common Lisp", "Version 1.12 (v1.12-39-g6c1a9458) DarwinX8664", "x86_64"
| #strings | concat-string | w/out-to-str | reduce-concat | apply-concat |
|----------+---------------+--------------+---------------+--------------|
| <r> | <r> | <r> | <r> | <r> |
| 100 | 0.000024 | 0.000010 | 0.000060 | 0.000051 |
| 1000 | 0.000291 | 0.000254 | 0.018234 | 0.000258 |
| 5000 | 0.001795 | 0.001753 | 0.307806 | 0.001815 |
| 10000 | 0.003970 | 0.004644 | 1.184964 | 0.003426 |
| 50000 | 0.022194 | 0.034771 | N/A | 0.024760 |
| 100000 | 0.062293 | 0.071735 | N/A | 0.105054 |
CONCATENATE-STRING is faster when you have more than between 50000 and 100000 strings to concatenate.
WITH-OUTPUT-STRING is faster when you have about 100 strings.
APPLY-CONCAT is faster when you have about 1000 strings.
(In ccl, call-arguments-limit is 65536).
** ECL, 21.2.1, x86_64
| #strings | concat-string | w/out-to-str | reduce-concat | apply-concat |
|----------+---------------+--------------+---------------+--------------|
| <r> | <r> | <r> | <r> | <r> |
| 100 | 0.001000 | 0.000000 | 0.001000 | 0.000000 |
| 1000 | 0.000000 | 0.001000 | 0.391000 | 0.001000 |
| 5000 | 0.003000 | 0.038000 | 14.063000 | 0.007000 |
| 10000 | 0.006000 | 0.005000 | 59.637000 | 0.013000 |
| 50000 | 0.029000 | 0.088000 | N/A | 0.072000 |
| 100000 | 0.059000 | 0.133000 | N/A | 0.305000 |
CONCATENATE-STRING is faster when you have more than about 50000 strings to concatenate.
APPLY-CONCAT is faster between about 5000 and 10000 strings.
Any function but REDUCE-CONCAT but is as fast when you have less than about 1000 strings.
** SBCL, 2.1.3, X86-64
| #strings | concat-string | w/out-to-str | reduce-concat | apply-concat |
|----------+---------------+--------------+---------------+--------------|
| <r> | <r> | <r> | <r> | <r> |
| 100 | 0.000018 | 0.000005 | 0.000027 | 0.000017 |
| 1000 | 0.000198 | 0.000055 | 0.035400 | 0.000095 |
| 5000 | 0.001590 | 0.000875 | 0.370234 | 0.000288 |
| 10000 | 0.003036 | 0.001767 | 1.534191 | 0.001092 |
| 50000 | 0.017485 | 0.006019 | N/A | 0.009070 |
| 100000 | 0.053100 | 0.032041 | N/A | N/A |
SBCL is a liar, and cannot do call-arguments-limit parameters, far
from it! So apply-concat is limited to less than about 50000
arguments.
WITH-OUTPUT-TO-STRING is fastest above about 50000 strings.
APPLY-CONCAT is fastest between about 5000 and 50000 strings.
WITH-OUTPUT-TO-STRING is fastest below 5000 strings.
|#