#lang racket
(require (only-in srfi/1 alist-cons alist-delete unfold zip))
(define (codeInfo n)
(let ((lst (cons 0 (unfold (lambda (x)
(= x n))
(lambda (x)
(apply + (map (lambda (y)
(expt 2 y)) (range 1 (add1 x)))))
add1
1))))
`(,@lst ,(add1 (last lst)))))
;; 各要素の発生回数を数える
(define (countDatum data)
(map (match-lambda
((cons n c) `(,c ,n)))
(sort
(foldl (lambda (elt lst)
(cond ((assv elt lst)
=> (lambda (x)
(alist-cons elt (add1 (cdr x))
(alist-delete elt lst))))
(else (alist-cons elt 1 lst))))
; 今度はデカイ値から降順に並べる
'() (string->list data)) #:key cdr >)))
(define (compress data)
(let* ((lst0 (string->list data))
(lst1 (countDatum data))
(lst2 (codeInfo (sub1 (length lst1))))
(lst3 (zip (map cadr lst1) lst2)))
(values (map (lambda (key)
(cadr (assv key lst3))) lst0)
(map (lambda (x)
(match-let ((`(,a ,b) x))
(cons b a))) lst3))))
#| 「二分木ヴァージョン」と、DとEにアテられる数値が違うが、そもそもDとEの出現率は同じだ。
ソートによっては「安定ソート」な為、元の順番を保持しようとする。
しかし、結果、こちらのエンコード/デコードでも「理論上は」問題がない。
(compress "DAEBCBACBBBC") |#
I2xhbmcgcmFja2V0CgoocmVxdWlyZSAob25seS1pbiBzcmZpLzEgYWxpc3QtY29ucyBhbGlzdC1kZWxldGUgdW5mb2xkIHppcCkpCgooZGVmaW5lIChjb2RlSW5mbyBuKQogIChsZXQgKChsc3QgKGNvbnMgMCAodW5mb2xkIChsYW1iZGEgKHgpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAoPSB4IG4pKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIChsYW1iZGEgKHgpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAoYXBwbHkgKyAobWFwIChsYW1iZGEgKHkpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKGV4cHQgMiB5KSkgKHJhbmdlIDEgKGFkZDEgeCkpKSkpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWRkMQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIDEpKSkpCiAgICBgKCxAbHN0ICwoYWRkMSAobGFzdCBsc3QpKSkpKQoKOzsg5ZCE6KaB57Sg44Gu55m655Sf5Zue5pWw44KS5pWw44GI44KLCihkZWZpbmUgKGNvdW50RGF0dW0gZGF0YSkKICAobWFwIChtYXRjaC1sYW1iZGEKICAgICAgICAgKChjb25zIG4gYykgYCgsYyAsbikpKQogICAgICAgKHNvcnQKICAgICAgICAoZm9sZGwgKGxhbWJkYSAoZWx0IGxzdCkKICAgICAgICAgICAgICAgICAoY29uZCAoKGFzc3YgZWx0IGxzdCkKICAgICAgICAgICAgICAgICAgICAgICAgPT4gKGxhbWJkYSAoeCkKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAoYWxpc3QtY29ucyBlbHQgKGFkZDEgKGNkciB4KSkKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAoYWxpc3QtZGVsZXRlIGVsdCBsc3QpKSkpCiAgICAgICAgICAgICAgICAgICAgICAgKGVsc2UgKGFsaXN0LWNvbnMgZWx0IDEgbHN0KSkpKQogICAgICAgICAgICAgICA7IOS7iuW6puOBr+ODh+OCq+OCpOWApOOBi+OCiemZjemghuOBq+S4puOBueOCiwogICAgICAgICAgICAgICAnKCkgKHN0cmluZy0+bGlzdCBkYXRhKSkgIzprZXkgY2RyID4pKSkgCgooZGVmaW5lIChjb21wcmVzcyBkYXRhKQogIChsZXQqICgobHN0MCAoc3RyaW5nLT5saXN0IGRhdGEpKQogICAgICAgICAobHN0MSAoY291bnREYXR1bSBkYXRhKSkKICAgICAgICAgKGxzdDIgKGNvZGVJbmZvIChzdWIxIChsZW5ndGggbHN0MSkpKSkKICAgICAgICAgKGxzdDMgKHppcCAobWFwIGNhZHIgbHN0MSkgbHN0MikpKQogICAgKHZhbHVlcyAobWFwIChsYW1iZGEgKGtleSkKICAgICAgICAgICAgICAgICAgIChjYWRyIChhc3N2IGtleSBsc3QzKSkpIGxzdDApCiAgICAgICAgICAgIChtYXAgKGxhbWJkYSAoeCkKICAgICAgICAgICAgICAgICAgIChtYXRjaC1sZXQgKChgKCxhICxiKSB4KSkKICAgICAgICAgICAgICAgICAgICAgKGNvbnMgYiBhKSkpIGxzdDMpKSkpCgojfCDjgIzkuozliIbmnKjjg7TjgqHjg7zjgrjjg6fjg7PjgI3jgajjgIFE44GoReOBq+OCouODhuOCieOCjOOCi+aVsOWApOOBjOmBleOBhuOBjOOAgeOBneOCguOBneOCgkTjgahF44Gu5Ye654++546H44Gv5ZCM44GY44Gg44CCCiAgIOOCveODvOODiOOBq+OCiOOBo+OBpuOBr+OAjOWuieWumuOCveODvOODiOOAjeOBqueCuuOAgeWFg+OBrumghueVquOCkuS/neaMgeOBl+OCiOOBhuOBqOOBmeOCi+OAggogICDjgZfjgYvjgZfjgIHntZDmnpzjgIHjgZPjgaHjgonjga7jgqjjg7PjgrPjg7zjg4kv44OH44Kz44O844OJ44Gn44KC44CM55CG6KuW5LiK44Gv44CN5ZWP6aGM44GM44Gq44GE44CCCihjb21wcmVzcyAiREFFQkNCQUNCQkJDIikgfCM=