#! /usr/bin/env python3
from functools import reduce
def createValue2Code( data) :
# 各要素の発生回数を数える
def countDatum( ) : # ローカル関数の場合、スコープが createValue2Code の data を捕捉する
def foo( d, elt) :
try :
d[ elt] += 1
except KeyError :
d[ elt] = 1
finally :
return d
return sorted ( [ [ v, k] for k, v in reduce ( foo, list ( data) , { } ) .items ( ) ] )
# ハフマン木を生成
def generateHuffman( lst) :
return reduce ( lambda acc, elt: elt if acc == [ ] else [ elt[ 0 ] + acc[ 0 ] , [ elt, acc] ] , lst, [ ] )
# 符号情報を作成
def codeInfo( n) :
return sum ( [ 2 ** i for i in range ( 1 , n + 1 ) ] )
def value2code( tree) :
def foo( x, y) :
match y:
case [ parent, [ left, [ ] ] ] :
return [ parent, [ left, x] ]
acc = [ ]
while True :
match tree:
case [ x, xs] :
if isinstance ( xs, str ) :
return reduce ( foo, acc, [ 1 + codeInfo( x - 1 ) , xs] )
match xs:
case [ [ m, left] , [ n, right] ] :
k = codeInfo( x)
tree, acc = [ x + 1 , right] , [ [ x, [ [ k, left] , [ ] ] ] ] + acc
def dfs( tree) :
acc = [ ]
while True :
match tree:
case [ a, [ b, bs] ] :
match bs:
case [ x, xs] :
if isinstance ( xs, str ) :
lst = lambda : reversed ( [ bs, b, *acc] )
return { xs: x for x, xs in lst( ) } , dict ( lst( ) )
tree, acc = bs, [ b] + acc
return dfs( value2code( [ 0 ] + generateHuffman( countDatum( ) ) [ 1 :] ) )
def compress( data) :
compress_table, uncompress_table = createValue2Code( data)
return [ compress_table[ key] for key in data] , uncompress_table
if __name__ == '__main__' :
# Wikipedia での実行例に比べるとDとEが逆に思うかもしれんが気にしない事。
# Dの出現率とEの出現率は全く同じで、ソーティングアルゴリズムによっては
# 順序を取り替えたり、あるいは保持したり、する場合がある(安定ソートと不安定ソート)。
# どちらにせよ、「理論的には」同値の順序はどっちでもいい。
print ( compress( "DAEBCBACBBBC" ) )
IyEgL3Vzci9iaW4vZW52IHB5dGhvbjMKCmZyb20gZnVuY3Rvb2xzIGltcG9ydCByZWR1Y2UKCmRlZiBjcmVhdGVWYWx1ZTJDb2RlKGRhdGEpOgoKICAgICMg5ZCE6KaB57Sg44Gu55m655Sf5Zue5pWw44KS5pWw44GI44KLCiAgICBkZWYgY291bnREYXR1bSgpOiAjIOODreODvOOCq+ODq+mWouaVsOOBruWgtOWQiOOAgeOCueOCs+ODvOODl+OBjCBjcmVhdGVWYWx1ZTJDb2RlIOOBriBkYXRhIOOCkuaNleaNieOBmeOCiwogICAgICAgIGRlZiBmb28oZCwgZWx0KToKICAgICAgICAgICAgdHJ5OgogICAgICAgICAgICAgICAgZFtlbHRdICs9IDEKICAgICAgICAgICAgZXhjZXB0IEtleUVycm9yOgogICAgICAgICAgICAgICAgZFtlbHRdID0gMQogICAgICAgICAgICBmaW5hbGx5OgogICAgICAgICAgICAgICAgcmV0dXJuIGQKICAgICAgICByZXR1cm4gc29ydGVkKFtbdiwga11mb3IgaywgdiBpbiByZWR1Y2UoZm9vLCBsaXN0KGRhdGEpLCB7fSkuaXRlbXMoKV0pCgogICAgIyDjg4/jg5Xjg57jg7PmnKjjgpLnlJ/miJAKICAgIGRlZiBnZW5lcmF0ZUh1ZmZtYW4obHN0KToKICAgICAgICByZXR1cm4gcmVkdWNlKGxhbWJkYSBhY2MsIGVsdDogZWx0IGlmIGFjYyA9PSBbXSBlbHNlIFtlbHRbMF0gKyBhY2NbMF0sIFtlbHQsIGFjY11dLCBsc3QsIFtdKQoKICAgICMg56ym5Y+35oOF5aCx44KS5L2c5oiQCiAgICBkZWYgY29kZUluZm8obik6CiAgICAgICAgcmV0dXJuIHN1bShbMiAqKiBpIGZvciBpIGluIHJhbmdlKDEsIG4gKyAxKV0pCgogICAgZGVmIHZhbHVlMmNvZGUodHJlZSk6CiAgICAgICAgZGVmIGZvbyh4LCB5KToKICAgICAgICAgICAgbWF0Y2ggeToKICAgICAgICAgICAgICAgIGNhc2UgW3BhcmVudCwgW2xlZnQsIFtdXV06CiAgICAgICAgICAgICAgICAgICAgcmV0dXJuIFtwYXJlbnQsIFtsZWZ0LCB4XV0KICAgICAgICBhY2MgPSBbXQogICAgICAgIHdoaWxlIFRydWU6CiAgICAgICAgICAgIG1hdGNoIHRyZWU6CiAgICAgICAgICAgICAgICBjYXNlIFt4LCB4c106CiAgICAgICAgICAgICAgICAgICAgaWYgaXNpbnN0YW5jZSh4cywgc3RyKToKICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIHJlZHVjZShmb28sIGFjYywgWzEgKyBjb2RlSW5mbyh4IC0gMSksIHhzXSkKICAgICAgICAgICAgICAgICAgICBtYXRjaCB4czoKICAgICAgICAgICAgICAgICAgICAgICAgY2FzZSBbW20sIGxlZnRdLCBbbiwgcmlnaHRdXToKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGsgPSBjb2RlSW5mbyh4KQogICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJlZSwgYWNjID0gW3ggKyAxLCByaWdodF0sIFtbeCwgW1trLCBsZWZ0XSwgW11dXV0gKyBhY2MKCiAgICBkZWYgZGZzKHRyZWUpOgogICAgICAgIGFjYyA9IFtdCiAgICAgICAgd2hpbGUgVHJ1ZToKICAgICAgICAgICAgbWF0Y2ggdHJlZToKICAgICAgICAgICAgICAgIGNhc2UgW2EsIFtiLCBic11dOgogICAgICAgICAgICAgICAgICAgIG1hdGNoIGJzOgogICAgICAgICAgICAgICAgICAgICAgICBjYXNlIFt4LCB4c106CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZiBpc2luc3RhbmNlKHhzLCBzdHIpOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxzdCA9IGxhbWJkYTogcmV2ZXJzZWQoW2JzLCBiLCAqYWNjXSkKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4ge3hzOiB4IGZvciB4LCB4cyBpbiBsc3QoKX0sIGRpY3QobHN0KCkpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB0cmVlLCBhY2MgPSBicywgW2JdICsgYWNjCgogICAgcmV0dXJuIGRmcyh2YWx1ZTJjb2RlKFswXSArIGdlbmVyYXRlSHVmZm1hbihjb3VudERhdHVtKCkpWzE6XSkpCgpkZWYgY29tcHJlc3MoZGF0YSk6CiAgICBjb21wcmVzc190YWJsZSwgdW5jb21wcmVzc190YWJsZSA9IGNyZWF0ZVZhbHVlMkNvZGUoZGF0YSkKICAgIHJldHVybiBbY29tcHJlc3NfdGFibGVba2V5XSBmb3Iga2V5IGluIGRhdGFdLCB1bmNvbXByZXNzX3RhYmxlCgppZiBfX25hbWVfXyA9PSAnX19tYWluX18nOgogICAgIyBXaWtpcGVkaWEg44Gn44Gu5a6f6KGM5L6L44Gr5q+U44G544KL44GoROOBqEXjgYzpgIbjgavmgJ3jgYbjgYvjgoLjgZfjgozjgpPjgYzmsJfjgavjgZfjgarjgYTkuovjgIIKICAgICMgROOBruWHuuePvueOh+OBqEXjga7lh7rnj77njofjga/lhajjgY/lkIzjgZjjgafjgIHjgr3jg7zjg4bjgqPjg7PjgrDjgqLjg6vjgrTjg6rjgrrjg6DjgavjgojjgaPjgabjga8KICAgICMg6aCG5bqP44KS5Y+W44KK5pu/44GI44Gf44KK44CB44GC44KL44GE44Gv5L+d5oyB44GX44Gf44KK44CB44GZ44KL5aC05ZCI44GM44GC44KLKOWuieWumuOCveODvOODiOOBqOS4jeWuieWumuOCveODvOODiCnjgIIKICAgICMg44Gp44Gh44KJ44Gr44Gb44KI44CB44CM55CG6KuW55qE44Gr44Gv44CN5ZCM5YCk44Gu6aCG5bqP44Gv44Gp44Gj44Gh44Gn44KC44GE44GE44CCCiAgICBwcmludChjb21wcmVzcygiREFFQkNCQUNCQkJDIikpCg==
compilation info
Traceback (most recent call last):
File "/usr/lib/python3.9/py_compile.py", line 144, in compile
code = loader.source_to_code(source_bytes, dfile or file,
File "<frozen importlib._bootstrap_external>", line 918, in source_to_code
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "./prog.py", line 28
match y:
^
SyntaxError: invalid syntax
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/lib/python3.9/py_compile.py", line 150, in compile
raise py_exc
py_compile.PyCompileError: File "./prog.py", line 28
match y:
^
SyntaxError: invalid syntax
stdout