#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
using namespace std;
struct ltcode
{
unsigned long long frequency;
unsigned char letter;
string code = "";
};
void zeroset(ltcode *c)
{
for (unsigned int i = 0; i < 256; ++i)
{
c[i].frequency = 0;
c[i].letter = i;
}
}
bool f(ltcode x, ltcode y)
{
if (x.frequency != y.frequency) return x.frequency > y.frequency;
else return x.letter > y.letter;
}
struct B
{
string list;
int i;
int left;
int right;
string code = "";
B()
{
list = "";
i = 0;
}
};
void print_binary_tree(int num, vector <B> &tree)
{
if (tree[num].list.size() > 1)
{
cout << "\"'" << tree[num].list << "', " << tree[num].i << ", code: \'" << tree[num].code << "\'\" -> \"'" << tree[tree[num].left].list << "', " << tree[tree[num].left].i << ", code: \'" << tree[tree[num].left].code << "\'\" [ label = \"0\" ];\n";
cout << "\"'" << tree[num].list << "', " << tree[num].i << ", code: \'" << tree[num].code << "\'\" -> \"'" << tree[tree[num].right].list << "', " << tree[tree[num].right].i << ", code: \'" << tree[tree[num].right].code << "\'\" [ label = \"1\" ];\n";
print_binary_tree(tree[num].left, tree);
print_binary_tree(tree[num].right, tree);
}
}
void print_tree_graph(vector <B> &tree)
{
cout << "digraph G {\n";
print_binary_tree(tree.size()-1, tree);
cout << "}\n";
}
void huffman_encoding_in(int num, string code, vector <B> &tree, ltcode *letter_codes)
{
tree[num].code = code;
if (tree[num].list.size() > 1)
{
huffman_encoding_in(tree[num].left, code+"0", tree, letter_codes);
huffman_encoding_in(tree[num].right, code+"1", tree, letter_codes);
}
else
{
for (int j = 0; ; ++j)
{
if (letter_codes[j].letter == tree[num].list[0])
{
letter_codes[j].code = code;
break;
}
}
}
}
void huffman_encoding(vector <B> &sorted_tree, ltcode *letter_codes)
{
huffman_encoding_in(sorted_tree.size()-1, "", sorted_tree, letter_codes);
}
int main() {
unsigned char c;
string S;
ltcode count[256], stringlinks[256];
zeroset(count);
while (scanf("%c", &c))
{
if (c == '\n') break;
S += c;
++count[c].frequency;
}
sort(count, count+256, f);
vector <B> tree;
B tmp;
tmp.list = "0";
int j = 0, letter_amount;
for (j = 0; (count[j].frequency); ++j)
{
tmp.i = count[j].frequency;
tmp.list[0] = count[j].letter;
tree.push_back(tmp);
}
int maxsize = j-1, size;
sort(tree.begin(), tree.end(), [] (B a, B b)
{
if (a.i != b.i) return (a.i < b.i);
else return (a.list.size() < b.list.size());
});
for (j = 0; size < maxsize;)
{
tmp.list = tree[j].list + tree[j+1].list;
tmp.i = tree[j].i + tree[j+1].i;
tmp.left = j;
tmp.right = j+1;
size = tmp.list.size();
tree.push_back(tmp);
j += 2;
sort(tree.begin()+j, tree.end(), [] (B a, B b)
{
if (a.i != b.i) return (a.i < b.i);
else return (a.list.size() < b.list.size());
}
);
}
huffman_encoding(tree, count);
print_tree_graph(tree);
cout << "\nCodes of letters:\n";
for (j = 0; (count[j].frequency); ++j)
{
cout << '\'' << count[j].letter << "\'(" << count[j].code << ") ";
stringlinks[count[j].letter].code = count[j].code;
}
cout << "\n\nEncoded string:\n";
for (unsigned int i = 0; i < S.size(); ++i)
{
cout << stringlinks[S[i]].code;
}
cout << '\n';
return 0;
}
I2luY2x1ZGUgPGlvc3RyZWFtPgojaW5jbHVkZSA8YWxnb3JpdGhtPgojaW5jbHVkZSA8dmVjdG9yPgojaW5jbHVkZSA8c3RyaW5nPgp1c2luZyBuYW1lc3BhY2Ugc3RkOwoKc3RydWN0IGx0Y29kZQp7Cgl1bnNpZ25lZCBsb25nIGxvbmcgZnJlcXVlbmN5OwoJdW5zaWduZWQgY2hhciBsZXR0ZXI7CglzdHJpbmcgY29kZSA9ICIiOwp9OwoKdm9pZCB6ZXJvc2V0KGx0Y29kZSAqYykKewoJZm9yICh1bnNpZ25lZCBpbnQgaSA9IDA7IGkgPCAyNTY7ICsraSkKCXsKCQljW2ldLmZyZXF1ZW5jeSA9IDA7CgkJY1tpXS5sZXR0ZXIgPSBpOwoJfQp9Cgpib29sIGYobHRjb2RlIHgsIGx0Y29kZSB5KQp7CglpZiAoeC5mcmVxdWVuY3kgIT0geS5mcmVxdWVuY3kpIHJldHVybiB4LmZyZXF1ZW5jeSA+IHkuZnJlcXVlbmN5OwoJZWxzZSByZXR1cm4geC5sZXR0ZXIgPiB5LmxldHRlcjsKfQoKc3RydWN0IEIKewoJc3RyaW5nIGxpc3Q7CglpbnQgaTsKCQoJaW50IGxlZnQ7CglpbnQgcmlnaHQ7CgkKCXN0cmluZyBjb2RlID0gIiI7CgkKCUIoKQoJewoJCWxpc3QgPSAiIjsKCQlpID0gMDsKCX0KfTsKCnZvaWQgcHJpbnRfYmluYXJ5X3RyZWUoaW50IG51bSwgdmVjdG9yIDxCPiAmdHJlZSkKewoJaWYgKHRyZWVbbnVtXS5saXN0LnNpemUoKSA+IDEpCgl7CgkJY291dCA8PCAiXCInIiA8PCB0cmVlW251bV0ubGlzdCA8PCAiJywgIiA8PCB0cmVlW251bV0uaSA8PCAiLCBjb2RlOiBcJyIgPDwgdHJlZVtudW1dLmNvZGUgPDwgIlwnXCIgLT4gXCInIiA8PCB0cmVlW3RyZWVbbnVtXS5sZWZ0XS5saXN0IDw8ICInLCAiIDw8IHRyZWVbdHJlZVtudW1dLmxlZnRdLmkgPDwgIiwgY29kZTogXCciIDw8IHRyZWVbdHJlZVtudW1dLmxlZnRdLmNvZGUgPDwgIlwnXCIgWyBsYWJlbCA9IFwiMFwiIF07XG4iOwoJCWNvdXQgPDwgIlwiJyIgPDwgdHJlZVtudW1dLmxpc3QgPDwgIicsICIgPDwgdHJlZVtudW1dLmkgPDwgIiwgY29kZTogXCciIDw8IHRyZWVbbnVtXS5jb2RlIDw8ICJcJ1wiIC0+IFwiJyIgPDwgdHJlZVt0cmVlW251bV0ucmlnaHRdLmxpc3QgPDwgIicsICIgPDwgdHJlZVt0cmVlW251bV0ucmlnaHRdLmkgPDwgIiwgY29kZTogXCciIDw8IHRyZWVbdHJlZVtudW1dLnJpZ2h0XS5jb2RlIDw8ICJcJ1wiIFsgbGFiZWwgPSBcIjFcIiBdO1xuIjsKCQlwcmludF9iaW5hcnlfdHJlZSh0cmVlW251bV0ubGVmdCwgdHJlZSk7CgkJcHJpbnRfYmluYXJ5X3RyZWUodHJlZVtudW1dLnJpZ2h0LCB0cmVlKTsKCX0KfQoKdm9pZCBwcmludF90cmVlX2dyYXBoKHZlY3RvciA8Qj4gJnRyZWUpCnsKCWNvdXQgPDwgImRpZ3JhcGggRyB7XG4iOwoJcHJpbnRfYmluYXJ5X3RyZWUodHJlZS5zaXplKCktMSwgdHJlZSk7Cgljb3V0IDw8ICJ9XG4iOwp9Cgp2b2lkIGh1ZmZtYW5fZW5jb2RpbmdfaW4oaW50IG51bSwgc3RyaW5nIGNvZGUsIHZlY3RvciA8Qj4gJnRyZWUsIGx0Y29kZSAqbGV0dGVyX2NvZGVzKQp7Cgl0cmVlW251bV0uY29kZSA9IGNvZGU7CglpZiAodHJlZVtudW1dLmxpc3Quc2l6ZSgpID4gMSkKCXsKCQlodWZmbWFuX2VuY29kaW5nX2luKHRyZWVbbnVtXS5sZWZ0LCBjb2RlKyIwIiwgdHJlZSwgbGV0dGVyX2NvZGVzKTsKCQlodWZmbWFuX2VuY29kaW5nX2luKHRyZWVbbnVtXS5yaWdodCwgY29kZSsiMSIsIHRyZWUsIGxldHRlcl9jb2Rlcyk7Cgl9CgllbHNlCgl7CgkJZm9yIChpbnQgaiA9IDA7IDsgKytqKQoJCXsKCQkJaWYgKGxldHRlcl9jb2Rlc1tqXS5sZXR0ZXIgPT0gdHJlZVtudW1dLmxpc3RbMF0pCgkJCXsKCQkJCWxldHRlcl9jb2Rlc1tqXS5jb2RlID0gY29kZTsKCQkJCWJyZWFrOwoJCQl9CgkJfQoJfQp9Cgp2b2lkIGh1ZmZtYW5fZW5jb2RpbmcodmVjdG9yIDxCPiAmc29ydGVkX3RyZWUsIGx0Y29kZSAqbGV0dGVyX2NvZGVzKQp7CglodWZmbWFuX2VuY29kaW5nX2luKHNvcnRlZF90cmVlLnNpemUoKS0xLCAiIiwgc29ydGVkX3RyZWUsIGxldHRlcl9jb2Rlcyk7Cn0KCmludCBtYWluKCkgewoJdW5zaWduZWQgY2hhciBjOwoJc3RyaW5nIFM7CglsdGNvZGUgY291bnRbMjU2XSwgc3RyaW5nbGlua3NbMjU2XTsKCXplcm9zZXQoY291bnQpOwoKCXdoaWxlIChzY2FuZigiJWMiLCAmYykpCgl7CgkJaWYgKGMgPT0gJ1xuJykgYnJlYWs7CgkJUyArPSBjOwoJCSsrY291bnRbY10uZnJlcXVlbmN5OwoJfQoJc29ydChjb3VudCwgY291bnQrMjU2LCBmKTsKCQoJdmVjdG9yIDxCPiB0cmVlOwoJQiB0bXA7Cgl0bXAubGlzdCA9ICIwIjsKCWludCBqID0gMCwgbGV0dGVyX2Ftb3VudDsKCWZvciAoaiA9IDA7IChjb3VudFtqXS5mcmVxdWVuY3kpOyArK2opCgl7CgkJdG1wLmkgPSBjb3VudFtqXS5mcmVxdWVuY3k7CgkJdG1wLmxpc3RbMF0gPSBjb3VudFtqXS5sZXR0ZXI7CgkJdHJlZS5wdXNoX2JhY2sodG1wKTsKCX0KCWludCBtYXhzaXplID0gai0xLCBzaXplOwoJCglzb3J0KHRyZWUuYmVnaW4oKSwgdHJlZS5lbmQoKSwgW10gKEIgYSwgQiBiKQoJewoJCWlmIChhLmkgIT0gYi5pKSByZXR1cm4gKGEuaSA8IGIuaSk7CgkJZWxzZSByZXR1cm4gKGEubGlzdC5zaXplKCkgPCBiLmxpc3Quc2l6ZSgpKTsKCX0pOwoJCglmb3IgKGogPSAwOyBzaXplIDwgbWF4c2l6ZTspCgl7CgkJdG1wLmxpc3QgPSB0cmVlW2pdLmxpc3QgKyB0cmVlW2orMV0ubGlzdDsKCQl0bXAuaSA9IHRyZWVbal0uaSArIHRyZWVbaisxXS5pOwoJCXRtcC5sZWZ0ID0gajsKCQl0bXAucmlnaHQgPSBqKzE7CgkJCgkJc2l6ZSA9IHRtcC5saXN0LnNpemUoKTsKCQkKCQl0cmVlLnB1c2hfYmFjayh0bXApOwoJCWogKz0gMjsKCQlzb3J0KHRyZWUuYmVnaW4oKStqLCB0cmVlLmVuZCgpLCBbXSAoQiBhLCBCIGIpCgkJewoJCQlpZiAoYS5pICE9IGIuaSkgcmV0dXJuIChhLmkgPCBiLmkpOwoJCQllbHNlIHJldHVybiAoYS5saXN0LnNpemUoKSA8IGIubGlzdC5zaXplKCkpOwoJCX0KCQkpOwoJfQoJaHVmZm1hbl9lbmNvZGluZyh0cmVlLCBjb3VudCk7CglwcmludF90cmVlX2dyYXBoKHRyZWUpOwoJY291dCA8PCAiXG5Db2RlcyBvZiBsZXR0ZXJzOlxuIjsKCWZvciAoaiA9IDA7IChjb3VudFtqXS5mcmVxdWVuY3kpOyArK2opCgl7CgkJY291dCA8PCAnXCcnIDw8IGNvdW50W2pdLmxldHRlciA8PCAiXCcoIiA8PCBjb3VudFtqXS5jb2RlIDw8ICIpICAiOwoJCXN0cmluZ2xpbmtzW2NvdW50W2pdLmxldHRlcl0uY29kZSA9IGNvdW50W2pdLmNvZGU7Cgl9Cgljb3V0IDw8ICJcblxuRW5jb2RlZCBzdHJpbmc6XG4iOwoJZm9yICh1bnNpZ25lZCBpbnQgaSA9IDA7IGkgPCBTLnNpemUoKTsgKytpKQoJewoJCWNvdXQgPDwgc3RyaW5nbGlua3NbU1tpXV0uY29kZTsKCX0KCWNvdXQgPDwgJ1xuJzsKCXJldHVybiAwOwp9
digraph G {
"'MLO KITP', 12, code: ''" -> "'MLO', 5, code: '0'" [ label = "0" ];
"'MLO KITP', 12, code: ''" -> "' KITP', 7, code: '1'" [ label = "1" ];
"'MLO', 5, code: '0'" -> "'ML', 2, code: '00'" [ label = "0" ];
"'MLO', 5, code: '0'" -> "'O', 3, code: '01'" [ label = "1" ];
"'ML', 2, code: '00'" -> "'M', 1, code: '000'" [ label = "0" ];
"'ML', 2, code: '00'" -> "'L', 1, code: '001'" [ label = "1" ];
"' KITP', 7, code: '1'" -> "' K', 3, code: '10'" [ label = "0" ];
"' KITP', 7, code: '1'" -> "'ITP', 4, code: '11'" [ label = "1" ];
"' K', 3, code: '10'" -> "' ', 1, code: '100'" [ label = "0" ];
"' K', 3, code: '10'" -> "'K', 2, code: '101'" [ label = "1" ];
"'ITP', 4, code: '11'" -> "'I', 2, code: '110'" [ label = "0" ];
"'ITP', 4, code: '11'" -> "'TP', 2, code: '111'" [ label = "1" ];
"'TP', 2, code: '111'" -> "'T', 1, code: '1110'" [ label = "0" ];
"'TP', 2, code: '111'" -> "'P', 1, code: '1111'" [ label = "1" ];
}
Codes of letters:
'O'(01) 'K'(101) 'I'(110) 'T'(1110) 'P'(1111) 'M'(000) 'L'(001) ' '(100)
Encoded string:
00001001011010110010111011111101110