fork(6) download
  1. /**
  2.  * Demonstration of the Huffman coding algorithm.
  3.  * Huffman coding is an entropy encoding algorithm used for lossless data compression.
  4.  * @see http://e...content-available-to-author-only...a.org/wiki/Huffman_coding )
  5.  *
  6.  * INPUT: text for compression.
  7.  * OUTPUT: The Huffman coding of the text, as a binary string.
  8.  * Author: Erel Segal http://t...content-available-to-author-only...s.fm/rentabrain
  9.  * Created: 2010-09
  10.  */
  11.  
  12. #include <queue>
  13. #include <map>
  14. #include <iostream>
  15. #include <string>
  16. #include <sstream>
  17. using namespace std;
  18.  
  19.  
  20. /* Interface: */
  21. void encodeInto(string text, ostream& out);
  22. string decodeFrom(istream& in);
  23.  
  24.  
  25. /* Class prototype:
  26. class HuffmanCode {
  27. public:
  28. string encode(string source);
  29. string decode(string binary);
  30.  
  31. void createOptimalEncodingFromText(string text);
  32.  
  33. friend ostream& operator<<(ostream& out, const HuffmanCode& object);
  34. friend istream& operator>>(istream& in, HuffmanCode& object);
  35. };
  36. */
  37.  
  38.  
  39.  
  40. /**
  41.  * 1
  42.  */
  43.  
  44.  
  45. class LetterGroup;
  46. typedef LetterGroup* PLetterGroup;
  47.  
  48.  
  49. /**
  50.  * @class LetterGroup - A node in a binary Huffman tree
  51.  * includes a group of letters, their total frequency, and pointers to the left and right sons (if not a leaf)
  52.  */
  53. class LetterGroup {
  54. private:
  55. string myLetters;
  56. int myTotalFrequency;
  57. PLetterGroup myLeft, myRight;
  58.  
  59. public:
  60. LetterGroup(
  61. string newLetters, int newTotalFrequency,
  62. PLetterGroup newLeft=NULL, PLetterGroup newRight=NULL):
  63. myLetters(newLetters), myTotalFrequency(newTotalFrequency),
  64. myLeft(newLeft), myRight(newRight) {}
  65.  
  66. /* In the Huffman algorithm, the priority of the letter group is HIGHER when the frequency is LOWER! */
  67. bool operator== (const LetterGroup& other) { return myTotalFrequency == other.myTotalFrequency; }
  68. bool operator!= (const LetterGroup& other) { return myTotalFrequency != other.myTotalFrequency; }
  69. bool operator> (const LetterGroup& other) { return myTotalFrequency > other.myTotalFrequency; }
  70. bool operator< (const LetterGroup& other) { return myTotalFrequency < other.myTotalFrequency; }
  71. bool operator>= (const LetterGroup& other) { return myTotalFrequency >= other.myTotalFrequency; }
  72. bool operator<= (const LetterGroup& other) { return myTotalFrequency <= other.myTotalFrequency; }
  73.  
  74. string letters() const { return myLetters; }
  75. int totalFrequency() const { return myTotalFrequency; }
  76. const PLetterGroup left() const {return myLeft;}
  77. const PLetterGroup right() const {return myRight;}
  78.  
  79. bool isLeaf() const { return myLeft==NULL; }
  80.  
  81. ~LetterGroup() {
  82. if (myLeft) {
  83. delete myLeft;
  84. }
  85. if (myRight) {
  86. delete myRight;
  87. }
  88. }
  89.  
  90. friend ostream& operator<<(ostream& out, const LetterGroup& object) {
  91. out << object.letters() << " " << object.totalFrequency() << endl;
  92. return out;
  93. }
  94. }; // class LetterGroup
  95.  
  96.  
  97.  
  98.  
  99. /**
  100.  * 2
  101.  */
  102.  
  103. class CompareLetterGroupPointer {
  104. public:
  105. bool operator()(PLetterGroup a, PLetterGroup b) {
  106. return a->totalFrequency() > b->totalFrequency();
  107. }
  108. };
  109.  
  110. typedef priority_queue<PLetterGroup, vector<PLetterGroup>, CompareLetterGroupPointer> LetterGroupPriorityQueue;
  111.  
  112.  
  113.  
  114.  
  115. /**
  116.  * 3 4
  117.  */
  118.  
  119. class HuffmanCode {
  120. private:
  121. map<char,string> myEncoding;
  122. map<string,char> myDecoding;
  123.  
  124. public:
  125. /// Return a binary string encoding the given text string
  126. string encode(string source) {
  127. string binary="";
  128. for (size_t i=0; i<source.length(); ++i) {
  129. binary += myEncoding[source[i]];
  130. }
  131. return binary;
  132. }
  133.  
  134. /// Return the original string
  135. string decode(string binary) {
  136. string source="";
  137. string currentLetterBinary="";
  138. for (size_t i=0; i<binary.length(); ++i) {
  139. currentLetterBinary += binary[i];
  140. if (myDecoding[currentLetterBinary]) {
  141. source += myDecoding[currentLetterBinary];
  142. currentLetterBinary = "";
  143. }
  144. }
  145. return source;
  146. }
  147.  
  148. /// Write the entire encoding table into an output stream
  149. friend ostream& operator<<(ostream& out, const HuffmanCode& object) {
  150. for (map<char,string>::const_iterator it=object.myEncoding.begin(); it!=object.myEncoding.end(); ++it) {
  151. out << " " << it->first << it->second;
  152. }
  153. out << endl;
  154. return out;
  155. }
  156.  
  157. /// Read the entire encoding table from an input stream
  158. friend istream& operator>>(istream& in, HuffmanCode& object) {
  159. char c = in.get();
  160. while (c==' ') { // read a new char encoding
  161. char source = in.get();
  162. string binary = "";
  163. for (;;) { // read 0's and 1's
  164. char digit = in.get();
  165. if (digit=='0' || digit=='1') {
  166. binary += digit;
  167. } else {
  168. in.unget();
  169. break;
  170. }
  171. }
  172. object.myEncoding[source] = binary;
  173. object.myDecoding[binary] = source;
  174. c = in.get();
  175. }
  176. // The first char that is not '=' should be the 'endl' written at operator<<
  177. return in;
  178. }
  179.  
  180.  
  181. /**
  182. * Use a Huffman tree to create an optimal encoding to the given text
  183. */
  184. void createOptimalEncodingFromText(string text) {
  185.  
  186. // Count the frequency of each letter in the text:
  187. map<char,int> letterFrequency;
  188. for (size_t i=0; i<text.length(); ++i) {
  189. letterFrequency[text[i]]++;
  190. }
  191.  
  192. // Create the initial priority queue:
  193. LetterGroupPriorityQueue myQueue;
  194. for (map<char,int>::iterator it=letterFrequency.begin(); it!=letterFrequency.end(); ++it) {
  195. string singleLetter = "";
  196. singleLetter += it->first;
  197. PLetterGroup p = new LetterGroup(singleLetter, it->second);
  198. myQueue.push(p);
  199. }
  200.  
  201. // Create the Huffman Tree:
  202. while (myQueue.size()>1) {
  203. PLetterGroup top1 = myQueue.top();
  204. myQueue.pop();
  205. PLetterGroup top2 = myQueue.top();
  206. myQueue.pop();
  207. PLetterGroup sum = new LetterGroup(
  208. top1->letters()+top2->letters(),
  209. top1->totalFrequency()+top2->totalFrequency(),
  210. top1, top2);
  211. myQueue.push(sum);
  212. }
  213.  
  214. // Create the binary Coding:
  215. PLetterGroup huffmanTreeRoot = myQueue.top();
  216. createEncodingFromHuffmanTreeNode("", huffmanTreeRoot);
  217. delete huffmanTreeRoot;
  218. } // createOptimalEncodingFromText
  219.  
  220.  
  221.  
  222. private:
  223.  
  224. // Recursively build the binary coding from the given inner node of the Huffman Tree:
  225. void createEncodingFromHuffmanTreeNode(string binary, PLetterGroup theHuffmanTreeNode) {
  226. if (theHuffmanTreeNode->isLeaf()) {
  227. string singleLetter = theHuffmanTreeNode->letters();
  228. char source = singleLetter[0];
  229. myEncoding[source] = binary;
  230. myDecoding[binary] = source;
  231. } else {
  232. createEncodingFromHuffmanTreeNode(binary+"0", theHuffmanTreeNode->left());
  233. createEncodingFromHuffmanTreeNode(binary+"1", theHuffmanTreeNode->right());
  234. }
  235. }
  236. }; // HuffmanCode
  237.  
  238.  
  239.  
  240.  
  241.  
  242. /**
  243.  * 5
  244.  */
  245.  
  246. void encodeInto(string text, ostream& out) {
  247. HuffmanCode encoder;
  248. encoder.createOptimalEncodingFromText(text);
  249. out << encoder << encoder.encode(text);
  250. }
  251.  
  252.  
  253. string decodeFrom(istream& in) {
  254. HuffmanCode encoder;
  255. string binary;
  256. in >> encoder >> binary;
  257. return encoder.decode(binary);
  258. }
  259.  
  260.  
  261. int main() {
  262. // Read the entire standard input into a string:
  263. string text = "", line = "";
  264. while (getline(cin, line)) {
  265. text += line + "\n";
  266. }
  267.  
  268. // Encode and decode:
  269. HuffmanCode theHuffmanCode;
  270. theHuffmanCode.createOptimalEncodingFromText(text);
  271. cout << "Original text:" << endl << text << endl;
  272. string encoded = theHuffmanCode.encode(text);
  273. cout << "Encoded text:" << endl << encoded << endl;
  274. string decoded = theHuffmanCode.decode(encoded);
  275. cout << "Re-decoded text:" << endl << decoded << endl;
  276.  
  277. // Encode and decode all, including the encoding itself:
  278. ostringstream outputFile;
  279. encodeInto(text, outputFile);
  280. string outputFileContents = outputFile.str();
  281. cout << "Encoded text with encoding:" << endl << outputFileContents << endl;
  282.  
  283. istringstream inputFile(outputFileContents);
  284. string decodedInputFileContents = decodeFrom(inputFile);
  285. cout << "Re-decoded text:" << endl << decodedInputFileContents << endl;
  286. }
  287.  
  288.  
Success #stdin #stdout 0s 2884KB
stdin
Hello World!!!!!!!!!!!!!!!
What's up?
stdout
Original text:
Hello World!!!!!!!!!!!!!!!
What's up?

Encoded text:
00101011110000000100001101100100100010000101011111111111111111111111111111110110110101001010110011001000101100111001010000011101011
Re-decoded text:
Hello World!!!!!!!!!!!!!!!
What's up?

Encoded text with encoding:
 
1011  0011 !11 '00100 ?01110 H00101 W0110 a10101 d01010 e01111 h10100 l000 o0100 p10000 r10001 s01011 t10011 u10010
00101011110000000100001101100100100010000101011111111111111111111111111111110110110101001010110011001000101100111001010000011101011
Re-decoded text:
Hello World!!!!!!!!!!!!!!!
What's up?