import java.util.Comparator ;
import java.util.HashMap ;
import java.util.Map ;
import java.util.PriorityQueue ;
// A Tree node
class Node
{
char ch;
int freq;
Node left = null , right = null ;
Node( char ch, int freq)
{
this .ch = ch;
this .freq = freq;
}
public Node( char ch, int freq, Node left, Node right) {
this .ch = ch;
this .freq = freq;
this .left = left;
this .right = right;
}
} ;
class Huffman
{
// traverse the Huffman Tree and store Huffman Codes
// in a map.
public static void encode
( Node root,
String str,
{
if ( root == null )
return ;
// found a leaf node
if ( root.left == null && root.right == null ) {
huffmanCode.put ( root.ch , str) ;
}
encode( root.left , str + "0" , huffmanCode) ;
encode( root.right , str + "1" , huffmanCode) ;
}
// traverse the Huffman Tree and decode the encoded string
public static int decode( Node root, int index, StringBuilder sb)
{
if ( root == null )
return index;
// found a leaf node
if ( root.left == null && root.right == null )
{
return index;
}
index++;
if ( sb.charAt ( index) == '0' )
index = decode( root.left , index, sb) ;
else
index = decode( root.right , index, sb) ;
return index;
}
// Builds Huffman Tree and huffmanCode and decode given input text
public static void buildHuffmanTree
( String text
) {
// count frequency of appearance of each character
// and store it in a map
Map
< Character , Integer
> freq
= new HashMap
<> ( ) ; for ( int i = 0 ; i < text.length ( ) ; i++ ) {
if ( ! freq.containsKey ( text.charAt ( i) ) ) {
freq.put ( text.charAt ( i) , 0 ) ;
}
freq.put ( text.charAt ( i) , freq.get ( text.charAt ( i) ) + 1 ) ;
}
// Create a priority queue to store live nodes of Huffman tree
// Notice that highest priority item has lowest frequency
PriorityQueue< Node> pq = new PriorityQueue<> ( new Comparator< Node> ( ) {
@Override
public int compare( Node l, Node r) {
return l.freq - r.freq ;
}
} ) ;
// Create a leaf node for each character and add it
// to the priority queue.
for ( Map .
Entry < Character , Integer
> entry
: freq.
entrySet ( ) ) { pq.add ( new Node( entry.getKey ( ) , entry.getValue ( ) ) ) ;
}
// do till there is more than one node in the queue
while ( pq.size ( ) != 1 )
{
// Remove the two nodes of highest priority
// (lowest frequency) from the queue
Node left = pq.poll ( ) ;
Node right = pq.poll ( ) ;
// Create a new internal node with these two nodes as children
// and with frequency equal to the sum of the two nodes
// frequencies. Add the new node to the priority queue.
int sum = left.freq + right.freq ;
pq.add ( new Node( '\0 ' , sum, left, right) ) ;
}
// root stores pointer to root of Huffman Tree
Node root = pq.peek ( ) ;
// traverse the Huffman tree and store the Huffman codes in a map
Map
< Character , String
> huffmanCode
= new HashMap
<> ( ) ; encode( root, "" , huffmanCode) ;
// print the Huffman codes
System .
out .
println ( "Huffman Codes are :\n " ) ; for ( Map .
Entry < Character , String
> entry
: huffmanCode.
entrySet ( ) ) { System .
out .
println ( entry.
getKey ( ) + " " + entry.
getValue ( ) ) ; }
System .
out .
println ( "\n Original string was :\n " + text
) ;
// print encoded string
StringBuilder sb = new StringBuilder( ) ;
for ( int i = 0 ; i < text.length ( ) ; i++ ) {
sb.append ( huffmanCode.get ( text.charAt ( i) ) ) ;
}
System .
out .
println ( "\n Encoded string is :\n " + sb
) ;
// traverse the Huffman Tree again and this time
// decode the encoded string
int index = - 1 ;
System .
out .
println ( "\n Decoded string is: \n " ) ; while ( index < sb.length ( ) - 2 ) {
index = decode( root, index, sb) ;
}
}
public static void main
( String [ ] args
) {
String text
= "Huffman coding is a data compression algorithm." ;
buildHuffmanTree( text) ;
}
}
aW1wb3J0IGphdmEudXRpbC5Db21wYXJhdG9yOwppbXBvcnQgamF2YS51dGlsLkhhc2hNYXA7CmltcG9ydCBqYXZhLnV0aWwuTWFwOwppbXBvcnQgamF2YS51dGlsLlByaW9yaXR5UXVldWU7CgovLyBBIFRyZWUgbm9kZQpjbGFzcyBOb2RlCnsKICAgIGNoYXIgY2g7CiAgICBpbnQgZnJlcTsKICAgIE5vZGUgbGVmdCA9IG51bGwsIHJpZ2h0ID0gbnVsbDsKCiAgICBOb2RlKGNoYXIgY2gsIGludCBmcmVxKQogICAgewogICAgICAgIHRoaXMuY2ggPSBjaDsKICAgICAgICB0aGlzLmZyZXEgPSBmcmVxOwogICAgfQoKICAgIHB1YmxpYyBOb2RlKGNoYXIgY2gsIGludCBmcmVxLCBOb2RlIGxlZnQsIE5vZGUgcmlnaHQpIHsKICAgICAgICB0aGlzLmNoID0gY2g7CiAgICAgICAgdGhpcy5mcmVxID0gZnJlcTsKICAgICAgICB0aGlzLmxlZnQgPSBsZWZ0OwogICAgICAgIHRoaXMucmlnaHQgPSByaWdodDsKICAgIH0KfTsKCmNsYXNzIEh1ZmZtYW4KewogICAgLy8gdHJhdmVyc2UgdGhlIEh1ZmZtYW4gVHJlZSBhbmQgc3RvcmUgSHVmZm1hbiBDb2RlcwogICAgLy8gaW4gYSBtYXAuCiAgICBwdWJsaWMgc3RhdGljIHZvaWQgZW5jb2RlKE5vZGUgcm9vdCwgU3RyaW5nIHN0ciwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgTWFwPENoYXJhY3RlciwgU3RyaW5nPiBodWZmbWFuQ29kZSkKICAgIHsKICAgICAgICBpZiAocm9vdCA9PSBudWxsKQogICAgICAgICAgICByZXR1cm47CgogICAgICAgIC8vIGZvdW5kIGEgbGVhZiBub2RlCiAgICAgICAgaWYgKHJvb3QubGVmdCA9PSBudWxsICYmIHJvb3QucmlnaHQgPT0gbnVsbCkgewogICAgICAgICAgICBodWZmbWFuQ29kZS5wdXQocm9vdC5jaCwgc3RyKTsKICAgICAgICB9CgoKICAgICAgICBlbmNvZGUocm9vdC5sZWZ0LCBzdHIgKyAiMCIsIGh1ZmZtYW5Db2RlKTsKICAgICAgICBlbmNvZGUocm9vdC5yaWdodCwgc3RyICsgIjEiLCBodWZmbWFuQ29kZSk7CiAgICB9CgogICAgLy8gdHJhdmVyc2UgdGhlIEh1ZmZtYW4gVHJlZSBhbmQgZGVjb2RlIHRoZSBlbmNvZGVkIHN0cmluZwogICAgcHVibGljIHN0YXRpYyBpbnQgZGVjb2RlKE5vZGUgcm9vdCwgaW50IGluZGV4LCBTdHJpbmdCdWlsZGVyIHNiKQogICAgewogICAgICAgIGlmIChyb290ID09IG51bGwpCiAgICAgICAgICAgIHJldHVybiBpbmRleDsKCiAgICAgICAgLy8gZm91bmQgYSBsZWFmIG5vZGUKICAgICAgICBpZiAocm9vdC5sZWZ0ID09IG51bGwgJiYgcm9vdC5yaWdodCA9PSBudWxsKQogICAgICAgIHsKICAgICAgICAgICAgU3lzdGVtLm91dC5wcmludChyb290LmNoKTsKICAgICAgICAgICAgcmV0dXJuIGluZGV4OwogICAgICAgIH0KCiAgICAgICAgaW5kZXgrKzsKCiAgICAgICAgaWYgKHNiLmNoYXJBdChpbmRleCkgPT0gJzAnKQogICAgICAgICAgICBpbmRleCA9IGRlY29kZShyb290LmxlZnQsIGluZGV4LCBzYik7CiAgICAgICAgZWxzZQogICAgICAgICAgICBpbmRleCA9IGRlY29kZShyb290LnJpZ2h0LCBpbmRleCwgc2IpOwoKICAgICAgICByZXR1cm4gaW5kZXg7CiAgICB9CgogICAgLy8gQnVpbGRzIEh1ZmZtYW4gVHJlZSBhbmQgaHVmZm1hbkNvZGUgYW5kIGRlY29kZSBnaXZlbiBpbnB1dCB0ZXh0CiAgICBwdWJsaWMgc3RhdGljIHZvaWQgYnVpbGRIdWZmbWFuVHJlZShTdHJpbmcgdGV4dCkKICAgIHsKICAgICAgICAvLyBjb3VudCBmcmVxdWVuY3kgb2YgYXBwZWFyYW5jZSBvZiBlYWNoIGNoYXJhY3RlcgogICAgICAgIC8vIGFuZCBzdG9yZSBpdCBpbiBhIG1hcAogICAgICAgIE1hcDxDaGFyYWN0ZXIsIEludGVnZXI+IGZyZXEgPSBuZXcgSGFzaE1hcDw+KCk7CiAgICAgICAgZm9yIChpbnQgaSA9IDAgOyBpIDwgdGV4dC5sZW5ndGgoKTsgaSsrKSB7CiAgICAgICAgICAgIGlmICghZnJlcS5jb250YWluc0tleSh0ZXh0LmNoYXJBdChpKSkpIHsKICAgICAgICAgICAgICAgIGZyZXEucHV0KHRleHQuY2hhckF0KGkpLCAwKTsKICAgICAgICAgICAgfQogICAgICAgICAgICBmcmVxLnB1dCh0ZXh0LmNoYXJBdChpKSwgZnJlcS5nZXQodGV4dC5jaGFyQXQoaSkpICsgMSk7CiAgICAgICAgfQoKICAgICAgICAvLyBDcmVhdGUgYSBwcmlvcml0eSBxdWV1ZSB0byBzdG9yZSBsaXZlIG5vZGVzIG9mIEh1ZmZtYW4gdHJlZQogICAgICAgIC8vIE5vdGljZSB0aGF0IGhpZ2hlc3QgcHJpb3JpdHkgaXRlbSBoYXMgbG93ZXN0IGZyZXF1ZW5jeQogICAgICAgIFByaW9yaXR5UXVldWU8Tm9kZT4gcHEgPSBuZXcgUHJpb3JpdHlRdWV1ZTw+KG5ldyBDb21wYXJhdG9yPE5vZGU+KCkgewogICAgICAgICAgICBAT3ZlcnJpZGUKICAgICAgICAgICAgcHVibGljIGludCBjb21wYXJlKE5vZGUgbCwgTm9kZSByKSB7CiAgICAgICAgICAgICAgICByZXR1cm4gbC5mcmVxIC0gci5mcmVxOwogICAgICAgICAgICB9CiAgICAgICAgfSk7CgogICAgICAgIC8vIENyZWF0ZSBhIGxlYWYgbm9kZSBmb3IgZWFjaCBjaGFyYWN0ZXLCoGFuZCBhZGQgaXQKICAgICAgICAvLyB0byB0aGUgcHJpb3JpdHkgcXVldWUuCiAgICAgICAgZm9yIChNYXAuRW50cnk8Q2hhcmFjdGVyLCBJbnRlZ2VyPiBlbnRyeSA6IGZyZXEuZW50cnlTZXQoKSkgewogICAgICAgICAgICBwcS5hZGQobmV3IE5vZGUoZW50cnkuZ2V0S2V5KCksIGVudHJ5LmdldFZhbHVlKCkpKTsKICAgICAgICB9CgogICAgICAgIC8vIGRvIHRpbGwgdGhlcmUgaXMgbW9yZSB0aGFuIG9uZSBub2RlIGluIHRoZSBxdWV1ZQogICAgICAgIHdoaWxlIChwcS5zaXplKCkgIT0gMSkKICAgICAgICB7CiAgICAgICAgICAgIC8vIFJlbW92ZSB0aGUgdHdvIG5vZGVzIG9mIGhpZ2hlc3QgcHJpb3JpdHkKICAgICAgICAgICAgLy8gKGxvd2VzdCBmcmVxdWVuY3kpIGZyb20gdGhlIHF1ZXVlCiAgICAgICAgICAgIE5vZGUgbGVmdCA9IHBxLnBvbGwoKTsKICAgICAgICAgICAgTm9kZSByaWdodCA9IHBxLnBvbGwoKTsKCiAgICAgICAgICAgIC8vIENyZWF0ZSBhIG5ldyBpbnRlcm5hbCBub2RlIHdpdGggdGhlc2UgdHdvIG5vZGVzIGFzIGNoaWxkcmVuCiAgICAgICAgICAgIC8vIGFuZCB3aXRoIGZyZXF1ZW5jeSBlcXVhbCB0byB0aGUgc3VtIG9mIHRoZSB0d28gbm9kZXMKICAgICAgICAgICAgLy8gZnJlcXVlbmNpZXMuIEFkZCB0aGUgbmV3IG5vZGUgdG8gdGhlIHByaW9yaXR5IHF1ZXVlLgogICAgICAgICAgICBpbnQgc3VtID0gbGVmdC5mcmVxICsgcmlnaHQuZnJlcTsKICAgICAgICAgICAgcHEuYWRkKG5ldyBOb2RlKCdcMCcsIHN1bSwgbGVmdCwgcmlnaHQpKTsKICAgICAgICB9CgogICAgICAgIC8vIHJvb3Qgc3RvcmVzIHBvaW50ZXIgdG8gcm9vdCBvZiBIdWZmbWFuIFRyZWUKICAgICAgICBOb2RlIHJvb3QgPSBwcS5wZWVrKCk7CgogICAgICAgIC8vIHRyYXZlcnNlIHRoZSBIdWZmbWFuIHRyZWUgYW5kIHN0b3JlIHRoZSBIdWZmbWFuIGNvZGVzIGluIGEgbWFwCiAgICAgICAgTWFwPENoYXJhY3RlciwgU3RyaW5nPiBodWZmbWFuQ29kZSA9IG5ldyBIYXNoTWFwPD4oKTsKICAgICAgICBlbmNvZGUocm9vdCwgIiIsIGh1ZmZtYW5Db2RlKTsKCiAgICAgICAgLy8gcHJpbnQgdGhlIEh1ZmZtYW4gY29kZXMKICAgICAgICBTeXN0ZW0ub3V0LnByaW50bG4oIkh1ZmZtYW4gQ29kZXMgYXJlIDpcbiIpOwogICAgICAgIGZvciAoTWFwLkVudHJ5PENoYXJhY3RlciwgU3RyaW5nPiBlbnRyeSA6IGh1ZmZtYW5Db2RlLmVudHJ5U2V0KCkpIHsKICAgICAgICAgICAgU3lzdGVtLm91dC5wcmludGxuKGVudHJ5LmdldEtleSgpICsgIiAiICsgZW50cnkuZ2V0VmFsdWUoKSk7CiAgICAgICAgfQoKICAgICAgICBTeXN0ZW0ub3V0LnByaW50bG4oIlxuT3JpZ2luYWwgc3RyaW5nIHdhcyA6XG4iICsgdGV4dCk7CgogICAgICAgIC8vIHByaW50IGVuY29kZWQgc3RyaW5nCiAgICAgICAgU3RyaW5nQnVpbGRlciBzYiA9IG5ldyBTdHJpbmdCdWlsZGVyKCk7CiAgICAgICAgZm9yIChpbnQgaSA9IDAgOyBpIDwgdGV4dC5sZW5ndGgoKTsgaSsrKSB7CiAgICAgICAgICAgIHNiLmFwcGVuZChodWZmbWFuQ29kZS5nZXQodGV4dC5jaGFyQXQoaSkpKTsKICAgICAgICB9CgogICAgICAgIFN5c3RlbS5vdXQucHJpbnRsbigiXG5FbmNvZGVkIHN0cmluZyBpcyA6XG4iICsgc2IpOwoKICAgICAgICAvLyB0cmF2ZXJzZSB0aGUgSHVmZm1hbiBUcmVlIGFnYWluIGFuZCB0aGlzIHRpbWUKICAgICAgICAvLyBkZWNvZGUgdGhlIGVuY29kZWQgc3RyaW5nCiAgICAgICAgaW50IGluZGV4ID0gLTE7CiAgICAgICAgU3lzdGVtLm91dC5wcmludGxuKCJcbkRlY29kZWQgc3RyaW5nIGlzOiBcbiIpOwogICAgICAgIHdoaWxlIChpbmRleCA8IHNiLmxlbmd0aCgpIC0gMikgewogICAgICAgICAgICBpbmRleCA9IGRlY29kZShyb290LCBpbmRleCwgc2IpOwogICAgICAgIH0KICAgIH0KCiAgICBwdWJsaWMgc3RhdGljIHZvaWQgbWFpbihTdHJpbmdbXSBhcmdzKQogICAgewogICAgICAgIFN0cmluZyB0ZXh0ID0gIkh1ZmZtYW4gY29kaW5nIGlzIGEgZGF0YSBjb21wcmVzc2lvbiBhbGdvcml0aG0uIjsKCiAgICAgICAgYnVpbGRIdWZmbWFuVHJlZSh0ZXh0KTsKICAgIH0KfQ==