#include <map>
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <string.h>
#include <assert.h>
#include <cstdlib>
#include <stdio.h>
using namespace std;
template<typename T>
void pop_front(std::vector<T>& vec)
{
assert(!vec.empty());
vec.erase(vec.begin());
}
struct cNode
{
unsigned char ch; // character
int freq; // freqbability
};
struct treeNode: public cNode
{
treeNode(){
ch = ' ';
freq = 0;
left = NULL;
right = NULL;
min = ' ';
}
char min;
char lcode;
char rcode;
treeNode *left; // left child
treeNode *right; // right child
};
static int nodeCharCompare(const void *elem1, const void *elem2)
{
const cNode a = *(cNode*)elem1;
const cNode b = *(cNode*)elem2;
if (a.ch > b.ch)
return 1;
else if(a.ch < b.ch)
return -1;
else
return 0;
}
static int nodeCompare(const void *elem1, const void *elem2)
{
const cNode a = *(cNode*)elem1;
const cNode b = *(cNode*)elem2;
if (a.freq > b.freq)
return 1;
else if(a.freq < b.freq)
return -1;
else
return 0;
}
bool cmp(treeNode *n1,treeNode* n2){
return n1->freq < n2->freq;
}
bool cmpByChar(treeNode *n1,treeNode* n2){
return n1->ch < n2->ch;
}
class HCode
{
private:
int tsize; // table size (number of chars)
cNode *ptable; // table of freqbabilities
map<char, string> codes; // codeword for each char
public:
void enCode(const char* inputFilepath, const char* outputFilepath)
{
map<char, int> freqs; // frequency for each char from input text
int i;
// Opening input file
//
ifstream inputFile;
inputFile.open(inputFilepath,std::ifstream::binary | std::ifstream::in);
if (!inputFile)
{
cerr<<"error: unable to open input file: " << inputFilepath <<endl;
}
char ch[1]; //char
unsigned total = 0;
while (true)
{
inputFile.read((char*)ch,sizeof(ch));
if(inputFile.eof())
break;
freqs[ch[0]]++;
total++;
}
tsize = (int)freqs.size();
// Building decreasing freqs table
//
ptable =new cNode[tsize];
//assert(ptable);
float ftot = float(total);
map<char, int>::iterator fi;
for (fi = freqs.begin(), i = 0; fi != freqs.end(); ++fi, ++i)
{
ptable[i].ch = (*fi).first;
ptable[i].freq = (*fi).second;
}
qsort(ptable, tsize, sizeof(cNode), nodeCompare);
// Encoding
//
EncHuffman();
// Opening output file
//
ofstream outputFile;
outputFile.open(outputFilepath, ofstream::out | ofstream::binary);
if (!outputFile)
{
cerr<<"error: unable to open output file: " << outputFilepath <<endl;
}
/*FILE *outputFile;
if((outputFile = fopen(outputFilepath,"wb"))==NULL){
cout<<"Write file failed.\n";
exit(1);
}*/
// Outputing ptable and codes
//
std::cout<<endl<<tsize<<endl;
outputFile<<tsize<<endl;
for (int i = 0; i < tsize; i++)
{
std::cout <<ptable[i].ch<<"\t"<<ptable[i].freq<<"\t"<<codes[ptable[i].ch].c_str()<<endl;
outputFile<<ptable[i].ch<<"\t"<<ptable[i].freq<<"\t"<<codes[ptable[i].ch].c_str()<<endl;
}
// Outputing encoded text
inputFile.clear();
inputFile.seekg(0,inputFile.beg);
std::cout<<endl;
outputFile<<endl;
while (true)
{
inputFile.read((char*)ch,sizeof(ch));
if (inputFile.eof())
break;
std::cout<<codes[ch[0]].c_str();
outputFile<<codes[ch[0]].c_str();
}
std::cout<<endl;
// Cleaning
//
codes.clear();
delete[] ptable;
// Closing files
//
// outputFile.close();
// outputFile.clear();
//fclose(outputFile);
//fclose(file);
outputFile.close();
outputFile.clear();
inputFile.close();
inputFile.clear();
}
void Decode(const char* inputFilename, const char* outputFilename)
{
ifstream inputFile;
inputFile.open(inputFilename,ios::in | ios::binary);
if (!inputFile)
{
cerr<<"error: unable to open input file: " << inputFilename <<endl;
}
// Loading codes
//
inputFile>>tsize;
char ch, code[128];
float p;
int i;
inputFile.get();
for (i = 0; i < tsize; i++)
{
inputFile.get(ch);
inputFile>>p>>code;
codes[ch] = code;
inputFile.get();
}
inputFile.get();
// Opening output file
//
ofstream outputFile;
outputFile.open(outputFilename,ios::out | ios::binary);
if (!outputFile)
{
cerr<<"error: unable to open output file: "<<outputFilename<<endl;
}
// Decoding and outputing to file
//
string accum = "";
map<char, string>::iterator ci;
while (true)
{
inputFile.get(ch);
if(inputFile.eof())
break;
accum += ch;
for (ci = codes.begin(); ci != codes.end(); ++ci)
{
if (!strcmp((*ci).second.c_str(), accum.c_str()))
{
accum = "";
std::cout<<(*ci).first;
outputFile<<(*ci).first;
}
}
}
std::cout<<endl;
// Cleaning
//
outputFile.close();
outputFile.clear();
inputFile.close();
inputFile.clear();
}
private:
treeNode* leftmost(vector<treeNode*> ptr,int i){
treeNode *temp = ptr[i];
while(temp->left!=NULL)temp = temp->left;
return temp;
}
void EncHuffman()
{
// Creating leaves (initial top-nodes)
//
treeNode *n;
vector<treeNode*> tops; // top-nodes
int i, numtop = tsize;
for (i = 0; i < numtop; i++)
{
n = new treeNode;
//assert(n);
n->ch = ptable[i].ch;
n->freq = ptable[i].freq;
n->left = NULL;
n->right = NULL;
tops.push_back(n);
}
sort(tops.begin(),tops.end(),cmpByChar);
sort(tops.begin(),tops.end(),cmp);
for(i=0;i<tops.size();i++){
cout<<"chars:"<<tops[i]->ch<<" times:"<<tops[i]->freq<<endl;
}
// Building binary tree.
// Combining last two nodes, replacing them by new node
// without invalidating sort
//
int count=1;
while (numtop > 1)
{
n = new treeNode;
//assert(n);
n->freq = tops[0]->freq + tops[1]->freq;
if(tops[0]->ch!=' '&& tops[1]->ch!=' '){
n->left = tops[0]->ch < tops[1]->ch ? tops[0]: tops[1];
n->right = tops[0]->ch < tops[1]->ch ? tops[1]: tops[0];
n->min = tops[0]->ch < tops[1]->ch? tops[0]->ch : tops[1]->ch;
tops[0]->ch < tops[1]->ch ? cout<<"YES" : cout<<"NO";
cout<<endl;
}
else{
cout<<"HI"<<endl;
if(tops[0] ->min !=' ' && tops[1]->min ==' '){
n->left = tops[0]->min > tops[1]->ch ? tops[1] : tops[0];
n->right = tops[0]->min > tops[1]->ch ? tops[0] : tops[1];
}
else if(tops[0] ->min !=' ' && tops[1]->min !=' '){
n->left = tops[0]->min > tops[1]->min ? tops[1] : tops[0];
n->right = tops[0]->min > tops[1]->min ? tops[0] : tops[1];
}
else if(tops[0]->ch == ' '&& tops[1]->ch ==' '){
treeNode *left_leftmost = leftmost(tops,0);
treeNode *right_leftmost = leftmost(tops,1);
n->left = left_leftmost->ch < right_leftmost->ch ? tops[0] : tops[1];
n->right = left_leftmost->ch < right_leftmost->ch ? tops[1] : tops[0];
cout<<"XCC"<<endl;
}
else if(tops[0]->ch == ' '){
cout<<"GGG"<<endl;
}
else if(tops[1]->ch == ' '){
treeNode *left_leftmost = leftmost(tops,0);
treeNode *right_leftmost = leftmost(tops,1);
n->left = left_leftmost->ch < right_leftmost->ch ? tops[0] : tops[1];
n->right = left_leftmost->ch < right_leftmost->ch ? tops[1] : tops[0];
}
}
pop_front(tops);
pop_front(tops);
tops.push_back(n);
sort(tops.begin(),tops.end(),cmpByChar);
sort(tops.begin(),tops.end(),cmp);
cout<<count++<<endl;
for(i=0;i<tops.size();i++){
cout<<"chars:"<<tops[i]->ch<<" times:"<<tops[i]->freq<<endl;
}
cout<<"\n\n\n";
numtop--;
}
// Building codes
//
cout<<"generate..."<<endl;
treeNode *root = tops[0];
GenerateCode(root);
// Cleaning
//
DestroyNode(root);
tops.clear();
}
void GenerateCode( treeNode *node ) // for outside call: node is root
{
static string sequence = "";
if( node->left )
{
sequence += "0";
GenerateCode( node->left );
}
if( node->right )
{
sequence += "1";
GenerateCode( node->right );
}
if( !node->left && !node->right )
codes[node->ch] = sequence;
int l = (int)sequence.length();
if( l > 1 )
sequence = sequence.substr( 0, l-1 );
else
sequence = "";
}
void DestroyNode( treeNode *node) // for outside call: node is root
{
if (node->left)
{
DestroyNode(node->left);
delete node->left;
node->left = NULL;
}
if (node->right)
{
DestroyNode(node->right);
delete node->right;
node->right = NULL;
}
}
};
int show_usage()
{
cout<<"Usage:"<<endl;
cout<<" huffman [OPTIONS] input [output]"<<endl;
cout<<" The defaul action is to encode the input file."<<endl;
cout<<" -d\tDecode file."<<endl;
cout<<endl;
cout<<"Examples:"<<endl;
cout<<" huffman input.txt"<<endl;
cout<<" huffman input.txt encoded.txt"<<endl;
cout<<" huffman -d encoded.txt"<<endl;
exit(0);
}
int main(int argc, char **argv)
{
int i = 1;
bool decFlag = false; // decode flag
char inputFilename[128];
char outputFilename[128];
if (argc < 2)
{
show_usage();
}
if (strcmp(argv[i],"-d") == 0)
{
decFlag = true;
++i;
if (i == argc)
{
show_usage();
}
}
strcpy(inputFilename, argv[i]);
++i;
if (i < argc)
{
strcpy(outputFilename, argv[i]);
}
else
{
if (decFlag) strcpy(outputFilename, "decoded.txt");
else strcpy(outputFilename, "encoded.txt");
}
// Calling encoding or decoding subroutine
//
HCode *pCoder;
pCoder = new HCode;
if (!pCoder)
{
cerr<<"error: unable to create a pointer to HCode"<<endl;
}
if (!decFlag)
{
pCoder->enCode(inputFilename, outputFilename);
}
else
{
pCoder->Decode(inputFilename, outputFilename);
}
delete pCoder;
return 0;
}