#include<iostream>
#include<fstream>
#include<vector>
#include<map>
#include<cmath>
#include<algorithm>
using namespace std;
// Returns, for every distinct word, the number of documents in which the
// word appears at least once (its document frequency).
static std::map<std::string,int> count_document_frequencies(const std::vector<std::vector<std::string> > &documents)
{
    std::map<std::string,int> doc_freq;
    for(std::size_t d=0;d<documents.size();++d)
    {
        // Work on a deduplicated copy so a document contributes at most 1 per word.
        std::vector<std::string> distinct(documents[d]);
        std::sort(distinct.begin(),distinct.end());
        distinct.erase(std::unique(distinct.begin(),distinct.end()),distinct.end());
        for(std::size_t w=0;w<distinct.size();++w)
        {
            ++doc_freq[distinct[w]];
        }
    }
    return doc_freq;
}

// Reads whitespace-separated tokens from "regs.txt", where a lone "#" token
// ends the current document, and prints one line per term to standard output:
//     <term> <term frequency> <tf-idf> <cosine>
//
// Parameters (all are appended to / updated in place):
//   frequency - total number of occurrences of each term across the file.
//   documents - one vector of tokens per non-empty document, in file order.
//   terms     - every term seen; left sorted and free of duplicates.
//
// tf-idf is (1 + log10(tf)) * log10(N / df), with N the number of documents
// and df the number of documents containing that particular term.
// The cosine column is (tf_idf * tf) / (|tf_idf| * |tf|); it is reported as 0
// when the denominator is 0 instead of producing NaN.
//
// If the file cannot be opened, "file not found" is printed and the
// containers are left untouched. After a successful run the function waits
// for one character on standard input so the console window stays open.
void tf_idf_compute(std::map<std::string,int> &frequency,std::vector<std::vector<std::string> > &documents,std::vector<std::string> &terms)//function for creating a dictionary
{
    // Input-only stream: the file is never written, so it must not need write access.
    std::ifstream file("regs.txt");
    if(!file)
    {
        std::cout<<"file not found"<<std::endl;
        return;
    }

    std::string token;
    std::vector<std::string> words;//tokens of the document currently being read
    // Looping on the extraction itself stops on end-of-file and on any stream error.
    while(file>>token)
    {
        if(token=="#")
        {
            // Delimiter reached: store the document unless it is empty.
            if(!words.empty())
            {
                documents.push_back(words);
                words.clear();
            }
            continue;
        }
        words.push_back(token);
        terms.push_back(token);
        ++frequency[token];
    }
    // The last document does not have to be terminated by "#".
    if(!words.empty())
    {
        documents.push_back(words);
    }

    // Sort and deduplicate once, after reading, instead of after every token.
    std::sort(terms.begin(),terms.end());
    terms.erase(std::unique(terms.begin(),terms.end()),terms.end());

    const std::map<std::string,int> doc_freq=count_document_frequencies(documents);
    const double total_documents=static_cast<double>(documents.size());

    for(std::map<std::string,int>::const_iterator iter=frequency.begin();iter!=frequency.end();++iter)
    {
        const double count=static_cast<double>(iter->second);
        const std::map<std::string,int>::const_iterator df=doc_freq.find(iter->first);

        // A term absent from every document, or with a non-positive count
        // (both only possible through caller-supplied entries), scores 0
        // rather than dividing by zero or taking log10 of a non-positive value.
        double tf_idf=0.0;
        if(df!=doc_freq.end() && iter->second>0)
        {
            // Floating-point division: N/df must not be truncated to an integer.
            tf_idf=(1.0+std::log10(count))*std::log10(total_documents/df->second);
        }

        const double norm=std::fabs(tf_idf)*std::fabs(count);
        const double cosine=(norm>0.0)?(tf_idf*count)/norm:0.0;

        std::cout<<iter->first<<" "<<iter->second<<" "<<tf_idf<<" "<<cosine<<std::endl;
    }
    std::cin.get();//holds the screen
}
// Program entry point: prints the column header of the report and then runs
// the TF-IDF computation on empty containers.
int main()
{
    // Header row for the four columns printed by tf_idf_compute.
    std::cout<<"Terms\t\t"<<"Term frequency\t\t\t"<<"TF_IDF\t\t"<<"CosineSimilarity"<<"\n";

    std::map<std::string,int> term_counts;//occurrences of each term
    std::vector<std::vector<std::string> > corpus;//one token list per document
    std::vector<std::string> vocabulary;//distinct terms

    tf_idf_compute(term_counts,corpus,vocabulary);
    return 0;
}