fork(1) download
  1. #include<iostream>
  2. #include<fstream>
  3. #include<vector>
  4. #include<map>
  5. #include<cmath>
  6. #include<algorithm>
  7. using namespace std;
  8.  
  // Returns the log-scaled TF-IDF weight
  //     (1 + log10(term_count)) * log10(total_docs / docs_with_term).
  // Yields 0 when any input is non-positive, so log10(0) and a division by
  // zero can never occur.
  static double tf_idf_weight(int term_count, int docs_with_term, double total_docs)
  {
      if (term_count <= 0 || docs_with_term <= 0 || total_docs <= 0.0)
      {
          return 0.0;
      }
      // Floating-point division: an integer quotient would truncate the ratio
      // (e.g. 3/2 -> 1) and wipe out the IDF of most terms.
      const double idf = std::log10(total_docs / docs_with_term);
      return (1.0 + std::log10(static_cast<double>(term_count))) * idf;
  }

  // Counts, for every term, the number of documents that contain it at least
  // once (its document frequency).
  static std::map<std::string, int> tf_idf_document_frequency(
      const std::vector<std::vector<std::string> > &documents)
  {
      std::map<std::string, int> document_frequency;
      for (std::vector<std::vector<std::string> >::const_iterator doc = documents.begin();
           doc != documents.end(); ++doc)
      {
          // De-duplicate a copy of the document so that a repeated term is
          // counted only once per document.
          std::vector<std::string> distinct(*doc);
          std::sort(distinct.begin(), distinct.end());
          distinct.erase(std::unique(distinct.begin(), distinct.end()), distinct.end());
          for (std::vector<std::string>::const_iterator term = distinct.begin();
               term != distinct.end(); ++term)
          {
              ++document_frequency[*term];
          }
      }
      return document_frequency;
  }

  // Builds a dictionary from the file "regs.txt" and prints one line per term.
  //
  // The file is read as whitespace-separated tokens; a lone "#" token marks the
  // end of a document. Empty documents are skipped.
  //
  // Parameters (all are outputs that are appended to / updated):
  //   frequency - total number of occurrences of each term over all documents.
  //   documents - one vector of terms per document, in file order.
  //   terms     - every distinct term, sorted ascending.
  //
  // For each entry of `frequency` the function writes
  //     <term> <frequency> <tf_idf> <cosine>
  // to standard output, where tf_idf uses the term's own document frequency.
  // `cosine` is the normalised product of the weight and the frequency, and is
  // reported as 0 when the weight is 0 (instead of the NaN produced by 0/0).
  //
  // If the file cannot be opened, "file not found" is printed and the
  // containers are left untouched. After a successful run the function waits
  // for one character on standard input.
  void tf_idf_compute(std::map<std::string, int> &frequency,
                      std::vector<std::vector<std::string> > &documents,
                      std::vector<std::string> &terms)
  {
      // Input-only stream: a read/write std::fstream fails on read-only files.
      std::ifstream file("regs.txt");
      if (!file)
      {
          std::cout << "file not found" << std::endl;
          return;
      }

      std::vector<std::string> words;  // terms of the document being read
      std::string token;
      // Looping on the extraction itself ends on EOF *and* on any stream error,
      // unlike a loop on !eof(), which never terminates after a read failure.
      while (file >> token)
      {
          if (token == "#")
          {
              if (!words.empty())
              {
                  documents.push_back(words);
                  words.clear();
              }
              continue;
          }
          words.push_back(token);
          terms.push_back(token);
          ++frequency[token];
      }
      // The last document does not need a trailing "#".
      if (!words.empty())
      {
          documents.push_back(words);
      }

      // Sort and de-duplicate once, after all tokens have been collected.
      std::sort(terms.begin(), terms.end());
      terms.erase(std::unique(terms.begin(), terms.end()), terms.end());

      const std::map<std::string, int> document_frequency =
          tf_idf_document_frequency(documents);
      const double total_documents = static_cast<double>(documents.size());

      for (std::map<std::string, int>::const_iterator entry = frequency.begin();
           entry != frequency.end(); ++entry)
      {
          // A term that occurs in no document has document frequency 0.
          const std::map<std::string, int>::const_iterator found =
              document_frequency.find(entry->first);
          const int docs_with_term = (found != document_frequency.end()) ? found->second : 0;

          const double tf_idf = tf_idf_weight(entry->second, docs_with_term, total_documents);

          const double norm = std::fabs(tf_idf) * std::fabs(static_cast<double>(entry->second));
          const double cosine = (norm > 0.0) ? (tf_idf * entry->second) / norm : 0.0;

          std::cout << entry->first << " " << entry->second << " "
                    << tf_idf << " " << cosine << '\n';
      }

      std::cin.get();  // keeps the console window open until a key is pressed
  }
  70. int main()
  71. {
  72. cout<<"Terms\t\t""Term frequency\t\t\t""TF_IDF\t\t""CosineSimilarity";
  73. cout<<"\n";
  74. vector<vector<string> > dc;// dictionary
  75. vector<string> tokens;//container for documents which represent refered vector in prototype
  76. map<string,int>S;//A vector "s"that stores int type values.
  77.  
  78. tf_idf_compute(S,dc,tokens);
  79. }
  80.  
  81.  
  82.  
Success #stdin #stdout 0s 2832KB
stdin
Standard input is empty
stdout
Terms		Term frequency			TF_IDF		CosineSimilarity
file not found