fork(3) download
  1. #include<iostream>
  2. #include<map>
  3. #include<vector>
  4. #include<string>
  5.  
  6. typedef std::string Word_t;
  7. typedef unsigned int WordPosition_t;
  8. typedef unsigned int IDdocument_t;
  9.  
  10. typedef std::pair<IDdocument_t, WordPosition_t> DocumentPosition_t;
  11. typedef std::vector<DocumentPosition_t> WordReferences_t;
  12.  
  13. WordReferences_t _intersect_two_words(const WordReferences_t& v1, const WordReferences_t& v2)
  14. {
  15. // all the locations where the words occur one after the other.
  16. WordReferences_t intersection;
  17.  
  18. auto firstIt = v1.begin();
  19. auto secondIt = v2.begin();
  20. while (firstIt != v1.end() && secondIt != v2.end())
  21. {
  22. if (firstIt->first < secondIt->first)
  23. {
  24. ++firstIt;
  25. continue;
  26. }
  27. // find the second word in the same document and AFTER the first word.
  28. if (secondIt->first < firstIt->first || secondIt->second < firstIt->second + 1)
  29. {
  30. ++secondIt;
  31. continue;
  32. }
  33. // first word wasn't just before the second, it's not a phrase.
  34. if (secondIt->second > firstIt->second + 1)
  35. {
  36. ++firstIt;
  37. continue;
  38. }
  39. // We found a phrase.
  40. intersection.emplace_back(*firstIt);
  41. ++firstIt;
  42. ++secondIt;
  43. }
  44.  
  45. return intersection;
  46. }
  47.  
  48. int main()
  49. {
  50. WordReferences_t v1, v2;
  51. v1.push_back(std::make_pair(10, 5));
  52. v1.push_back(std::make_pair(10, 25));
  53. v1.push_back(std::make_pair(11, 10));
  54. v1.push_back(std::make_pair(12, 1));
  55. v1.push_back(std::make_pair(12, 11));
  56. v1.push_back(std::make_pair(12, 21));
  57. v1.push_back(std::make_pair(12, 31));
  58. v1.push_back(std::make_pair(15, 11));
  59. v1.push_back(std::make_pair(100, 1));
  60. v1.push_back(std::make_pair(100, 11));
  61. v1.push_back(std::make_pair(100, 21));
  62. v1.push_back(std::make_pair(101, 11));
  63. v1.push_back(std::make_pair(102, 11));
  64. v1.push_back(std::make_pair(102, 13));
  65. v1.push_back(std::make_pair(102, 14));
  66. v1.push_back(std::make_pair(103, 11));
  67. v1.push_back(std::make_pair(103, 13));
  68.  
  69. v2.push_back(std::make_pair(10, 11));
  70. v2.push_back(std::make_pair(12, 10));
  71. v2.push_back(std::make_pair(12, 40));
  72. v2.push_back(std::make_pair(16, 11));
  73. v2.push_back(std::make_pair(100, 12)); // match
  74. v2.push_back(std::make_pair(101, 12)); // match
  75. v2.push_back(std::make_pair(101, 13));
  76. v2.push_back(std::make_pair(101, 14));
  77. v2.push_back(std::make_pair(102, 12)); //match
  78. v2.push_back(std::make_pair(103, 1));
  79. v2.push_back(std::make_pair(103, 10));
  80. v2.push_back(std::make_pair(103, 12)); // match
  81. v2.push_back(std::make_pair(103, 15));
  82.  
  83. auto intersection = _intersect_two_words(v1, v2);
  84. for (auto entry : intersection)
  85. {
  86. std::cout << entry.first << ", " << entry.second << "+" << (entry.second + 1) << std::endl;
  87. }
  88.  
  89. return 0;
  90. }
  91.  
Success #stdin #stdout 0s 2988KB
stdin
Standard input is empty
stdout
100, 11+12
101, 11+12
102, 11+12
103, 11+12