/*
Given a list of words, write a program to find the longest word made of other words in the list.
find the longest compound word in the list that is constructed by concatenating the words
in the list. the compound word also needs to exist in the list of words.
the algorithm will be used for 300k words!
the words are lexicographical sorted in the file
EXAMPLE
Input: banana, cat, dog, dogwalker, nana, walk, walker
Output: dogwalker
*/
#include <iostream>
#include <string>
#include <vector>
#include <chrono>
#include <fstream>
#include <unordered_set>
using namespace std;
class LCW {
private:
vector<string> words;
unordered_set<string> words_ht;
private:
bool is_compound_word(string candidate, int &count) {
vector<string> prefixes;
generate_prefix(candidate, prefixes);
bool found = false;
for (auto prefix = prefixes.begin(); prefix != prefixes.end(); prefix++) {
count++;
auto suffix = candidate.substr(prefix->size(), candidate.size() - prefix->size());
if (suffix.size() == 0 && count > 1) {
return true;
} else {
auto found = is_compound_word(suffix, count);
if (found) {
return true;
}
}
}
count--;
return found;
}
void generate_prefix(string candidate, vector<string> &prefixes) {
for (auto i = 2; i <= candidate.size(); i++) {
auto prefix = candidate.substr(0, i);
auto it = words_ht.find(prefix);
if (it != words_ht.end()) {
prefixes.push_back(prefix);
}
}
}
public:
bool load_words(string filename) {
//string tmp;
//while (cin>>tmp) {
// t.insert(tmp);
// words.push_back(tmp);
//}
std::ifstream file;
file.open(filename.c_str(), std::ios::in);
if (!file.is_open()) {
return false;
}
std::string line;
while (getline(file, line)) {
words.push_back(line);
words_ht.insert(line);
}
cout << words.size() << endl;
return true;
}
string get_result() {
string longest_word;
int count;
for (auto word: words) {
count = 0;
if (word.size() < longest_word.size()) {
continue;
}
if (is_compound_word(word, count)) {
if (word.size() > longest_word.size()) {
longest_word = word;
}
}
}
return longest_word;
}
};
int main() {
// your code goes here
auto start = chrono::steady_clock::now();
LCW lcw;
bool file_found = lcw.load_words("words.txt");
if (!file_found) {
cerr << "file not found" << endl;
return 0;
}
string longest_comp_word = lcw.get_result();
auto end = chrono::steady_clock::now();
auto diff = end - start;
cout << chrono::duration <double, milli> (diff).count() << " ms" << endl;
cout << longest_comp_word << endl;
return 0;
}