fork download
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4.  
  5. typedef struct DOCUMENT {
  6. int id;
  7. char* body;
  8. struct DOCUMENT* next;
  9. } DOCUMENT;
  10.  
  11. typedef struct ArrayList {
  12. int count;
  13. int size;
  14. int* data;
  15. } ArrayList;
  16.  
  17. typedef struct WordList {
  18. char* word;
  19. ArrayList* list;
  20. struct WordList* next;
  21. } WordList;
  22.  
  23. typedef struct Hashtable {
  24. int size;
  25. int count;
  26. WordList** table;
  27. } Hashtable;
  28.  
  29. void String_destroy(char* s) {
  30. free(s);
  31. }
  32.  
  33. char* String_new(char* cs) {
  34. int len = strlen(cs) + 1;
  35. char* s = malloc(sizeof(char) * len);
  36. if (s != NULL) {
  37. memcpy(s, cs, len);
  38. }
  39. return s;
  40. }
  41.  
  42. void DOCUMENT_destroy(DOCUMENT* s) {
  43. if (s->body != NULL) {
  44. String_destroy(s->body);
  45. }
  46. if (s->next != NULL) {
  47. DOCUMENT_destroy(s->next);
  48. }
  49. free(s);
  50. }
  51.  
  52. DOCUMENT* DOCUMENT_new(int id, char* body) {
  53. DOCUMENT* s = malloc(sizeof(DOCUMENT));
  54. if (s != NULL) {
  55. s->body = NULL;
  56. s->next = NULL;
  57.  
  58. s->body = String_new(body);
  59. if (s->body != NULL) {
  60. s->id = id;
  61. return s;
  62. }
  63. DOCUMENT_destroy(s);
  64. }
  65. return NULL;
  66. }
  67.  
  68. int DOCUMENT_add(DOCUMENT** s, int id, char* body) {
  69. DOCUMENT* n = DOCUMENT_new(id, body);
  70. DOCUMENT* a = *s;
  71. if (n != NULL) {
  72. if (a == NULL) {
  73. *s = n;
  74. } else {
  75. while (a->next != NULL) {
  76. a = a->next;
  77. }
  78. a->next = n;
  79. }
  80. return 1;
  81. }
  82. return 0;
  83. }
  84.  
  85. char* DOCUMENT_find(DOCUMENT* s, int id) {
  86. while (s != NULL) {
  87. if (s->id == id) {
  88. return s->body;
  89. }
  90. s = s->next;
  91. }
  92. return NULL;
  93. }
  94.  
  95. void ArrayList_destroy(ArrayList* s) {
  96. free(s->data);
  97. free(s);
  98. }
  99.  
  100. ArrayList* ArrayList_new(void) {
  101. ArrayList* s = malloc(sizeof(ArrayList));
  102. s = malloc(sizeof(ArrayList));
  103. if (s != NULL) {
  104. s->data = NULL;
  105.  
  106. s->count = 0;
  107. s->size = 4;
  108. s->data = malloc(sizeof(int) * s->size);
  109. if (s->data != NULL) {
  110. return s;
  111. }
  112. }
  113. ArrayList_destroy(s);
  114. return NULL;
  115. }
  116.  
  117. int ArrayList_add(ArrayList* s, int value) {
  118. int* data;
  119. if (s->count >= s->size) {
  120. s->size = s->size * 2;
  121. data = realloc(s->data, sizeof(int) * s->size);
  122. if (data == NULL) {
  123. return 0;
  124. }
  125. s->data = data;
  126. }
  127. s->data[s->count] = value;
  128. s->count = s->count + 1;
  129. return 1;
  130. }
  131.  
  132. void ArrayList_print(ArrayList* s) {
  133. int i;
  134. for (i = 0; i < s->count; i++) {
  135. printf("%d\n", s->data[i]);
  136. }
  137. }
  138.  
  139. void ArrayList_clear(ArrayList* s) {
  140. s->count = 0;
  141. }
  142.  
  143. void ArrayList_intersect(ArrayList* s, ArrayList* m) {
  144. int i = 0;
  145. int j = 0;
  146. int n = 0;
  147. while (i < s->count) {
  148. if (j >= m->count) {
  149. break;
  150. }
  151. if (s->data[i] < m->data[j]) {
  152. i++;
  153. } else if (s->data[i] > m->data[j]) {
  154. j++;
  155. } else {
  156. s->data[n] = s->data[i];
  157. i++;
  158. j++;
  159. n++;
  160. }
  161. }
  162. s->count = n;
  163. }
  164.  
  165. void ArrayList_distinct(ArrayList* s) {
  166. int i;
  167. int j;
  168. int c = s->count;
  169. int* data = s->data;
  170. for (i = 1; i < c; i++) {
  171. if (data[i - 1] == data[i]) {
  172. for (j = i + 1; j < c; j++) {
  173. if (data[j - 1] != data[j]) {
  174. data[i] = data[j];
  175. i = i + 1;
  176. }
  177. }
  178. s->count = i;
  179. return;
  180. }
  181. }
  182. }
  183.  
  184. void WordList_destroy(WordList* s) {
  185. if (s->word != NULL) {
  186. String_destroy(s->word);
  187. }
  188. if (s->list != NULL) {
  189. ArrayList_destroy(s->list);
  190. }
  191. if (s->next != NULL) {
  192. WordList_destroy(s->next);
  193. }
  194. free(s);
  195. }
  196.  
  197. WordList* WordList_new(char* word, int id, WordList* next) {
  198. WordList* s = malloc(sizeof(WordList));
  199. if (s != NULL) {
  200. s->word = NULL;
  201. s->list = NULL;
  202. s->next = next;
  203.  
  204. s->word = String_new(word);
  205. if (s->word != NULL) {
  206. s->list = ArrayList_new();
  207. if (s->list != NULL) {
  208. if (ArrayList_add(s->list, id)) {
  209. return s;
  210. }
  211. }
  212. }
  213. }
  214. WordList_destroy(s);
  215. return NULL;
  216. }
  217.  
  218. void Hashtable_destroy(Hashtable* s) {
  219. free(s->table);
  220. free(s);
  221. }
  222.  
  223. Hashtable* Hashtable_new(void) {
  224. int i;
  225. Hashtable* s = malloc(sizeof(Hashtable));
  226. if (s != NULL) {
  227. s->table = NULL;
  228.  
  229. s->count = 0;
  230. s->size = 65521;
  231. s->table = malloc(sizeof(WordList*) * s->size);
  232. if (s->table != NULL) {
  233. for (i = 0; i < s->size; i++) {
  234. s->table[i] = NULL;
  235. }
  236. return s;
  237. }
  238. }
  239. Hashtable_destroy(s);
  240. return NULL;
  241. }
  242.  
  243. unsigned int Hashtable_hash(Hashtable* s, char* c)
  244. {
  245. unsigned int hashval;
  246. for (hashval = 0; *c != '\0'; c++)
  247. hashval = *c + (31 * hashval);
  248. return hashval % s->size;
  249. }
  250.  
  251. ArrayList* Hashtable_find(Hashtable* s, char* word) {
  252. unsigned int hash = Hashtable_hash(s, word);
  253. WordList* a = s->table[hash];
  254. while (a != NULL) {
  255. if (strcmp(a->word, word) == 0) {
  256. return a->list;
  257. }
  258. a = a->next;
  259. }
  260. return NULL;
  261. }
  262.  
  263. int Hashtable_insert(Hashtable* s, char* word, int id) {
  264. unsigned int hash = Hashtable_hash(s, word);
  265. WordList* n = WordList_new(word, id, s->table[hash]);
  266. if (n != NULL) {
  267. s->table[hash] = n;
  268. return 1;
  269. }
  270. return 0;
  271. }
  272.  
  273. int Hashtable_add(Hashtable* s, char* word, int id) {
  274. ArrayList* a = Hashtable_find(s, word);
  275. if (a != NULL) {
  276. if (ArrayList_add(a, id)) {
  277. return 1;
  278. }
  279. } else {
  280. if (Hashtable_insert(s, word, id)) {
  281. s->count = s->count + 1;
  282. return 1;
  283. }
  284. }
  285. return 0;
  286. }
  287.  
  288. DOCUMENT* use_file(FILE* f) {
  289. char row[65536];
  290. DOCUMENT* d = NULL;
  291. int i = 0;
  292. while (fgets(row, 65536, f) != NULL) {
  293. row[strlen(row) - 1] = '\0';
  294. if (!DOCUMENT_add(&d, i, row)) {
  295. if (d != NULL) {
  296. DOCUMENT_destroy(d);
  297. }
  298. return NULL;
  299. }
  300. i = i + 1;
  301. }
  302. return d;
  303. }
  304.  
  305. DOCUMENT* create_document(char* filename) {
  306. DOCUMENT* d = NULL;
  307. FILE* f = fopen(filename, "r");
  308. if (f != NULL) {
  309. d = use_file(f);
  310. fclose(f);
  311. }
  312. return d;
  313. }
  314.  
  315. int add_words(int id, char* words, Hashtable* hash) {
  316. char ws[65536];
  317. char* w;
  318. strcpy(ws, words);
  319. w = strtok(ws, " \t\n");
  320. while (w != NULL) {
  321. if (!Hashtable_add(hash, w, id)) {
  322. return 0;
  323. }
  324. w = strtok(NULL, " \t\n");
  325. }
  326. return 1;
  327. }
  328.  
  329. Hashtable* create_hashtable(DOCUMENT* docs) {
  330. Hashtable* h = Hashtable_new();
  331. if (h != NULL) {
  332. while (docs != NULL) {
  333. if (!add_words(docs->id, docs->body, h)) {
  334. Hashtable_destroy(h);
  335. return NULL;
  336. }
  337. docs = docs->next;
  338. }
  339. }
  340. return h;
  341. }
  342.  
  343. ArrayList* execute_query(Hashtable* h) {
  344. char row[65536];
  345. char* w;
  346. int i;
  347. ArrayList* a = ArrayList_new();
  348. ArrayList* b;
  349. if (a != NULL) {
  350. printf("Enter Query:");
  351. fgets(row, 65536, stdin);
  352. w = strtok(row, " \t\n");
  353. if (w != NULL) {
  354. b = Hashtable_find(h, w);
  355. if (b != NULL) {
  356. for (i = 0; i < b->count; i++) {
  357. if (!ArrayList_add(a, b->data[i])) {
  358. ArrayList_destroy(a);
  359. return NULL;
  360. }
  361. }
  362. while (1) {
  363. w = strtok(NULL, " \t\n");
  364. if (w == NULL) {
  365. break;
  366. }
  367. b = Hashtable_find(h, w);
  368. if (b == NULL) {
  369. ArrayList_clear(a);
  370. break;
  371. }
  372. ArrayList_intersect(a, b);
  373. }
  374. ArrayList_distinct(a);
  375. }
  376. }
  377. }
  378. return a;
  379. }
  380.  
  381. int main(int argc, char* argv[]) {
  382. int i;
  383. ArrayList* list = NULL;
  384. Hashtable* hash = NULL;
  385. DOCUMENT* doc = NULL;
  386. char* filename;
  387.  
  388. if (argc < 3) {
  389. return 1;
  390. }
  391. if (strcmp(argv[1], "-f") != 0) {
  392. return 1;
  393. }
  394.  
  395. filename = argv[2];
  396.  
  397. doc = create_document(filename);
  398. if (doc != NULL) {
  399. hash = create_hashtable(doc);
  400. if (hash != NULL) {
  401. printf("Total number of words in the index = %d\n", hash->count);
  402. printf("john = %d\n", Hashtable_find(hash, "john")->count);
  403. printf("and = %d\n", Hashtable_find(hash, "and")->count);
  404. printf("said = %d\n", Hashtable_find(hash, "said")->count);
  405. list = execute_query(hash);
  406. if (list != NULL) {
  407. if (list->count < 1) {
  408. printf("words not found\n");
  409. } else {
  410. for (i = 0; i < list->count; i++) {
  411. printf("%d\n", list->data[i]);
  412. printf("%s\n", DOCUMENT_find(doc, list->data[i]));
  413. printf("\n");
  414. }
  415. }
  416. }
  417. }
  418. }
  419.  
  420. if (list != NULL) {
  421. ArrayList_destroy(list);
  422. }
  423. if (hash != NULL) {
  424. Hashtable_destroy(hash);
  425. }
  426. if (doc != NULL) {
  427. DOCUMENT_destroy(doc);
  428. }
  429.  
  430. return 0;
  431. }
  432.  
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty