fork download
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include <stdio.h>
  4.  
  5. //HASHITEM
  6.  
  7. /*Holds the word and the number of times it has appeared in the document */
  8. typedef struct {
  9. char *word;
  10. int occurences;
  11. } HashItem;
  12.  
  13. /* Creates and returns a new HashItem struct */
  14. HashItem *item_new(char *word){
  15. HashItem *item = (HashItem*)malloc(sizeof(HashItem));
  16. item->occurences = 1;
  17. int chars = strlen(word);
  18. item->word = (char*)malloc(sizeof(char) * (chars+1));
  19. strcpy(item->word, word);
  20. return item;
  21. }
  22.  
  23. /* Get the word containted within the passed HashItem */
  24. char *item_get_word(HashItem *item){
  25. return item->word;
  26. }
  27.  
  28. /* Get the occurences of the passed HashItem */
  29. int item_get_occurences(HashItem *item){
  30. return item->occurences;
  31. }
  32.  
  33. /* Increment the occurences of the passed HashItem, used when there is a verified collision */
  34. void item_increment_occurences(HashItem *item){
  35. item->occurences++;
  36. }
  37.  
  38.  
  39. //HASHTABLE
  40.  
  41. /* Hashtable struct holds all the HashItems and a null if there is no HashItem in the position */
  42. typedef struct {
  43. int size;
  44. HashItem **items;
  45. } Hash;
  46.  
  47. /* Create and return a new Hash struct */
  48. Hash *hash_new (int size) {
  49. Hash *hash = (Hash*)calloc(1, sizeof (Hash));
  50. hash->size = size;
  51. hash->items = (HashItem**)calloc(size, sizeof (HashItem*));
  52. return hash;
  53. }
  54.  
  55. /* Hash function used to assign a integer value to a word */
  56. int hash_func(char *word, int mod){
  57. int i = 0;
  58. int value = 0;
  59. while(word[i] != '\0'){
  60. value = ((word[i] * i) + value) % mod;
  61. i++;
  62. }
  63. return value;
  64. }
  65.  
  66. /* Get the index to place a given word within the passed hash table */
  67. int hash_index (Hash *hash, char *word) {
  68. int index = hash_func(word, hash->size);
  69. HashItem *item = (hash->items)[index];
  70. while (item){
  71. if(!strcmp(item->word, word)){
  72. printf("word %s is equal to %s \n", item->word, word );
  73. return index;
  74. }
  75. index = (index + 1) % hash->size;
  76. item = hash->items[index];
  77. }
  78. return index;
  79. }
  80.  
  81. /* Returns the Hashitem that corresponds to the word given, null is returned if it is not found */
  82. HashItem *hash_get (Hash *hash, char *word) {
  83. int index = hash_index(hash, word);
  84. return hash->items[index];
  85. }
  86.  
  87. /* Update the passed HashTable with a given word, adds the word if it is not in the table, increments the
  88. corresponding Hashitem's occurence value if it is. */
  89. HashItem *hash_update (Hash *hash, char *word) {
  90. HashItem *item = hash_get(hash, word);
  91. int index = hash_index(hash, word);
  92. if(item){
  93. item->occurences++;
  94. return NULL;
  95. }else{
  96. int index = hash_index(hash, word);
  97. item = item_new(word);
  98. hash->items[index] = item;
  99. return item;
  100. }
  101. }
  102.  
  103. //Dynamic Array
  104.  
  105. /*DynArray is a dynamically resizing array that is used to hold values and retain size data throughout*/
  106. typedef struct{
  107. int number_of_values;
  108. int capacity;
  109. HashItem **items;
  110. }DynArray;
  111.  
  112. /*Method to create a new dynamic array and return it */
  113. DynArray* array_new(int file_size){
  114. DynArray *array = (DynArray*)malloc(sizeof(DynArray));
  115. array->number_of_values = 0;
  116. array->capacity = file_size / 10;
  117. printf("capacity is %d " , array->capacity);
  118. array->items = (HashItem**)malloc(sizeof(HashItem*)* array->capacity);
  119. return array;
  120. }
  121.  
  122. /*Method used to increase the size of the array and reallocate memory*/
  123. void array_increase_if_full(DynArray *array){
  124. if (array->number_of_values >= array->capacity){
  125. array->capacity *= 1.25;
  126. array->items = (HashItem**)realloc(array->items, sizeof(HashItem)*array->capacity);
  127. }
  128. }
  129.  
  130. /*Method to add a string to the dynamic array specified */
  131. void array_append(DynArray *array, HashItem *item){
  132. array_increase_if_full(array);
  133. array->items[array->number_of_values] = item;
  134. //printf("item %s added \n at position %d ", array->items[array->number_of_values]->word, array->number_of_values);
  135. array->number_of_values++;
  136. }
  137.  
  138. /*Method used to get value at specified position for given array*/
  139. HashItem *array_get(DynArray *array, int position){
  140. if(position >= array->number_of_values || position <0){
  141. printf("Index specified out of range");
  142. exit(1);
  143. }
  144. //printf("item %s at position %d retrieved", array->items[position]->word, position);
  145. return array->items[position];
  146. }
  147.  
  148. HashItem **array_getarray(DynArray *array){
  149. HashItem **toreturn = (HashItem**)malloc(sizeof(HashItem*) * (array->number_of_values));
  150. int i;
  151. for(i = 0; i < array->number_of_values; i++){
  152. toreturn[i] = array_get(array, i);
  153. }
  154. return toreturn;
  155. }
  156.  
  157.  
  158. DynArray *readFile(char* file_path){
  159. FILE *file;
  160. file = fopen(file_path, "r");
  161. if(file == NULL){
  162. printf("Error opening file %s", file_path);
  163. exit(1);
  164. }
  165. //Work out size of file and create hash table and array accordingly
  166. fseek(file, 0, SEEK_END);
  167. int file_size = ftell(file);
  168. fseek(file, 0, SEEK_SET);
  169. //Create hash table assuming average word length is 3 (somewhat worst case) and
  170. //add extra spaces to reduce collisions. Ensures hash table will not overflow at cost
  171. //of some extra memory. More spaces also means less collisions so increases performance.
  172. Hash* hash = hash_new((file_size/3)*1.25);
  173. DynArray *array = array_new(file_size);
  174.  
  175. //String with memory allocation related to the longest word in english dictionary
  176. int c;
  177. int characters = 0;
  178. int n = 0;
  179. char *str = (char*)malloc(45);
  180. while((c = fgetc(file)) != EOF){
  181. //Check if character is a ' or an alphabetic character
  182. if ((c > 64 && c < 91) || (c > 96 && c < 123) || c == 39){
  183. if(c > 64 && c < 91){
  184. c += 32; //If character is upper case convert to lower
  185. }
  186. str[n] = c;
  187. n++;
  188. }else{
  189. str[n] = '\0';
  190. if(strcmp(str, "")){
  191. printf("String is : %s \n", str);
  192. HashItem *temp_item = hash_update(hash, str);
  193. //If update returns a HashItem it already exists therefore add HashItem to array
  194. if(temp_item != NULL){
  195. array_append(array, temp_item);
  196. printf("String %s added! \n ", str);
  197. //printf("Item : %s added \n", temp_item->word);
  198. }else{
  199. printf("String %s already exists, current count is %d \n", hash_get(hash, str)->word, hash_get(hash, str)->occurences);
  200. }
  201. n = 0;
  202. //allocate new memory for string
  203. str = (char*)malloc(45);
  204. }
  205. }
  206. }
  207. fclose(file);
  208. free(hash);
  209. printf("\n \n HERE: %s SIZE : %d \n",array_get(array, 1)->word, array->number_of_values);
  210. return array;
  211. }
  212.  
  213. int compare_func(const void *a, const void *b)
  214. {
  215. HashItem *aItem = (HashItem*)malloc (sizeof (HashItem));
  216. HashItem *bItem = (HashItem*)malloc (sizeof (HashItem));
  217.  
  218. memcpy (aItem, a, sizeof (HashItem));
  219. memcpy (bItem, b, sizeof (HashItem));
  220.  
  221. int aCount = aItem->occurences;
  222. int bCount = bItem->occurences;
  223.  
  224. free(aItem);
  225. free(bItem);
  226.  
  227. printf("comparing %d to %d \n", aCount, bCount);
  228. return (aCount-bCount);
  229. }
  230.  
  231. int main (int argc, char *argv[]) {
  232. DynArray *array = readFile(argv[1]);
  233. int n = array->number_of_values;
  234. int i;
  235. HashItem **standard_array = array_getarray(array);
  236. for(i = 0; i < n; i++){
  237. printf("%s : %d \n" , array_get(array, i)->word, array_get(array, i)->occurences);
  238. }
  239. printf("trying to sort");
  240. qsort(standard_array, n, sizeof(HashItem*), compare_func);
  241. printf("sorted");
  242. for(n; n > 0; n--){
  243. printf("Word : %s , occurences : %d \n", item_get_word(standard_array[n]),item_get_occurences(standard_array[n]));
  244. }
  245. return 0;
  246. }
Runtime error #stdin #stdout 0s 2384KB
stdin
Standard input is empty
stdout
Error opening file (null)