#include <stdlib.h>
#include <string.h>
#include <stdio.h>
//HASHITEM
/*Holds the word and the number of times it has appeared in the document */
typedef struct {
char *word;
int occurences;
} HashItem;
/* Creates and returns a new HashItem struct */
HashItem *item_new(char *word){
HashItem
*item
= (HashItem
*)malloc(sizeof(HashItem
)); item->occurences = 1;
item
->word
= (char*)malloc(sizeof(char) * (chars
+1)); return item;
}
/* Get the word containted within the passed HashItem */
char *item_get_word(HashItem *item){
return item->word;
}
/* Get the occurences of the passed HashItem */
int item_get_occurences(HashItem *item){
return item->occurences;
}
/* Increment the occurences of the passed HashItem, used when there is a verified collision */
void item_increment_occurences(HashItem *item){
item->occurences++;
}
//HASHTABLE
/* Hashtable struct holds all the HashItems and a null if there is no HashItem in the position */
typedef struct {
int size;
HashItem **items;
} Hash;
/* Create and return a new Hash struct */
Hash *hash_new (int size) {
Hash
*hash
= (Hash
*)calloc(1, sizeof (Hash
)); hash->size = size;
hash
->items
= (HashItem
**)calloc(size
, sizeof (HashItem
*)); return hash;
}
/* Hash function used to assign a integer value to a word */
int hash_func(char *word, int mod){
int i = 0;
int value = 0;
while(word[i] != '\0'){
value = ((word[i] * i) + value) % mod;
i++;
}
return value;
}
/* Get the index to place a given word within the passed hash table */
int hash_index (Hash *hash, char *word) {
int index = hash_func(word, hash->size);
HashItem *item = (hash->items)[index];
while (item){
if(!strcmp(item
->word
, word
)){ printf("word %s is equal to %s \n", item
->word
, word
); return index;
}
index = (index + 1) % hash->size;
item = hash->items[index];
}
return index;
}
/* Returns the Hashitem that corresponds to the word given, null is returned if it is not found */
HashItem *hash_get (Hash *hash, char *word) {
int index = hash_index(hash, word);
return hash->items[index];
}
/* Update the passed HashTable with a given word, adds the word if it is not in the table, increments the
corresponding Hashitem's occurence value if it is. */
HashItem *hash_update (Hash *hash, char *word) {
HashItem *item = hash_get(hash, word);
int index = hash_index(hash, word);
if(item){
item->occurences++;
return NULL;
}else{
int index = hash_index(hash, word);
item = item_new(word);
hash->items[index] = item;
return item;
}
}
//Dynamic Array
/*DynArray is a dynamically resizing array that is used to hold values and retain size data throughout*/
typedef struct{
int number_of_values;
int capacity;
HashItem **items;
}DynArray;
/*Method to create a new dynamic array and return it */
DynArray* array_new(int file_size){
DynArray
*array
= (DynArray
*)malloc(sizeof(DynArray
)); array->number_of_values = 0;
array->capacity = file_size / 10;
printf("capacity is %d " , array
->capacity
); array
->items
= (HashItem
**)malloc(sizeof(HashItem
*)* array
->capacity
); return array;
}
/*Method used to increase the size of the array and reallocate memory*/
void array_increase_if_full(DynArray *array){
if (array->number_of_values >= array->capacity){
array->capacity *= 1.25;
array
->items
= (HashItem
**)realloc(array
->items
, sizeof(HashItem
)*array
->capacity
); }
}
/*Method to add a string to the dynamic array specified */
void array_append(DynArray *array, HashItem *item){
array_increase_if_full(array);
array->items[array->number_of_values] = item;
//printf("item %s added \n at position %d ", array->items[array->number_of_values]->word, array->number_of_values);
array->number_of_values++;
}
/*Method used to get value at specified position for given array*/
HashItem *array_get(DynArray *array, int position){
if(position >= array->number_of_values || position <0){
printf("Index specified out of range"); }
//printf("item %s at position %d retrieved", array->items[position]->word, position);
return array->items[position];
}
HashItem **array_getarray(DynArray *array){
HashItem
**toreturn
= (HashItem
**)malloc(sizeof(HashItem
*) * (array
->number_of_values
)); int i;
for(i = 0; i < array->number_of_values; i++){
toreturn[i] = array_get(array, i);
}
return toreturn;
}
DynArray *readFile(char* file_path){
FILE *file;
file
= fopen(file_path
, "r"); if(file == NULL){
printf("Error opening file %s", file_path
); }
//Work out size of file and create hash table and array accordingly
fseek(file
, 0, SEEK_END
); int file_size
= ftell(file
); fseek(file
, 0, SEEK_SET
); //Create hash table assuming average word length is 3 (somewhat worst case) and
//add extra spaces to reduce collisions. Ensures hash table will not overflow at cost
//of some extra memory. More spaces also means less collisions so increases performance.
Hash* hash = hash_new((file_size/3)*1.25);
DynArray *array = array_new(file_size);
//String with memory allocation related to the longest word in english dictionary
int c;
int characters = 0;
int n = 0;
char *str
= (char*)malloc(45); while((c
= fgetc(file
)) != EOF
){ //Check if character is a ' or an alphabetic character
if ((c > 64 && c < 91) || (c > 96 && c < 123) || c == 39){
if(c > 64 && c < 91){
c += 32; //If character is upper case convert to lower
}
str[n] = c;
n++;
}else{
str[n] = '\0';
printf("String is : %s \n", str
); HashItem *temp_item = hash_update(hash, str);
//If update returns a HashItem it already exists therefore add HashItem to array
if(temp_item != NULL){
array_append(array, temp_item);
printf("String %s added! \n ", str
); //printf("Item : %s added \n", temp_item->word);
}else{
printf("String %s already exists, current count is %d \n", hash_get
(hash
, str
)->word
, hash_get
(hash
, str
)->occurences
); }
n = 0;
//allocate new memory for string
}
}
}
printf("\n \n HERE: %s SIZE : %d \n",array_get
(array
, 1)->word
, array
->number_of_values
); return array;
}
int compare_func(const void *a, const void *b)
{
HashItem
*aItem
= (HashItem
*)malloc (sizeof (HashItem
)); HashItem
*bItem
= (HashItem
*)malloc (sizeof (HashItem
));
memcpy (aItem
, a
, sizeof (HashItem
)); memcpy (bItem
, b
, sizeof (HashItem
));
int aCount = aItem->occurences;
int bCount = bItem->occurences;
printf("comparing %d to %d \n", aCount
, bCount
); return (aCount-bCount);
}
int main (int argc, char *argv[]) {
DynArray *array = readFile(argv[1]);
int n = array->number_of_values;
int i;
HashItem **standard_array = array_getarray(array);
for(i = 0; i < n; i++){
printf("%s : %d \n" , array_get
(array
, i
)->word
, array_get
(array
, i
)->occurences
); }
qsort(standard_array
, n
, sizeof(HashItem
*), compare_func
); for(n; n > 0; n--){
printf("Word : %s , occurences : %d \n", item_get_word
(standard_array
[n
]),item_get_occurences
(standard_array
[n
])); }
return 0;
}
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

//HASHITEM

/*Holds the word and the number of times it has appeared in the document */
typedef struct {
	char *word;
	int occurences;
} HashItem;

/* Creates and returns a new HashItem struct */
HashItem *item_new(char *word){
	HashItem *item = (HashItem*)malloc(sizeof(HashItem));
	item->occurences = 1;
	int chars = strlen(word);
	item->word = (char*)malloc(sizeof(char) * (chars+1));
	strcpy(item->word, word);
	return item;
}

/* Get the word containted within the passed HashItem */
char *item_get_word(HashItem *item){
	return item->word;
}

/* Get the occurences of the passed HashItem */
int item_get_occurences(HashItem *item){
	return item->occurences;
}

/* Increment the occurences of the passed HashItem, used when there is a verified collision */
void item_increment_occurences(HashItem *item){
	item->occurences++;
}


//HASHTABLE

/* Hashtable struct holds all the HashItems and a null if there is no HashItem in the position */
typedef struct {
	int size;
	HashItem **items;
} Hash;

/* Create and return a new Hash struct */
Hash *hash_new (int size) {
	Hash *hash = (Hash*)calloc(1, sizeof (Hash));
	hash->size = size;
	hash->items = (HashItem**)calloc(size, sizeof (HashItem*));
	return hash;
}

/* Hash function used to assign a integer value to a word */
int hash_func(char *word, int mod){
	int i = 0;     
	int value = 0;
	while(word[i] != '\0'){
		value = ((word[i] * i) + value) % mod;
		i++;
	}
	return value;
}

/* Get the index to place a given word within the passed hash table */
int hash_index (Hash *hash, char *word) {
	int index = hash_func(word, hash->size);
	HashItem *item = (hash->items)[index];
	while (item){
		if(!strcmp(item->word, word)){
			printf("word %s is equal to %s \n", item->word, word );
			return index;                          
		}
		index = (index + 1) % hash->size;
		item = hash->items[index];
	}
	return index;
}

/* Returns the Hashitem that corresponds to the word given, null is returned if it is not found */
HashItem *hash_get (Hash *hash, char *word) {
	int index = hash_index(hash, word);
	return hash->items[index];
}

/* Update the passed HashTable with a given word, adds the word if it is not in the table, increments the
corresponding Hashitem's occurence value if it is. */
HashItem *hash_update (Hash *hash, char *word) {
	HashItem *item = hash_get(hash, word);
	int index = hash_index(hash, word);
	if(item){
		item->occurences++;    
		return NULL;
	}else{
		int index = hash_index(hash, word);
		item = item_new(word);
		hash->items[index] = item;
		return item;
	}
}

//Dynamic Array

/*DynArray is a dynamically resizing array that is used to hold values and retain size data throughout*/
typedef struct{
	int number_of_values;
	int capacity;
	HashItem **items;
}DynArray;

/*Method to create a new dynamic array and return it */
DynArray* array_new(int file_size){
	DynArray *array = (DynArray*)malloc(sizeof(DynArray));
	array->number_of_values = 0;
	array->capacity = file_size / 10;
	printf("capacity is %d " , array->capacity);
	array->items = (HashItem**)malloc(sizeof(HashItem*)* array->capacity);
	return array;
}

/*Method used to increase the size of the array and reallocate memory*/
void array_increase_if_full(DynArray *array){
	if (array->number_of_values >= array->capacity){
		array->capacity *= 1.25;
		array->items = (HashItem**)realloc(array->items, sizeof(HashItem)*array->capacity);
	}
}

/*Method to add a string to the dynamic array specified */
void array_append(DynArray *array, HashItem *item){
	array_increase_if_full(array);
	array->items[array->number_of_values] = item;
	//printf("item %s added \n at position %d ", array->items[array->number_of_values]->word, array->number_of_values);
	array->number_of_values++;
}

/*Method used to get value at specified position for given array*/
HashItem *array_get(DynArray *array, int position){
	if(position >= array->number_of_values || position <0){
		printf("Index specified out of range");
		exit(1);
	}
	//printf("item %s at position %d retrieved", array->items[position]->word, position);
	return array->items[position];
}

HashItem **array_getarray(DynArray *array){
	HashItem **toreturn = (HashItem**)malloc(sizeof(HashItem*) * (array->number_of_values));
	int i;
	for(i = 0; i < array->number_of_values; i++){
		toreturn[i] = array_get(array, i);
	}
	return toreturn;
}


DynArray *readFile(char* file_path){
	FILE *file;
	file = fopen(file_path, "r");
	if(file == NULL){
		printf("Error opening file %s", file_path);
		exit(1);       
	}
	//Work out size of file and create hash table and array accordingly
	fseek(file, 0, SEEK_END);
	int file_size = ftell(file);
	fseek(file, 0, SEEK_SET);
	//Create hash table assuming average word length is 3 (somewhat worst case) and
	//add extra spaces to reduce collisions. Ensures hash table will not overflow at cost
	//of some extra memory. More spaces also means less collisions so increases performance.
	Hash* hash = hash_new((file_size/3)*1.25);
	DynArray *array = array_new(file_size);

	//String with memory allocation related to the longest word in english dictionary
	int c;
	int characters = 0;
	int n = 0;
	char *str = (char*)malloc(45);
	while((c = fgetc(file)) != EOF){
		//Check if character is a ' or an alphabetic character
		if ((c > 64 && c < 91) || (c > 96 && c < 123) || c == 39){
			if(c > 64 && c < 91){
				c += 32; //If character is upper case convert to lower
			}
			str[n] = c;
			n++;
		}else{
			str[n] = '\0';
			if(strcmp(str, "")){
				printf("String is : %s \n", str);           
				HashItem *temp_item = hash_update(hash, str);
				//If update returns a HashItem it already exists therefore add HashItem to array
				if(temp_item != NULL){
					array_append(array, temp_item);
					printf("String %s added! \n ", str);
					//printf("Item : %s added \n", temp_item->word);
				}else{
					printf("String %s already exists, current count is %d \n", hash_get(hash, str)->word, hash_get(hash, str)->occurences);
				}
				n = 0;
				//allocate new memory for string
				str = (char*)malloc(45);
			}
		}
	}
	fclose(file);
	free(hash);    
	printf("\n \n HERE: %s  SIZE : %d \n",array_get(array, 1)->word, array->number_of_values);
	return array;                  
}

int compare_func(const void *a, const void *b)
{
	HashItem *aItem = (HashItem*)malloc (sizeof (HashItem));
	HashItem *bItem = (HashItem*)malloc (sizeof (HashItem));

	memcpy (aItem, a, sizeof (HashItem));
	memcpy (bItem, b, sizeof (HashItem));

	int aCount = aItem->occurences;
	int bCount = bItem->occurences;

	free(aItem);
	free(bItem);

	printf("comparing %d to %d \n", aCount, bCount);
	return (aCount-bCount);
}

int main (int argc, char *argv[]) {
	DynArray *array = readFile(argv[1]);
	int n = array->number_of_values;
	int i;
	HashItem **standard_array = array_getarray(array);
	for(i = 0; i < n; i++){
		printf("%s : %d \n" , array_get(array, i)->word, array_get(array, i)->occurences);
	}
	printf("trying to sort");
	qsort(standard_array, n, sizeof(HashItem*), compare_func);
	printf("sorted");
	for(n; n > 0; n--){
		printf("Word : %s , occurences : %d \n", item_get_word(standard_array[n]),item_get_occurences(standard_array[n]));
	}
	return 0;
}