#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>
 
#define LOG(...) fprintf(stderr,__VA_ARGS__);
 
typedef unsigned int uint;
typedef uint64_t ulongint;
 
enum settings_list
{
        MARKOVLVL = 3,              //level of markov chain
        MAXTEXTSIZE = 10000,        //maximal size of text
        AVERAGESENTENCESIZE = 7,    //average size of sentence
                AVERAGELINESIZE = 15
};
 /*
ulongint hash_coefficients[13]={1,37,1369,50653,1874161,69343957,2565726409,94931877133,3512479453921,
129961739795077,4808584372417849,177917621779460413,6582952005840035281};*/
 
 
ulongint hash_coefficients[13]={1, 31, 961, 29791, 923521, 28629151, 887503681, 27512614111, 852891037441,
26439622160671, 819628286980801, 25408476896404831, 787662783788549761};
//int settings[sizeof(settings_list)];
 
char* delimeter_list[9] = {" ", "...", "?!", "?",  ".", "!", ":", ";", ","};
typedef struct suffix
{
        char * text;
        struct suffix * next; //pointer to the next suffix
        uint suff_counter;  //how many times this suffix appears
        uint eos_counter;   //how many times this prefix was an end of sentence
        struct suffix * delimeter_list;
} suffix;
 
void free_s_list(suffix ** suff)
{
        if((*suff)->next == NULL)
        {
                free((*suff)->text);
                (*suff)->text = NULL;
                free((*suff));
                (*suff) = NULL;
                return;
        }
        else
        {
                free_s_list(&((*suff)->next));
                free_s_list(suff);
        }
}
 
 
typedef struct prefix
{
        suffix * suffix_list;
        uint sl_size; //size of suffix list
        char * words[MARKOVLVL]; 
        uint counter; //how many times this prefix appears
}prefix ;
 
char * get_delimeter(char * str)
{
        char * tmp;
        for(int i = 1; i <= 8; i++)
        {
                tmp = strstr(str, delimeter_list[i]);
                if(tmp!=NULL && strlen(tmp) == strlen(delimeter_list[i]))
                        return delimeter_list[i];
        }
        return delimeter_list[0]; //return space symbol
}
 
int get_delimeter_index(char * str)
{
        char * tmp;
        for(int i = 1; i <= 8; i++)
        {
                tmp = strstr(str, delimeter_list[i]);
                if(tmp!=NULL && strlen(tmp) == strlen(delimeter_list[i]))
                        return i;
        }
        return 0; //return space symbol
}
 
char * remove_delimeter(char * str)
{
        int len = strlen(str);
        if(len == 0 || len == 1)
                return str;
        char * result;
        char * gd = get_delimeter(str);
        if(gd != NULL && strcmp(gd," ") != 0)
        {
			result = (char*) malloc(sizeof(char)*(len-strlen(gd)+1));
			strncpy(result, str, len-strlen(gd));
			/*for(int i = 0; i < len-strlen(gd); i++)
					result[i] = str[i];*/
			result[len-strlen(gd)] = '\0';
        }
        else
        {
                if(str[0] == '\"' && str[len-1] == '\"')
                {
					result = (char*) malloc(sizeof(char)*(len-2+1));
					for(int i = 1; i < len-1; i++)
							result[i-1] = str[i];
					result[len-2] = '\0';
                }
                else if(str[0] == '\"')
                {
					result = (char*) malloc(sizeof(char)*(len-1+1));
					for(int i = 1; i <= len-1; i++)
							result[i-1] = str[i];
					result[len-1] = '\0';
                }
                else if(str[len-1] == '\"')
                {
					result = (char*) malloc(sizeof(char)*(len-1+1));
					for(int i = 0; i < len-1; i++)
							result[i] = str[i];
					result[len-1] = '\0';
                }
                else
                {
					result = (char*) malloc(sizeof(char)*(len+1));
					strcpy(result, str);
					result[len] = '\0';
                }
        }
        return result;
}
 
uint is_eos_delimeter(char * delim)
{
        if(strcmp(delim,"!") == 0 || strcmp(delim,"?") == 0 ||
                strcmp(delim,".") == 0 || strcmp(delim,"?!") == 0 ||
                strcmp(delim,"...") == 0)
                return 1;
        else return 0;
}
 
char * lower(char * str) //locale-dependend function
{
        if(str[0] <= 'Z' && str[0] >= 'A')
                str[0] += 'a'-'A';
        return str;
}
 
char * higher(char * str) //locale-dependend function
{
        if(str[0] <= 'z' && str[0] >= 'A')
                str[0] -= 'a'-'A';
        return str;
}
 
prefix * create_prefix(char * pref[MARKOVLVL], char * suff)
{
	LOG("IN CREATE_PREFIX FUNCTION\n");
	LOG("Allocating memory for result\n");
	prefix * result = (prefix*) malloc(sizeof(prefix));
	char * temp;
	temp = remove_delimeter(suff);
	LOG("Allocating memory for suffix_list\n");
	result->suffix_list = (suffix*) malloc(sizeof(suffix));
	result->suffix_list->next = NULL;
	LOG("Allocating memory for suffix_list->text\n");
	result->suffix_list->text = (char*) malloc(strlen(temp)+1);
	result->suffix_list->suff_counter = 1;
	LOG("Allocating memory for suffix_list->delimeter_list\n");
	result->suffix_list->delimeter_list = (suffix*) malloc(sizeof(suffix));
	result->suffix_list->delimeter_list->next = NULL;
	result->suffix_list->delimeter_list->text = get_delimeter(suff);
	result->suffix_list->delimeter_list->suff_counter = 1;
	if(is_eos_delimeter(result->suffix_list->delimeter_list->text) == 1)
	{
		result->suffix_list->delimeter_list->eos_counter = 1;
		result->suffix_list->eos_counter = 1;
	}
	//strcpy(result->suffix_list->text,lower(temp));
	strcpy(result->suffix_list->text,temp);
	/*if(is_eos_delimeter(get_delimeter(pref[MARKOVLVL-1])) == 0)
	{
		LOG("Copying LOWER suffix\n");
		strcpy(result->suffix_list->text,lower(temp));
	}
	else
	{
		LOG("Copying suffix\n");
		strcpy(result->suffix_list->text,temp);
	}*/
	//LOG("SUFFIX:%s\n TEMPORARY STRING:%s\n",suff,temp);
	LOG("Freeing temp memory\n");
	if(temp != NULL)
	{
		LOG("Temp memory address: %p\n", temp);
		free(temp);
		temp = NULL;
	}
	else
		LOG("Problem with freeing\n");
	LOG("Temp memory freed\n");
	for(int i = 0; i < MARKOVLVL; i++)
	{
		//temp = remove_delimeter(lower(pref[i]));
		temp = remove_delimeter(pref[i]);
		//LOG("SUFFIX:%s\n TEMPORARY STRING:%s\n",suff,temp);
		LOG("Allocating memory for result->words[%d]\n", i);
		result->words[i] = (char*) malloc(strlen(temp)+1);
		strcpy(result->words[i], temp);
		LOG("Freeing temp memory\n");
		free(temp);
		temp = NULL;
	}
	//LOG("SUFFIX:%s\n",suff);
	result->counter = 1;
	result->sl_size = 1;
	LOG("Returning result\n");
	return result;
}
 
void delete_prefix(prefix ** pref)
{
        if((*pref)->suffix_list != NULL)
                free_s_list(&((*pref)->suffix_list));
        for(int i = 0; i < MARKOVLVL; i++)
                {
                        if((*pref)->words[i] != NULL)
                                free((*pref)->words[i]);
                }
                if((*pref) != NULL)
                        free((*pref));
}
 
void add_delimeter(suffix * suff, int delim_index)
{
        char * delim = delimeter_list[delim_index];
        if(is_eos_delimeter(delim) == 1)
                suff->eos_counter += 1;
        suffix * dlist;
        dlist = suff->delimeter_list;
        if(dlist != NULL)
        {
                for(int i = 0; i < suff->suff_counter;)
                {
                        if(strcmp(dlist->text, delim) == 0)
                        {
                                dlist->suff_counter +=1;
                                return;
                        }
                        i += dlist->suff_counter;
                        if(dlist->next == NULL)
                        {
                                dlist->next = (suffix*) malloc(sizeof(suffix));
                                dlist = dlist->next;
                                break;
                        }
                }
        }
        else
        {
                dlist = (suffix*) malloc(sizeof(suffix));
        }
        dlist->next = NULL;
        suff->suff_counter += 1;
        dlist->suff_counter = 1;
        dlist->text = delim;
}
 
void add_suffix(prefix * pref, char * suff, int delim_index, int is_pref_eos)
{
    suffix * suf = pref->suffix_list;
    for(int i = 0; i < pref->sl_size;)
    {
        if(strcmp(suf->text, suff) == 0)
        {
                        add_delimeter(suf, delim_index);
            suf->suff_counter += 1;
            pref->sl_size++;
            return;
        }
        i+=suf->suff_counter;
        if( suf->next == NULL)
        {
            suf->next =(suffix*) malloc(sizeof(suffix));
            suf = suf->next;
            break;
        }
    }
    pref->sl_size++;
    suf->next = NULL;
    suf->text = (char*) malloc(strlen(suff)+1);
    /*if(is_pref_eos == 1)
                strcpy(suf->text, lower(suff));
        else
                strcpy(suf->text, suff);*/
    strcpy(suf->text, suff);
    suf->suff_counter = 1;
        pref->counter += 1;
}
 
suffix * get_random_suffix(suffix * suffix_list, int sl_size)
{
        if(suffix_list == NULL ||sl_size == 0)
                return NULL;
        int randint = rand() % sl_size;
        suffix * suf = suffix_list;
        for(int i = 0; i < sl_size;)
        {
                randint -= suf->suff_counter;
                if(randint < 0 || suf->next == NULL)
                        break;
        }
        return suf;
}
 
prefix empty_prefix = {NULL,0,{NULL,NULL,NULL},0};
 
typedef struct bt_node
{
        struct bt_node * left;
        struct bt_node * right;
        //bt_node * parent;
        ulongint key;
        prefix * value;
} bt_node;
 
bt_node * bt_add(bt_node * root, char * pref[MARKOVLVL], ulongint pref_hash, char * suff)
{
	LOG("IN BT_ADD FUNCTION\n");
	if(root == NULL)
	{
		root = (bt_node*) malloc(sizeof(bt_node));
		root->left = NULL;
		root->right = NULL;
		//root->parent = NULL;
		root->key = pref_hash;
		LOG("Going to CREATE_PREFIX\n");
		root->value = create_prefix(pref, suff);
	}
	else if(pref_hash < root->key)
	{
		root->left = bt_add(root->left, pref, pref_hash, suff);
	}
	else if(pref_hash > root->key)
	{
		root->right = bt_add(root->right, pref, pref_hash, suff);
	}
	else if(pref_hash == root->key)
	{
		LOG("Going to ADD_SUFFIX\n");
		add_suffix(root->value, remove_delimeter(suff), get_delimeter_index(suff), is_eos_delimeter(get_delimeter(pref[MARKOVLVL-1])));
	}
	return root;
}
 
bt_node * bt_search(bt_node * root, ulongint key)
{
        if(root == NULL)
                return NULL;
        else if(key < root->key)
                return bt_search(root->left, key);
        else if(key > root->key)
                return bt_search(root->right,key);
        else if(key == root->key)
                return root;
        return NULL;
}
 
void bt_delete(bt_node ** root)
{
        if((*root)->left!=NULL)
                bt_delete(&((*root)->left));
        else if((*root)->right!=NULL)
                bt_delete(&((*root)->right));
        else
                free(*root);
}
 
bool xor(bool a, bool b)
{
	return (a&&!b)||(!a&&b);
}

prefix * bt_get_random_prefix(bt_node * root, ulongint rnd)
{
        if(root->left == NULL && root->right == NULL)
                return root->value;
        //else if((root->key - rnd) == rnd || (root->key == rnd))
        //      return root->value;
        else if(!xor(root->right != NULL , rnd - rand() < rnd) )
                return bt_get_random_prefix(root->right, rnd);
        else
                return bt_get_random_prefix(root->left, rnd);
}
 
ulongint hash(char * str)
{
        int numlen = strlen(str);
        ulongint result = 0;
        for(int i = 0; i < numlen; i++)
                result += (str[i] + 1)*hash_coefficients[i%13];
        return result;
}       
 
ulongint prefix_hash(char * pref[MARKOVLVL])
{
        ulongint result = 0;
        for(int i = 0; i < MARKOVLVL; i++)
                result += hash(pref[i])*(i+2);
        return result;
}
 
char * read_word(FILE * fd)
{
        int max_size = 5, cur_size = 0;
        char * result =(char *) malloc(max_size*sizeof(char));
        char ch = fgetc(fd);
        while(ch != EOF && ch != '\n' && ch != ' ' && ch != '\t')
        {
                if(cur_size >= max_size)
                {
                        max_size *= 2;
                        result = (char * ) realloc(result, max_size*sizeof(char));
                }
                result[cur_size] = ch;
                cur_size++;
                ch = fgetc(fd);
        }
        if(cur_size == 0)
        {
                free(result);
                return NULL;
        }
        else
        {
                result = (char*) realloc(result, ((cur_size+1)*sizeof(char)));
                result[cur_size] = '\0';
        }
        return result;
}
 
bt_node * generate_tree(FILE * fd)
{
	bt_node * root = NULL;
	char * pref[MARKOVLVL];
	char * suf;
	char * temp;
	LOG("Initial fd reading\n");
	for(int i = 0; i < MARKOVLVL; i++)
	{
		pref[i] = read_word(fd);
		if(pref[i] == NULL)
		{
				LOG("Input file reading error. Exiting");
				exit(1);
				return NULL;
		}
	}
	while(1)
	{
		LOG("Reading suffix\n");
		suf = read_word(fd);
		LOG("Suffix is %s\n", suf);
		if(suf == NULL)
		{
			if(feof(fd))
					break;
			else
					continue;		
		}
		LOG("Adding suffix to tree\n");
		root = bt_add(root, pref, prefix_hash(pref), suf);
		LOG("Suffix was succesfully added\n");
		LOG("Freeing pref[0] memory\n");
		free(pref[0]);
		for(int i = 0; i < MARKOVLVL-1; i++)
		{
				pref[i] = pref[i+1];
		}
		/*for(int i = 0; i < MARKOVLVL; i++)
				LOG("PREFIX[%d]: %s\n",i,pref[i]);*/
		LOG("Removing delimeter from suffix\n");
		temp = remove_delimeter(suf);
		pref[MARKOVLVL-1] = temp;
		LOG("Freeing suffix memory\n");
		free(suf);
	}
	for(int i = 0; i < MARKOVLVL; i++)
		if(pref[i] != NULL)
				free(pref[i]);
	free(suf);
	return root;
}
 
void generate_text(bt_node * root, FILE * fd, uint text_size)
{
	LOG("IN GENERATE_TEXT FUNCTION\n");
	if(root == NULL)
		return;
	char * pref[MARKOVLVL];
	LOG("Getting random prefix\n");
	prefix * cur_pref = bt_get_random_prefix(root, rand());
	for(int i = 0; i < MARKOVLVL; i++)
	{
		pref[i] = cur_pref->words[i];
		fprintf(fd, "%s ", pref[i]);
	}
	LOG("Getting random suffix\n");
	suffix * t_suf = get_random_suffix(cur_pref->suffix_list, cur_pref->sl_size);
	LOG("Got random suffix\n");
	char * suf = t_suf->text;
	char * delim = (get_random_suffix(t_suf->delimeter_list, t_suf->suff_counter))->text;
	uint ls_counter = MARKOVLVL+1; //current line size counter
	uint txt_counter = MARKOVLVL+1; //current text size counter
	uint sent_counter = MARKOVLVL+1;
	fprintf(fd, "%s ", suf);
	uint eos_flag = 0;
	while(txt_counter < text_size && txt_counter < MAXTEXTSIZE)
	{
		if(ls_counter > AVERAGELINESIZE)
		{
			fprintf(fd, "\b\n");
			ls_counter = 1;
		}
		for(int i = 0; i < MARKOVLVL-1; i++)
		{
				pref[i] = pref[i+1];
		}
		pref[MARKOVLVL-1] = suf;
		LOG("Searching for prefix\n");
		cur_pref = (bt_search(root, prefix_hash(pref)))->value;
		LOG("Getting random suffix\n");
		t_suf = get_random_suffix(cur_pref->suffix_list, cur_pref->sl_size);
		suf = t_suf->text;
		if(sent_counter > AVERAGESENTENCESIZE)
		{
			delim = delimeter_list[rand()%5 +1];
			sent_counter = 0;
			eos_flag = 0;
		}
		else
		{
			delim = (get_random_suffix(t_suf->delimeter_list, t_suf->suff_counter))->text;
			if(is_eos_delimeter(delim) == 1)
			{
				sent_counter = 0;
				eos_flag = 0;
			}
			else
				eos_flag++;
		}/*
		if(eos_flag == 1)
		{
			if(strcmp(delim, " ") == 0)
					fprintf(fd, "%s ", higher(suf));
			else
					fprintf(fd, "%s%s ", higher(suf), delim);
		}
		else
		{*/
			if(strcmp(delim, " ") == 0)
					fprintf(fd, "%s ", suf);
			else
					fprintf(fd, "%s%s ", suf, delim);
		//}
		txt_counter++;
		ls_counter++;
		sent_counter++;
	}

}
 
int main(void)
{
        /*settings[MARKOVLVL] = 3;
        settings[HASHSIZE] = 10000;
        settings[MAXTEXTSIZE] = 10000;
        settings[AVERAGESENTENCESIZE] = 7;*/
        //char * somestr[MARKOVLVL] = {"some","shitty","string"};
        //prefix * pref = create_prefix(somestr, "wow");
 //   add_suffix(pref, "wow");
 //   add_suffix(pref, "notwow");
 //   suffix* suff = pref->suffix_list;
 //   while(suff != NULL)
 //   {
 //           LOG("%s\n",suff->text);
 //           if(suff->next == NULL)
 //                   break;
 //           else
 //                   suff = suff->next;
 //   }
 //   delete_prefix(&pref);
        srand(time(NULL));
        /*FILE * fd = fopen("C:\\Users\\sai\\Documents\\Visual Studio 2010\\Projects\\markov_chain\\markov_chain\\input_01.txt", "r");
        FILE * fd1 = fopen("C:\\Users\\sai\\Documents\\Visual Studio 2010\\Projects\\markov_chain\\markov_chain\\output_01.txt", "w");
        if(fd == NULL)
        {
                LOG("Couldn\'t open file");
                getchar();
                exit(1);
        }*/
		LOG("Going to tree building\n");
        bt_node * root = generate_tree(stdin);
        LOG("Going to text generation\n");
        FILE * output = fopen("./generated_text_02","w");
        if(output == NULL)
        {
                LOG("Couldn\'t open file");
                getchar();
                exit(1);
        }
        generate_text(root, output, 100);
        bt_delete(&root);
        fclose(output);
        /*char * str = (char *) malloc(7);
        scanf("%s",str);
        printf("%s\n",str);
        str = lower(str);
        printf("%s\n", str);
    getchar();*/
        getchar();
    return 0;
}
