#-*- coding:utf-8 -*-

""" A basic lexicographic Similarity algorithm ...part1
    @author: WhiZTiM (Timothy Onogu)
	@copyright: (c) January, 2013
	@NOTE: This algorithm is provided as is, in the hope
		that it will be useful; I will not be responsible
		for any failures that may arise.
	@contact: whiztim@whiztim.com
"""

def split_on_delimeters(string, delimeter=":;. \"!,$()-_+=~'`"):
    """Break ``string`` into tokens at every character found in ``delimeter``.

    A run of consecutive delimiter characters counts as a single break, so
    the result never contains empty tokens.  The tokens are returned as a
    list, in the order they appear in ``string``.
    """
    tokens = []
    current = ""
    for char in string:
        if char in delimeter:
            # A delimiter closes the token being built, if there is one.
            if current != "":
                tokens.append(current)
                current = ""
        else:
            current = current + char
    # The loop only flushes on a delimiter, so emit any trailing token here.
    if current != "":
        tokens.append(current)
    return tokens

def returnbigrams(string):
    """Return every pair of adjacent items of ``string`` as a list of slices.

    The pairs overlap ("rank" gives ["ra", "an", "nk"]) and keep their order
    of appearance.  An input shorter than two items gives an empty list.
    """
    # Slide over the input by the index just past each pair rather than by
    # the pair's starting index.
    return [string[end - 2:end] for end in range(2, len(string) + 1)]

def word_similarity(word1, word2, case_sensitive=False):
    """Return, as a percentage, how similar ``word1`` is to ``word2``.

    The score is ``2 * shared / total * 100`` (the Dice coefficient over
    bigrams), where ``shared`` is the number of bigrams the two words have
    in common and ``total`` is the combined number of bigrams in both
    words.  Each bigram of ``word2`` can be matched at most once, so
    repeated bigrams are counted with their multiplicity.

    word1, word2   -- the words to compare
    case_sensitive -- when False (the default) both words are lower-cased
                      before comparison

    Returns a float between 0.0 and 100.0.  When neither word has any
    bigram (both are shorter than two characters) the formula would divide
    by zero; in that case the result is 100.0 if the words are equal and
    0.0 otherwise.
    """
    if not case_sensitive:
        word1 = word1.lower()
        word2 = word2.lower()

    pairs_word1 = returnbigrams(word1)
    unmatched = returnbigrams(word2)
    total = len(pairs_word1) + len(unmatched)

    if total == 0:
        # No bigrams on either side: fall back to plain equality instead of
        # raising ZeroDivisionError.
        return 100.0 if word1 == word2 else 0.0

    shared = 0
    for pair in pairs_word1:
        if pair in unmatched:
            # Consume the match so a bigram of word2 is never counted twice.
            unmatched.remove(pair)
            shared += 1

    # A shared bigram counts once for each word, hence the factor of two.
    return 2.0 * shared / total * 100.0

# Demo: for every word of the longer sentence, report its best similarity
# score against the words of the shorter sentence.
sent1 = "Getting higher on rank oh"
sent2 = "Getting on higher rank"

list1 = split_on_delimeters(sent1)
list2 = split_on_delimeters(sent2)

# The sentence with more words drives the outer loop; on a tie list2 does.
if len(list1) > len(list2):
    outer_words, inner_words = list1, list2
else:
    outer_words, inner_words = list2, list1

list1_ratios = []
for x in outer_words:
    list2_ratios = [word_similarity(x, y) for y in inner_words]
    # max() raises ValueError on an empty sequence, so score a word 0.0
    # when the other sentence contains no words at all.
    list1_ratios.append(max(list2_ratios) if list2_ratios else 0.0)

# Parenthesised single-argument form prints the same under Python 2 and 3.
print(list1_ratios)


