# -*- coding:utf8 -*-
# Transliteration table: every accented Vietnamese letter (unicode key) maps
# to the plain ASCII letter it is written on.  The keys are listed grouped by
# their target letter; lower case comes first, then the d-with-stroke pair,
# then upper case.
VN_UNI_ASC = dict(
    (marked, plain)
    for plain, family in (
        ('a', u'áàảãạ' u'ắằẳẵặă' u'ấầẩẫậâ'),
        ('e', u'éèẻẽẹ' u'ếềểễệê'),
        ('i', u'íìỉĩị'),
        ('o', u'óòỏõọ' u'ốồổỗộô' u'ớờởỡợơ'),
        ('u', u'úùủũụ' u'ứừửữựư'),
        ('y', u'ýỳỷỹỵ'),
        ('d', u'đ'),
        ('D', u'Đ'),
        ('A', u'ÁÀẢÃẠ' u'ẮẰẲẴẶĂ' u'ẤẦẨẪẬÂ'),
        ('E', u'ÉÈẺẼẸ' u'ẾỀỂỄỆÊ'),
        ('I', u'ÍÌỈĨỊ'),
        ('O', u'ÓÒỎÕỌ' u'ỐỒỔỖỘÔ' u'ỚỜỞỠỢƠ'),
        ('U', u'ÚÙỦŨỤ' u'ỨỪỬỮỰƯ'),
        ('Y', u'ÝỲỶỸỴ'),
    )
    for marked in family
)
# Build the Vietnamese-character -> collation-weight dictionary.
#
# Every marked lower-case letter is given the code point of its ASCII base
# letter plus a fractional part: int(weight) therefore recovers the base
# letter, and comparing weights orders the marked letters that share a base.
# The letter families follow the Vietnamese alphabet (a, ă, â / e, ê /
# o, ô, ơ / u, ư); 'đ' sits halfway between 'd' and 'e'.
# NOTE(review): within a vowel the tones are ranked acute, grave, hook,
# tilde, dot-below -- confirm this is the convention callers expect.
VN_UNI_FLT = {}
VN_CHARS_TEMP = {u'á':  97.06, u'à':  97.12, u'ả':  97.18, u'ã':  97.24, u'ạ':  97.30,
                 u'ă':  97.33, u'ắ':  97.36, u'ằ':  97.42, u'ẳ':  97.48, u'ẵ':  97.54, u'ặ':  97.60,
                 u'â':  97.63, u'ấ':  97.66, u'ầ':  97.72, u'ẩ':  97.78, u'ẫ':  97.84, u'ậ':  97.90,
                 u'đ': 100.50,
                 u'é': 101.09, u'è': 101.18, u'ẻ': 101.27, u'ẽ': 101.36, u'ẹ': 101.45,
                 u'ê': 101.50, u'ế': 101.54, u'ề': 101.63, u'ể': 101.72, u'ễ': 101.81, u'ệ': 101.90,
                 u'í': 105.18, u'ì': 105.36, u'ỉ': 105.54, u'ĩ': 105.72, u'ị': 105.90,
                 u'ó': 111.06, u'ò': 111.12, u'ỏ': 111.18, u'õ': 111.24, u'ọ': 111.30,
                 # 'ô' precedes 'ơ' in the Vietnamese alphabet (o, ô, ơ).
                 u'ô': 111.33, u'ố': 111.36, u'ồ': 111.42, u'ổ': 111.48, u'ỗ': 111.54, u'ộ': 111.60,
                 u'ơ': 111.63, u'ớ': 111.66, u'ờ': 111.72, u'ở': 111.78, u'ỡ': 111.84, u'ợ': 111.90,
                 u'ú': 117.09, u'ù': 117.18, u'ủ': 117.27, u'ũ': 117.36, u'ụ': 117.45,
                 u'ư': 117.50, u'ứ': 117.54, u'ừ': 117.63, u'ử': 117.72, u'ữ': 117.81, u'ự': 117.90,
                 u'ý': 121.18, u'ỳ': 121.36, u'ỷ': 121.54, u'ỹ': 121.72, u'ỵ': 121.90}
for c in VN_CHARS_TEMP:
    VN_UNI_FLT[c] = VN_CHARS_TEMP[c]
    # Upper-case ASCII letters sit 32 code points below their lower-case
    # forms; applying the same offset keeps the fractional rank intact.
    VN_UNI_FLT[c.upper()] = VN_CHARS_TEMP[c] - 32
# Functions
def OrdVn(c):
    """Return the sort weight of a single character.

    Characters listed in VN_UNI_FLT yield their float weight from that
    table; every other character falls back to the built-in ord().
    """
    if c not in VN_UNI_FLT:
        return ord(c)
    return VN_UNI_FLT[c]
def CompareVnChr(a,b):
    """Three-way comparison of two characters by their OrdVn() weight.

    Returns 1 when `a` weighs more than `b`, -1 when it weighs less and
    0 when both weights are equal.
    """
    weight_a = OrdVn(a)
    weight_b = OrdVn(b)
    # bool - bool gives the int sign of the difference: 1, 0 or -1.
    return (weight_a > weight_b) - (weight_a < weight_b)
def CompareVnStr(s1,s2):
    """Three-way comparison of two strings, character by character.

    The first position where CompareVnChr() reports a difference decides
    the result.  If one string is a prefix of the other, the longer string
    sorts last.  Returns 1, -1 or 0.
    """
    size1 = len(s1)
    size2 = len(s2)
    # zip stops at the end of the shorter string, i.e. the common prefix.
    for left, right in zip(s1, s2):
        verdict = CompareVnChr(left, right)
        if verdict != 0:
            return verdict
    # Common prefix is identical: fall back to comparing the lengths.
    return (size1 > size2) - (size1 < size2)
def Vn2Ascii(s):
    """Strip Vietnamese diacritics from a string.

    Every character that has an entry in VN_UNI_ASC is replaced by its
    plain ASCII letter; all other characters are passed through unchanged.
    Returns the joined result.
    """
    # Use the dedicated transliteration table instead of truncating the
    # collation floats in VN_UNI_FLT: the result is the same, but it no
    # longer depends on how the sort weights happen to be encoded.
    return ''.join([VN_UNI_ASC.get(c, c) for c in s])
def Uni2Ascii(s):
    """Reduce a string to 7-bit ASCII.

    Each character is mapped through OrdVn() and truncated to an int, so
    Vietnamese letters known to that function collapse to their unmarked
    base letter.  Any character whose resulting code is still above 127
    is replaced by '?'.  Returns the joined result.
    """
    # Iterate the characters directly: no index loop is needed, and this
    # avoids xrange(), which does not exist on Python 3.
    codes = [int(OrdVn(c)) for c in s]
    return ''.join(['?' if code > 127 else chr(code) for code in codes])
IyAtKi0gY29kaW5nOnV0ZjggLSotCgojIEJ1aWxkIFZpZXRuYW1lc2UgY2hhcmFjdGVycyB0byBBU0NJSSBjaGFyYWN0ZXJzIGRpY3Rpb25hcnkKVk5fVU5JX0FTQyAgPSB7dSfDoSc6J2EnLCB1J8OgJzonYScsIHUn4bqjJzonYScsIHUnw6MnOidhJywgdSfhuqEnOidhJywKICAgICAgICAgICAgICAgdSfhuq8nOidhJywgdSfhurEnOidhJywgdSfhurMnOidhJywgdSfhurUnOidhJywgdSfhurcnOidhJywgdSfEgyc6J2EnLAogICAgICAgICAgICAgICB1J+G6pSc6J2EnLCB1J+G6pyc6J2EnLCB1J+G6qSc6J2EnLCB1J+G6qyc6J2EnLCB1J+G6rSc6J2EnLCB1J8OiJzonYScsIAogICAgICAgICAgICAgICB1J8OpJzonZScsIHUnw6gnOidlJywgdSfhursnOidlJywgdSfhur0nOidlJywgdSfhurknOidlJywgCiAgICAgICAgICAgICAgIHUn4bq/JzonZScsIHUn4buBJzonZScsIHUn4buDJzonZScsIHUn4buFJzonZScsIHUn4buHJzonZScsIHUnw6onOidlJywgCiAgICAgICAgICAgICAgIHUnw60nOidpJywgdSfDrCc6J2knLCB1J+G7iSc6J2knLCB1J8SpJzonaScsIHUn4buLJzonaScsIAogICAgICAgICAgICAgICB1J8OzJzonbycsIHUnw7InOidvJywgdSfhu48nOidvJywgdSfDtSc6J28nLCB1J+G7jSc6J28nLCAKICAgICAgICAgICAgICAgdSfhu5EnOidvJywgdSfhu5MnOidvJywgdSfhu5UnOidvJywgdSfhu5cnOidvJywgdSfhu5knOidvJywgdSfDtCc6J28nLCAKICAgICAgICAgICAgICAgdSfhu5snOidvJywgdSfhu50nOidvJywgdSfhu58nOidvJywgdSfhu6EnOidvJywgdSfhu6MnOidvJywgdSfGoSc6J28nLCAKICAgICAgICAgICAgICAgdSfDuic6J3UnLCB1J8O5JzondScsIHUn4bunJzondScsIHUnxaknOid1JywgdSfhu6UnOid1JywgCiAgICAgICAgICAgICAgIHUn4bupJzondScsIHUn4burJzondScsIHUn4butJzondScsIHUn4buvJzondScsIHUn4buxJzondScsIHUnxrAnOid1JywgCiAgICAgICAgICAgICAgIHUnw70nOid5JywgdSfhu7MnOid5JywgdSfhu7cnOid5JywgdSfhu7knOid5JywgdSfhu7UnOid5JywgCiAgICAgICAgICAgICAgIHUnxJEnOidkJywgdSfEkCc6J0QnLAogICAgICAgICAgICAgICB1J8OBJzonQScsIHUnw4AnOidBJywgdSfhuqInOidBJywgdSfDgyc6J0EnLCB1J+G6oCc6J0EnLAogICAgICAgICAgICAgICB1J+G6ric6J0EnLCB1J+G6sCc6J0EnLCB1J+G6sic6J0EnLCB1J+G6tCc6J0EnLCB1J+G6tic6J0EnLCB1J8SCJzonQScsCiAgICAgICAgICAgICAgIHUn4bqkJzonQScsIHUn4bqmJzonQScsIHUn4bqoJzonQScsIHUn4bqqJzonQScsIHUn4bqsJzonQScsIHUnw4InOidBJywgCiAgICAgICAgICAgICAgIHUnw4knOidFJywgdSfDiCc6J0UnLCB1J+G6uic6J0UnLCB1J+G6vCc6J0UnLCB1J+G6uCc6J0UnLCAKICAgICAgICAgICAgICAgdSfhur4nOidFJywgdSfhu4AnOidFJywgdSfhu4InOidFJywgdSfhu4QnOidFJywgdSfhu4YnOidFJywgdSfDiic6J0UnLCAKICAgICAgICAgICAg
ICAgdSfDjSc6J0knLCB1J8OMJzonSScsIHUn4buIJzonSScsIHUnxKgnOidJJywgdSfhu4onOidJJywgCiAgICAgICAgICAgICAgIHUnw5MnOidPJywgdSfDkic6J08nLCB1J+G7jic6J08nLCB1J8OVJzonTycsIHUn4buMJzonTycsIAogICAgICAgICAgICAgICB1J+G7kCc6J08nLCB1J+G7kic6J08nLCB1J+G7lCc6J08nLCB1J+G7lic6J08nLCB1J+G7mCc6J08nLCB1J8OUJzonTycsIAogICAgICAgICAgICAgICB1J+G7mic6J08nLCB1J+G7nCc6J08nLCB1J+G7nic6J08nLCB1J+G7oCc6J08nLCB1J+G7oic6J08nLCB1J8agJzonTycsIAogICAgICAgICAgICAgICB1J8OaJzonVScsIHUnw5knOidVJywgdSfhu6YnOidVJywgdSfFqCc6J1UnLCB1J+G7pCc6J1UnLCAKICAgICAgICAgICAgICAgdSfhu6gnOidVJywgdSfhu6onOidVJywgdSfhu6wnOidVJywgdSfhu64nOidVJywgdSfhu7AnOidVJywgdSfGryc6J1UnLCAKICAgICAgICAgICAgICAgdSfDnSc6J1knLCB1J+G7sic6J1knLCB1J+G7tic6J1knLCB1J+G7uCc6J1knLCB1J+G7tCc6J1knfQoKIyBCdWlsZCBWaWV0bmFtZXNlIGNoYXJhY3RlcnMgdG8gZmxvYXRzIGRpY3Rpb25hcnkKVk5fVU5JX0ZMVCA9IHt9ClZOX0NIQVJTX1RFTVAgPSB7dSfDoSc6ICA5Ny4wNiwgdSfDoCc6ICA5Ny4xMiwgdSfhuqMnOiAgOTcuMTgsIHUnw6MnOiAgOTcuMjQsIHUn4bqhJzogIDk3LjMwLAogICAgICAgICAgICAgICAgIHUnxIMnOiAgOTcuMzMsIHUn4bqvJzogIDk3LjM2LCB1J+G6sSc6ICA5Ny40MiwgdSfhurMnOiAgOTcuNDgsIHUn4bq1JzogIDk3LjU0LCB1J+G6tyc6ICA5Ny42MCwKICAgICAgICAgICAgICAgICB1J8OiJzogIDk3LjYzLCB1J+G6pSc6ICA5Ny42NiwgdSfhuqcnOiAgOTcuNzIsIHUn4bqpJzogIDk3Ljc4LCB1J+G6qyc6ICA5Ny44NCwgdSfhuq0nOiAgOTcuOTAsCiAgICAgICAgICAgICAgICAgdSfEkSc6IDEwMC41MCwKICAgICAgICAgICAgICAgICB1J8OpJzogMTAxLjA5LCB1J8OoJzogMTAxLjE4LCB1J+G6uyc6IDEwMS4yNywgdSfhur0nOiAxMDEuMzYsIHUn4bq5JzogMTAxLjQ1LAogICAgICAgICAgICAgICAgIHUnw6onOiAxMDEuNTAsIHUn4bq/JzogMTAxLjU0LCB1J+G7gSc6IDEwMS42MywgdSfhu4MnOiAxMDEuNzIsIHUn4buFJzogMTAxLjgxLCB1J+G7hyc6IDEwMS45MCwKICAgICAgICAgICAgICAgICB1J8OtJzogMTA1LjE4LCB1J8OsJzogMTA1LjM2LCB1J+G7iSc6IDEwNS41NCwgdSfEqSc6IDEwNS43MiwgdSfhu4snOiAxMDUuOTAsICAgICAgICAgICAKICAgICAgICAgICAgICAgICB1J8OzJzogMTExLjA2LCB1J8OyJzogMTExLjEyLCB1J+G7jyc6IDExMS4xOCwgdSfDtSc6IDExMS4yNCwgdSfhu40nOiAxMTEuMzAsCiAgICAgICAgICAgICAgICAgdSfGoSc6IDExMS4zMywgdSfhu5snOiAxMTEuMzYsIHUn4budJzogMTExLjQyLCB1J+G7nyc6IDExMS40OCwgdSfhu6EnOiAxMTEuNTQsIHUn4bujJzogMTExLjYwLAogICAgICAgICAgICAgICAgIHUnw7Qn
OiAxMTEuNjMsIHUn4buRJzogMTExLjY2LCB1J+G7kyc6IDExMS43MiwgdSfhu5UnOiAxMTEuNzgsIHUn4buXJzogMTExLjg0LCB1J+G7mSc6IDExMS45MCwKICAgICAgICAgICAgICAgICB1J8O6JzogMTE3LjA5LCB1J8O5JzogMTE3LjE4LCB1J+G7pyc6IDExNy4yNywgdSfFqSc6IDExNy4zNiwgdSfhu6UnOiAxMTcuNDUsCiAgICAgICAgICAgICAgICAgdSfGsCc6IDExNy41MCwgdSfhu6knOiAxMTcuNTQsIHUn4burJzogMTE3LjYzLCB1J+G7rSc6IDExNy43MiwgdSfhu68nOiAxMTcuODEsIHUn4buxJzogMTE3LjkwLAogICAgICAgICAgICAgICAgIHUnw70nOiAxMjEuMTgsIHUn4buzJzogMTIxLjM2LCB1J+G7tyc6IDEyMS41NCwgdSfhu7knOiAxMjEuNzIsIHUn4bu1JzogMTIxLjkwfQpmb3IgYyBpbiBWTl9DSEFSU19URU1QOgogICAgVk5fVU5JX0ZMVFtjXSwgVk5fVU5JX0ZMVFtjLnVwcGVyKCldID0gVk5fQ0hBUlNfVEVNUFtjXSwgVk5fQ0hBUlNfVEVNUFtjXSAtIDMyCgoKIyBGdW5jdGlvbnMKZGVmIE9yZFZuKGMpOgogICAgJ1VwZGF0ZWQgdmVyc2lvbiBvZiBvcmQoKScKICAgIHJldHVybiBWTl9VTklfRkxUW2NdIGlmIGMgaW4gVk5fVU5JX0ZMVCBlbHNlIG9yZChjKQoKZGVmIENvbXBhcmVWbkNocihhLGIpOgogICAgJ0FscGhhYmV0aWNhbCBvcmRlciBvZiBWaWV0bmFtZXNlIGNoYXJhY3RlcnMnCiAgICBhLCBiID0gT3JkVm4oYSksIE9yZFZuKGIpCiAgICBpZiBhID4gYjogcmV0dXJuIDEKICAgIGVsaWYgYSA8IGI6IHJldHVybiAtMQogICAgcmV0dXJuIDAKCmRlZiBDb21wYXJlVm5TdHIoczEsczIpOgogICAgJ0FscGhhYmV0aWNhbCBvcmRlciBvZiBWaWV0bmFtZXNlIHN0cmluZ3MnCiAgICBsZW4xLCBsZW4yID0gbGVuKHMxKSwgbGVuKHMyKQogICAgaV9tYXggPSBsZW4xIGlmIGxlbjEgPCBsZW4yIGVsc2UgbGVuMgogICAgZm9yIGkgaW4gcmFuZ2UoaV9tYXgpOgogICAgICAgIGMgPSBDb21wYXJlVm5DaHIoczFbaV0sczJbaV0pCiAgICAgICAgaWYgYzogcmV0dXJuIGMKICAgIGlmIGxlbjEgPiBpX21heDogcmV0dXJuIDEKICAgIGlmIGxlbjIgPiBpX21heDogcmV0dXJuIC0xCiAgICByZXR1cm4gMAoKZGVmIFZuMkFzY2lpKHMpOgogICAgJ0NvbnZlcnQgVmlldG5hbWVzZSBzdHJpbmcgd2l0aCBtYXJrcyB0byBWaWV0bmFtZXNlIHN0cmluZyB3aXRoIG5vIG1hcmtzJwogICAgcmV0dXJuICcnLmpvaW4oIFsoY2hyKGludChWTl9VTklfRkxUW2NdKSkgaWYgYyBpbiBWTl9VTklfRkxUIGVsc2UgYykKICAgICAgICAgICAgICAgICAgICAgZm9yIGMgaW4gc10gKQoKZGVmIFVuaTJBc2NpaShzKToKICAgICdDb252ZXJ0IFZpZXRuYW1lc2Ugc3RyaW5nIHdpdGggbWFya3MgdG8gVmlldG5hbWVzZSBzdHJpbmcgd2l0aCBubyBtYXJrcycKICAgIHMgPSBbYyBmb3IgYyBpbiBzXQogICAgZm9yIGkgaW4geHJhbmdlKGxlbihzKSk6CiAgICAgICAgbyA9IGludChPcmRWbihzW2ldKSkKICAgICAgICBzW2ldID0gJz8nIGlmIG8gPiAx
MjcgZWxzZSBjaHIobykKICAgIHJldHVybiAnJy5qb2luKHMpCg==