# http://stackoverflow.com/q/40122058/5290909
import re
# Sample input mixing English text with runs of Chinese characters.
# NOTE(review): this script is Python 2 (it uses a ``ur''`` literal and the
# ``print`` statement), where a plain "..." literal is a byte string.
# Python 2 rejects non-ASCII bytes in a source file unless a PEP 263 encoding
# declaration (e.g. ``# -*- coding: utf-8 -*-``) appears on line 1 or 2; none
# is visible here -- confirm the file actually loads.
s = "You say: 你好. I say: 再見. 答案, my friend, 在風在吹"
# Decode the UTF-8 bytes into a unicode object so the regex substitution can
# match on code points rather than on individual bytes.
utf_line = s.decode('utf-8')
# Translation table: Chinese phrase (as written in this file) -> English
# replacement text.  translate() looks entries up by the UTF-8 encoding of
# the matched text.
# NOTE(review): the name ``dict`` shadows the builtin type; it is kept because
# translate() reads the table under this name.
dict = {
    "你好": "hello",
    "再見": "goodbye",
    "答案": "The answer",
    "在風在吹": "is blowing in the wind",
}
def translate(m):
    """Return the replacement text for one regex match.

    The matched text is encoded to UTF-8 and looked up in the module-level
    ``dict`` table.  ``"----"`` is returned when the table has no entry for
    the matched text.
    """
    key = m.group().encode('utf-8')
    # One lookup with a fallback instead of a membership test plus indexing.
    return dict.get(key, "----")
utf_translated = re.sub(ur'[\u4e00-\u9fff]+', translate, utf_line, re.UNICODE)
print utf_translated.encode('utf-8')
IyBodHRwOi8vc3RhY2tvdmVyZmxvdy5jb20vcS80MDEyMjA1OC81MjkwOTA5CmltcG9ydCByZQoKcyA9ICJZb3Ugc2F5OiDkvaDlpb0uIEkgc2F5OiDlho3oposuIOetlOahiCwgbXkgZnJpZW5kLCDlnKjpoqjlnKjlkLkiCnV0Zl9saW5lID0gcy5kZWNvZGUoJ3V0Zi04JykKCmRpY3QgPSB7IuS9oOWlvSIgOiAiaGVsbG8iLAogICAgICAgICLlho3oposiIDogImdvb2RieWUiLAogICAgICAgICLnrZTmoYgiIDogIlRoZSBhbnN3ZXIiLAogICAgICAgICLlnKjpoqjlnKjlkLkiIDogImlzIGJsb3dpbmcgaW4gdGhlIHdpbmQiLAoJICAgfQoKZGVmIHRyYW5zbGF0ZShtKToKCWJsb2NrID0gbS5ncm91cCgpLmVuY29kZSgndXRmLTgnKQoJaWYgYmxvY2sgaW4gZGljdDoKCSAgICByZXR1cm4gZGljdFsgYmxvY2sgXQoJZWxzZToKCQlyZXR1cm4gIi0tLS0iCiAgICAKCnV0Zl90cmFuc2xhdGVkID0gcmUuc3ViKHVyJ1tcdTRlMDAtXHU5ZmZmXSsnLCB0cmFuc2xhdGUsIHV0Zl9saW5lLCByZS5VTklDT0RFKQoKcHJpbnQgdXRmX3RyYW5zbGF0ZWQuZW5jb2RlKCd1dGYtOCcp