# coding: utf-8
from __future__ import print_function
import io
import unicodedata
from pprint import PrettyPrinter
# Unicode general categories that are escaped instead of emitted verbatim:
# control (Cc), format (Cf), surrogate (Cs), private use (Co), unassigned
# (Cn) and the line / paragraph / space separators (Zl, Zp, Zs).
NON_PRINTABLE_CATEGORIES = set(('Cc', 'Cf', 'Cs', 'Co', 'Cn', 'Zl', 'Zp', 'Zs'))

# The text type of the running interpreter: ``unicode`` on Python 2 and
# ``str`` on Python 3.  Resolved from a literal so no ``unicode`` name is
# needed.
_TEXT_TYPE = type(u'')


def escape_non_printable_unicode(ustring):
    """Return *ustring* with non-printable characters backslash-escaped.

    Each character is handled as follows:

    * a backslash or a single quote is prefixed with a backslash;
    * an ASCII space, or any character whose Unicode general category is
      not listed in ``NON_PRINTABLE_CATEGORIES``, is kept unchanged;
    * every other character is replaced by its ``unicode_escape`` form
      (for example ``\\t``, ``\\x01`` or ``\\u3000``).

    Args:
        ustring: The text to escape (``unicode`` on Python 2, ``str`` on
            Python 3).

    Returns:
        A new text string with the escapes applied.

    Raises:
        TypeError: If *ustring* is not a text string.
    """
    # An explicit raise instead of ``assert`` so the check survives ``-O``.
    if not isinstance(ustring, _TEXT_TYPE):
        raise TypeError(
            'expected a text string, got {0}'.format(type(ustring).__name__))

    category = unicodedata.category  # hoisted out of the per-character loop
    pieces = []
    for uchar in ustring:
        if uchar in (u'\\', u'\''):
            pieces.append(u'\\' + uchar)
        elif uchar == u' ' or category(uchar) not in NON_PRINTABLE_CATEGORIES:
            # The plain space is category Zs but is still passed through.
            pieces.append(uchar)
        else:
            pieces.append(uchar.encode('unicode_escape').decode('ascii'))
    return u''.join(pieces)
class BytesFriendlyStringIO(io.StringIO):
    """In-memory text buffer whose ``write`` also accepts ASCII byte strings.

    ``io.StringIO`` only accepts text.  This subclass decodes byte strings
    as ASCII before storing them, so callers may mix byte strings and text
    in their writes.
    """

    def write(self, s):
        """Write *s* to the buffer, decoding a byte string as ASCII first.

        Args:
            s: Text, or a byte string containing only ASCII bytes.

        Returns:
            Whatever ``io.StringIO.write`` returns for the (decoded) text.

        Raises:
            UnicodeDecodeError: If *s* is a byte string with non-ASCII bytes.
        """
        # ``bytes`` is ``str`` on Python 2, so this matches the same objects
        # there, while on Python 3 it no longer calls ``.decode`` on text.
        if isinstance(s, bytes):
            s = s.decode('ascii')
        return super(BytesFriendlyStringIO, self).write(s)
class UnicodePrittyPrinter(PrettyPrinter):
    """PrettyPrinter that shows printable non-ASCII text as-is.

    Text strings are rendered as ``u'...'`` with only backslashes, single
    quotes and non-printable characters escaped (see
    ``escape_non_printable_unicode``); every other object is formatted by
    the base class.  The spelling of the class name is kept unchanged so
    existing imports keep working.
    """

    def pformat(self, object):
        """Return the pretty-printed form of *object* as a text string.

        The output is collected in a ``BytesFriendlyStringIO`` so that the
        result is text (unicode) rather than a byte string.
        """
        sio = BytesFriendlyStringIO()
        # ``_format`` is the base class's internal formatting routine; the
        # arguments after the stream are indent, allowance, context, level.
        self._format(object, sio, 0, 0, {}, 0)
        return sio.getvalue()

    def format(self, object, context, maxlevels, level):
        """Return ``(repr_text, readable, recursive)`` for *object*.

        Text strings get the custom ``u'...'`` rendering and are reported
        as readable and non-recursive; anything else is delegated to
        ``PrettyPrinter.format``.
        """
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3,
        # which avoids a NameError on interpreters without ``unicode``.
        if isinstance(object, type(u'')):
            return (u"u'{0}'".format(escape_non_printable_unicode(object)), True, False)
        return PrettyPrinter.format(self, object, context, maxlevels, level)
# Ready-made module-level printer, constructed with PrettyPrinter's default
# settings (no arguments are passed).
unicode_printer = UnicodePrittyPrinter()
if __name__ == '__main__':
    # Small demo: print a sample string raw, via repr(), and via the
    # unicode-aware pretty printer so the three outputs can be compared.
    import codecs
    import sys

    # Encode everything written to stdout as UTF-8.  On Python 3 the
    # encoder must wrap the underlying binary stream (``sys.stdout.buffer``);
    # on Python 2 there is no ``buffer`` attribute and ``sys.stdout`` itself
    # is wrapped, exactly as before.
    sys.stdout = codecs.getwriter("utf-8")(getattr(sys.stdout, 'buffer', sys.stdout))

    test_string = u'あいうえお\tかきくけこ\nさしすせそ たちつてと Catal\xe0\x01 \\\'"'
    print(test_string)
    print(repr(test_string))
    print(unicode_printer.pformat(test_string))
IyBjb2Rpbmc6IHV0Zi04CmZyb20gX19mdXR1cmVfXyBpbXBvcnQgcHJpbnRfZnVuY3Rpb24KCmltcG9ydCBpbwppbXBvcnQgdW5pY29kZWRhdGEKZnJvbSBwcHJpbnQgaW1wb3J0IFByZXR0eVByaW50ZXIKCk5PTl9QUklOVEFCTEVfQ0FURUdPUklFUyA9IHNldCgoJ0NjJywgJ0NmJywgJ0NzJywgJ0NvJywgJ0NuJywgJ1psJywgJ1pwJywgJ1pzJykpCgoKZGVmIGVzY2FwZV9ub25fcHJpbnRhYmxlX3VuaWNvZGUodXN0cmluZyk6CiAgICBhc3NlcnQgaXNpbnN0YW5jZSh1c3RyaW5nLCB1bmljb2RlKQoKICAgIGRlZiBpc19wcmludGFibGUodWNoYXIpOgogICAgICAgIHJldHVybiB1bmljb2RlZGF0YS5jYXRlZ29yeSh1Y2hhcikgbm90IGluIE5PTl9QUklOVEFCTEVfQ0FURUdPUklFUwoKICAgIGJ1ZiA9IFtdCiAgICBmb3IgdWNoYXIgaW4gdXN0cmluZzoKICAgICAgICBpZiB1Y2hhciBpbiAodSdcXCcsIHUnXCcnKToKICAgICAgICAgICAgYnVmLmFwcGVuZCh1J1xcJyArIHVjaGFyKQogICAgICAgIGVsaWYgdWNoYXIgPT0gdScgJyBvciBpc19wcmludGFibGUodWNoYXIpOgogICAgICAgICAgICBidWYuYXBwZW5kKHVjaGFyKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIGJ1Zi5hcHBlbmQodWNoYXIuZW5jb2RlKCd1bmljb2RlX2VzY2FwZScpLmRlY29kZSgnYXNjaWknKSkKCiAgICByZXR1cm4gdScnLmpvaW4oYnVmKQoKCmNsYXNzIEJ5dGVzRnJpZW5kbHlTdHJpbmdJTyhpby5TdHJpbmdJTyk6CgogICAgZGVmIHdyaXRlKHNlbGYsIHMpOgogICAgICAgIGlmIGlzaW5zdGFuY2Uocywgc3RyKToKICAgICAgICAgICAgcyA9IHMuZGVjb2RlKCdhc2NpaScpCiAgICAgICAgcmV0dXJuIHN1cGVyKEJ5dGVzRnJpZW5kbHlTdHJpbmdJTywgc2VsZikud3JpdGUocykKCgpjbGFzcyBVbmljb2RlUHJpdHR5UHJpbnRlcihQcmV0dHlQcmludGVyKToKCiAgICBkZWYgcGZvcm1hdChzZWxmLCBvYmplY3QpOgogICAgICAgIHNpbyA9IEJ5dGVzRnJpZW5kbHlTdHJpbmdJTygpCiAgICAgICAgc2VsZi5fZm9ybWF0KG9iamVjdCwgc2lvLCAwLCAwLCB7fSwgMCkKICAgICAgICByZXR1cm4gc2lvLmdldHZhbHVlKCkgICMgUmV0dXJucyB1bmljb2RlIG5vdCBzdHIKCiAgICBkZWYgZm9ybWF0KHNlbGYsIG9iamVjdCwgY29udGV4dCwgbWF4bGV2ZWxzLCBsZXZlbCk6CiAgICAgICAgaWYgaXNpbnN0YW5jZShvYmplY3QsIHVuaWNvZGUpOgogICAgICAgICAgICByZXR1cm4gKHUidSd7MH0nIi5mb3JtYXQoZXNjYXBlX25vbl9wcmludGFibGVfdW5pY29kZShvYmplY3QpKSwgVHJ1ZSwgRmFsc2UpCiAgICAgICAgcmV0dXJuIFByZXR0eVByaW50ZXIuZm9ybWF0KHNlbGYsIG9iamVjdCwgY29udGV4dCwgbWF4bGV2ZWxzLCBsZXZlbCkKCnVuaWNvZGVfcHJpbnRlciA9IFVuaWNvZGVQcml0dHlQcmludGVyKCkKCgppZiBfX25hbWVfXyA9PSAnX19tYWluX18nOgogICAgaW1wb3J0IHN5cywgY29kZWNzCiAgICBzeXMuc3Rkb3V0ID0gY29kZWNzLmdldHdyaXRlcigi
dXRmLTgiKShzeXMuc3Rkb3V0KQoKICAgIHRlc3Rfc3RyaW5nID0gdSfjgYLjgYTjgYbjgYjjgYpcdOOBi+OBjeOBj+OBkeOBk1xu44GV44GX44GZ44Gb44Gd44CA44Gf44Gh44Gk44Gm44GoIENhdGFsXHhlMFx4MDEgXFxcJyInCiAgICBwcmludCh0ZXN0X3N0cmluZykKICAgIHByaW50KHJlcHIodGVzdF9zdHJpbmcpKQogICAgcHJpbnQodW5pY29kZV9wcmludGVyLnBmb3JtYXQodGVzdF9zdHJpbmcpKQ==