fork download
  1. # coding: utf-8
  2. from __future__ import print_function
  3.  
  4. import io
  5. import unicodedata
  6. from pprint import PrettyPrinter
  7.  
  8. NON_PRINTABLE_CATEGORIES = set(('Cc', 'Cf', 'Cs', 'Co', 'Cn', 'Zl', 'Zp', 'Zs'))
  9.  
  10.  
  11. def escape_non_printable_unicode(ustring):
  12. assert isinstance(ustring, unicode)
  13.  
  14. def is_printable(uchar):
  15. return unicodedata.category(uchar) not in NON_PRINTABLE_CATEGORIES
  16.  
  17. buf = []
  18. for uchar in ustring:
  19. if uchar in (u'\\', u'\''):
  20. buf.append(u'\\' + uchar)
  21. elif uchar == u' ' or is_printable(uchar):
  22. buf.append(uchar)
  23. else:
  24. buf.append(uchar.encode('unicode_escape').decode('ascii'))
  25.  
  26. return u''.join(buf)
  27.  
  28.  
  29. class BytesFriendlyStringIO(io.StringIO):
  30.  
  31. def write(self, s):
  32. if isinstance(s, str):
  33. s = s.decode('ascii')
  34. return super(BytesFriendlyStringIO, self).write(s)
  35.  
  36.  
  37. class UnicodePrittyPrinter(PrettyPrinter):
  38.  
  39. def pformat(self, object):
  40. sio = BytesFriendlyStringIO()
  41. self._format(object, sio, 0, 0, {}, 0)
  42. return sio.getvalue() # Returns unicode not str
  43.  
  44. def format(self, object, context, maxlevels, level):
  45. if isinstance(object, unicode):
  46. return (u"u'{0}'".format(escape_non_printable_unicode(object)), True, False)
  47. return PrettyPrinter.format(self, object, context, maxlevels, level)
  48.  
  49. unicode_printer = UnicodePrittyPrinter()
  50.  
  51.  
  52. if __name__ == '__main__':
  53. import sys, codecs
  54. sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
  55.  
  56. test_string = u'あいうえお\tかきくけこ\nさしすせそ たちつてと Catal\xe0\x01 \\\'"'
  57. print(test_string)
  58. print(repr(test_string))
  59. print(unicode_printer.pformat(test_string))
Success #stdin #stdout 0.02s 9016KB
stdin
Standard input is empty
stdout
あいうえお	かきくけこ
さしすせそ たちつてと Català \'"
u'\u3042\u3044\u3046\u3048\u304a\t\u304b\u304d\u304f\u3051\u3053\n\u3055\u3057\u3059\u305b\u305d\u3000\u305f\u3061\u3064\u3066\u3068 Catal\xe0\x01 \\\'"'
u'あいうえお\tかきくけこ\nさしすせそ\u3000たちつてと Català\x01 \\\'"'