# coding: utf-8
from __future__ import print_function

import io
import unicodedata
from pprint import PrettyPrinter

# Unicode general categories whose characters are written as escape sequences
# instead of literally: control (Cc), format (Cf), surrogate (Cs), private
# use (Co), unassigned (Cn) and the line/paragraph/space separators
# (Zl, Zp, Zs).
NON_PRINTABLE_CATEGORIES = set(('Cc', 'Cf', 'Cs', 'Co', 'Cn', 'Zl', 'Zp', 'Zs'))


def escape_non_printable_unicode(ustring):
    """Return *ustring* with quoting and non-printable characters escaped.

    The result is meant to be placed between single quotes:

    * a backslash or a single quote is prefixed with a backslash;
    * the plain ASCII space and every character whose Unicode category is
      not in ``NON_PRINTABLE_CATEGORIES`` are kept as they are;
    * everything else is replaced by its ``unicode_escape`` form
      (for example ``\\t``, ``\\x01`` or ``\\u3000``).

    :param ustring: text string (``unicode`` on Python 2, ``str`` on
        Python 3).
    :returns: a new text string of the same type.
    :raises AssertionError: if *ustring* is not a text string.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check works on both without referring to the Python-2-only name.
    assert isinstance(ustring, type(u'')), 'expected a text (unicode) string'

    def escape_char(uchar):
        # Characters that would break out of a single-quoted literal.
        if uchar in (u'\\', u'\''):
            return u'\\' + uchar
        # U+0020 is in category Zs but must stay readable, hence the
        # explicit exception ahead of the category lookup.
        if uchar == u' ':
            return uchar
        if unicodedata.category(uchar) not in NON_PRINTABLE_CATEGORIES:
            return uchar
        # unicode_escape yields pure ASCII bytes; decode them back to text.
        return uchar.encode('unicode_escape').decode('ascii')

    return u''.join(escape_char(uchar) for uchar in ustring)


class BytesFriendlyStringIO(io.StringIO):
    """In-memory text buffer whose ``write`` also accepts ASCII byte strings.

    ``io.StringIO`` only takes text; this subclass decodes byte strings
    first so that callers emitting a mix of both can share one buffer.
    """

    def write(self, s):
        """Write *s* to the buffer, decoding it as ASCII if it is bytes.

        :param s: text or byte string to append.
        :returns: whatever ``io.StringIO.write`` returns.
        :raises UnicodeDecodeError: if a byte string contains non-ASCII
            bytes.
        """
        # ``bytes`` is an alias of ``str`` on Python 2, so this matches the
        # same objects there, while on Python 3 it no longer catches text
        # strings (which have no ``decode`` method).
        if isinstance(s, bytes):
            s = s.decode('ascii')
        return super(BytesFriendlyStringIO, self).write(s)


class UnicodePrittyPrinter(PrettyPrinter):
    """PrettyPrinter that shows text strings with printable characters intact.

    Text strings are rendered as ``u'...'`` with only quoting and
    non-printable characters escaped, and ``pformat`` returns text rather
    than a byte string.
    """

    def pformat(self, object):
        """Return the pretty-printed representation of *object* as text.

        :param object: any value accepted by ``PrettyPrinter``.
        :returns: the formatted representation as a text string.
        """
        # The text buffer can hold non-ASCII characters and still accepts
        # the ASCII byte strings written by the base class.
        sio = BytesFriendlyStringIO()
        self._format(object, sio, 0, 0, {}, 0)
        return sio.getvalue()  # Returns unicode not str

    def format(self, object, context, maxlevels, level):
        """Return ``(repr, readable, recursive)`` for *object*.

        Text strings get a ``u'...'`` representation built with
        ``escape_non_printable_unicode``; every other object is delegated
        to ``PrettyPrinter.format``.

        NOTE(review): on Python versions before 3.10 the standard library
        appears to format the contents of containers that fit on one line
        without calling this hook (bpo-28850), so strings nested in small
        containers may still use the default repr -- confirm if that
        matters for callers.
        """
        # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
        if isinstance(object, type(u'')):
            escaped = escape_non_printable_unicode(object)
            return (u"u'{0}'".format(escaped), True, False)
        return PrettyPrinter.format(self, object, context, maxlevels, level)

# Shared module-level printer instance, constructed with no arguments
# (i.e. the default PrettyPrinter settings).
unicode_printer = UnicodePrittyPrinter()


if __name__ == '__main__':
    # Small demo: print a sample string raw, with repr(), and through the
    # unicode-aware pretty printer, all encoded as UTF-8 on stdout.
    import codecs
    import sys

    # The codecs writer emits bytes. A Python 3 text stream rejects bytes,
    # so wrap its underlying binary buffer when there is one; Python 2 file
    # objects have no ``buffer`` attribute and are wrapped directly.
    sys.stdout = codecs.getwriter("utf-8")(getattr(sys.stdout, 'buffer', sys.stdout))

    test_string = u'あいうえお\tかきくけこ\nさしすせそ　たちつてと Catal\xe0\x01 \\\'"'
    print(test_string)
    print(repr(test_string))
    print(unicode_printer.pformat(test_string))