#include <cstdio>
#include <cstring>
using namespace std;
// Prints one Unicode code point to stdout as UTF-16 escape sequence(s).
//
// utf32: the code point to print.
//   - Below 0x10000 it is written as a single "\uXXXX" escape.
//   - From 0x10000 to 0x10FFFF it is written as a surrogate pair,
//     "\uHHHH\uLLLL" (high surrogate first, then low surrogate).
//   - Above 0x10FFFF it cannot be represented in UTF-16, so U+FFFD
//     (REPLACEMENT CHARACTER) is written instead.
// No newline is appended.
void print_as16(unsigned utf32) {
    // Out-of-range values would otherwise yield a bogus "surrogate" code unit.
    if ( utf32 > 0x10FFFF )
        utf32 = 0xFFFD;

    if ( utf32 < 0x10000 ) {
        std::printf("\\u%04X", utf32);
        return;
    }

    // Supplementary plane: subtract 0x10000 to get a 20-bit value, then put
    // the upper 10 bits in the high surrogate and the lower 10 bits in the
    // low surrogate.
    unsigned const offset = utf32 - 0x10000;
    unsigned const high_surrogate = 0xD800 | (offset >> 10);
    unsigned const low_surrogate = 0xDC00 | (offset & 0x03FF);
    std::printf("\\u%04X\\u%04X", high_surrogate, low_surrogate);
}
// Demo entry point: decodes a hard-coded UTF-8 string and prints it to
// stdout, writing ASCII bytes as-is and every multi-byte character through
// print_as16(). Waits for one character on stdin before exiting.
int main() {
    // UTF-8 string: three 3-byte characters followed by ASCII 'z'.
    char const *p_org = "\xE3\x81\xBE\xE3\x82\x93\xE3\x81\x93z";
    // View the bytes as unsigned so the bit masks below do not depend on
    // whether plain char is signed on this platform.
    unsigned char const *bytes = reinterpret_cast<unsigned char const *>(p_org);
    // Measure the string once instead of on every loop iteration.
    std::size_t const len = std::strlen(p_org);

    for ( std::size_t i = 0; i < len; i++ ) {
        unsigned const lead = bytes[i];
        std::size_t trailing = 0;   // number of continuation bytes expected
        unsigned utf32 = 0;         // code point being assembled

        if ( (lead & 0x80) == 0 ) {             // 0xxxxxxx: ASCII
            std::printf("%c", p_org[i]);
            continue;
        }
        else if ( (lead & 0xE0) == 0xC0 ) {     // 110yyyyy: 2-byte sequence
            trailing = 1;
            utf32 = lead & 0x1F;
        }
        else if ( (lead & 0xF0) == 0xE0 ) {     // 1110zzzz: 3-byte sequence
            trailing = 2;
            utf32 = lead & 0x0F;
        }
        else if ( (lead & 0xF8) == 0xF0 ) {     // 11110uuu: 4-byte sequence
            trailing = 3;
            utf32 = lead & 0x07;
        }
        else {
            // Stray continuation byte or invalid lead byte: skip it.
            continue;
        }

        // A truncated sequence at the end of the string must not read past
        // the terminator; stop decoding instead.
        if ( len - i <= trailing )
            break;

        // Each continuation byte (10xxxxxx) contributes its low 6 bits.
        // The continuation marker itself is not validated.
        for ( std::size_t k = 0; k < trailing; k++ )
            utf32 = (utf32 << 6) | (bytes[++i] & 0x3Fu);

        print_as16(utf32);
    }
    // Keep the console window open until a key is pressed.
    std::getchar();
}