import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Scanner;
/*
Programming Challenge Thread, Part 12
https://mevius.5ch.net/test/read.cgi/tech/1538096947/576
576 Name: (default anonymous poster) [sage] Posted: 2018/11/18 (Sun) 22:42:22.16 ID:qz83zcM7
Challenge
Given a sequence of UTF-8 bytes, find the Unicode code points.
6F 64 61 69
=> U+006F U+0064 U+0061 U+0069
E3 81 8A E9 A1 8C
=> U+304A U+984C
C2 A9 F0 9F 8D 94 E9 A6 99 41
=> U+00A9 U+1F354 U+9999 U+0041
*/
/**
 * Converts UTF-8 byte sequences into Unicode code points and prints them,
 * once by delegating to the standard library and once with a hand-written
 * decoder.
 */
class Ideone
{
    /** U+FFFD, returned in place of any byte sequence that cannot be decoded. */
    static final int REPLACEMENT_CHARACTER = 0xFFFD;

    /**
     * Runs both printers over the three sample inputs: first the
     * library-backed printer on every sample, then the hand-written one.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String[] samples = {
            "6F 64 61 69",
            "E3 81 8A E9 A1 8C",
            "C2 A9 F0 9F 8D 94 E9 A6 99 41",
        };
        for (String sample : samples)
        {
            printUnicodeCodePoint(toByteArray(sample));
        }
        for (String sample : samples)
        {
            printUnicodeCodePoint2(toByteArray(sample));
        }
    }

    /**
     * Parses whitespace-separated hexadecimal integers into a byte array.
     *
     * <p>Parsing stops at the first token that is not a base-16 integer.
     * Each parsed value is stored through {@code ByteArrayOutputStream.write(int)},
     * so only its low-order eight bits are kept.
     *
     * @param s text such as {@code "E3 81 8A"}
     * @return the parsed bytes, possibly empty
     */
    static byte[] toByteArray(String s)
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // try-with-resources closes the scanner even if parsing throws.
        try (Scanner in = new Scanner(s))
        {
            while (in.hasNextInt(16))
            {
                baos.write(in.nextInt(16));
            }
        }
        return baos.toByteArray();
    }

    // Leave the decoding to the language (standard library).
    /**
     * Prints the bytes in hex, followed by the code points obtained by
     * decoding them as UTF-8 with {@link String}.
     *
     * @param bs the UTF-8 bytes to decode
     */
    static void printUnicodeCodePoint(byte[] bs)
    {
        printHexBytes(bs);
        String str = new String(bs, Charset.forName("UTF-8"));
        str.codePoints().forEach(cp -> System.out.printf("U+%04X ", cp));
    }

    // Do the decoding by hand.
    /**
     * Prints the bytes in hex, followed by the code points obtained by
     * repeatedly calling {@link #utf8decode(ByteBuffer)} until the input is
     * exhausted.
     *
     * @param bs the UTF-8 bytes to decode
     */
    static void printUnicodeCodePoint2(byte[] bs)
    {
        printHexBytes(bs);
        ByteBuffer buf = ByteBuffer.wrap(bs);
        while (buf.hasRemaining())
        {
            System.out.printf("U+%04X ", utf8decode(buf));
        }
    }

    /** Prints every byte as two upper-case hex digits followed by a space. */
    private static void printHexBytes(byte[] bs)
    {
        for (byte b : bs)
        {
            // Mask to 0..255 so negative bytes are not sign-extended by %X.
            System.out.printf("%02X ", b & 0xFF);
        }
    }

    // Decode one character from the ByteBuffer.
    /**
     * Decodes one code point from {@code buf}, advancing its position.
     *
     * <p>{@link #REPLACEMENT_CHARACTER} is returned when:
     * <ul>
     *   <li>the first byte is a continuation byte ({@code 0x80..0xBF});</li>
     *   <li>an expected continuation byte is missing because the buffer ends
     *       (everything up to the end has been consumed);</li>
     *   <li>an expected continuation byte is not of the form {@code 10xxxxxx};
     *       the position is moved back so that byte is read again as the
     *       start of the next sequence;</li>
     *   <li>the sequence is an obsolete 5- or 6-byte form (lead byte
     *       {@code 0xF8..0xFF}), an overlong encoding, a surrogate
     *       ({@code U+D800..U+DFFF}) or a value above {@code U+10FFFF};
     *       the whole sequence is consumed.</li>
     * </ul>
     *
     * @param buf source of bytes; must have at least one byte remaining
     * @return the decoded code point, or {@link #REPLACEMENT_CHARACTER}
     * @throws java.nio.BufferUnderflowException if {@code buf} has no bytes
     *         remaining on entry
     */
    static int utf8decode(ByteBuffer buf)
    {
        int lead = buf.get() & 0xFF;
        if (lead < 0x80)
        {
            return lead; // single-byte (ASCII) form
        }
        if (lead < 0xC0)
        {
            return REPLACEMENT_CHARACTER; // continuation byte in lead position
        }

        int length;    // total bytes in the sequence, lead byte included
        int codePoint; // payload bits taken from the lead byte
        int minimum;   // smallest value that genuinely needs this length
        if (lead < 0xE0)
        {
            length = 2;
            codePoint = lead & 0b00011111;
            minimum = 0x80;
        }
        else if (lead < 0xF0)
        {
            length = 3;
            codePoint = lead & 0b00001111;
            minimum = 0x800;
        }
        else if (lead < 0xF8)
        {
            length = 4;
            codePoint = lead & 0b00000111;
            minimum = 0x10000;
        }
        else
        {
            // Obsolete 5-/6-byte forms: consumed for resynchronisation only.
            length = lead < 0xFC ? 5 : 6;
            codePoint = 0;
            minimum = 0;
        }

        for (int i = 1; i < length; i++)
        {
            if (!buf.hasRemaining())
            {
                return REPLACEMENT_CHARACTER; // sequence cut off by end of input
            }
            byte next = buf.get();
            if (!test(next))
            {
                // Un-read the offending byte; it may start a valid sequence.
                buf.position(buf.position() - 1);
                return REPLACEMENT_CHARACTER;
            }
            codePoint = codePoint << 6 | (next & 0b00111111);
        }

        boolean obsoleteForm = length > 4;
        boolean overlong = codePoint < minimum;
        boolean surrogate = codePoint >= Character.MIN_SURROGATE
                && codePoint <= Character.MAX_SURROGATE;
        boolean outOfRange = codePoint > Character.MAX_CODE_POINT;
        if (obsoleteForm || overlong || surrogate || outOfRange)
        {
            return REPLACEMENT_CHARACTER;
        }
        return codePoint;
    }

    /**
     * Tells whether {@code b} is a UTF-8 continuation byte.
     *
     * @param b the byte to inspect
     * @return {@code true} if the top two bits of {@code b} are {@code 10}
     */
    static boolean test(byte b)
    {
        return (b & 0b11000000) == 0b10000000;
    }
}