#include <iostream>
#include <sstream>
#include <string>
#include <iomanip>
// Classifies the byte at `data` as a UTF-8 lead byte and reports the total
// length, in bytes, of the sequence it introduces.
//
// Returns 1 for 0x00-0x7F, 2 for 0xC0-0xDF, 3 for 0xE0-0xEF, 4 for 0xF0-0xF7,
// and 0 for any byte that cannot start a sequence (continuation bytes
// 0x80-0xBF and 0xF8-0xFF). Only the first byte is read; the continuation
// bytes themselves are not inspected.
size_t nextUtf8CodepointLen(const char* data)
{
    const unsigned lead = static_cast<unsigned char>(*data);

    // The ranges are tested in ascending order, so each comparison only
    // needs the upper bound of its own range.
    if (lead < 0x80) return 1;  // 0xxxxxxx
    if (lead < 0xC0) return 0;  // 10xxxxxx: continuation byte, not a lead
    if (lead < 0xE0) return 2;  // 110xxxxx
    if (lead < 0xF0) return 3;  // 1110xxxx
    if (lead < 0xF8) return 4;  // 11110xxx
    return 0;                   // 11111xxx: not a valid lead pattern
}
// Decodes one code point from the UTF-8 buffer [data, data + data_size) and
// advances `data` / shrinks `data_size` past the bytes it consumed.
//
// Parameters:
//   data       in/out cursor into the buffer; moved past the consumed bytes.
//   data_size  in/out count of bytes remaining at `data`.
//
// Returns:
//   - static_cast<unsigned>(-1) when data_size is 0 (end of input; nothing
//     is consumed),
//   - the decoded code point for a well-formed sequence,
//   - 0xFFFD (REPLACEMENT CHARACTER) for malformed input: a byte that cannot
//     start a sequence, a truncated sequence, a missing continuation byte,
//     an overlong encoding, a UTF-16 surrogate, or a value above U+10FFFF.
//
// At least one byte is always consumed when data_size > 0, so repeated calls
// terminate. On a malformed sequence only the lead byte and the continuation
// bytes that were actually validated are consumed; the offending byte is left
// in place so the next call can decode it on its own.
unsigned nextUtf8Codepoint(const char* &data, size_t &data_size)
{
    const unsigned kEndOfInput = static_cast<unsigned>(-1);
    const unsigned kReplacement = 0xFFFD;

    if (data_size == 0) return kEndOfInput;

    const unsigned char lead = static_cast<unsigned char>(*data);
    const size_t len = nextUtf8CodepointLen(data);
    ++data;
    --data_size;

    if (len == 1) return static_cast<unsigned>(lead);
    if (len == 0) return kReplacement;  // stray continuation or invalid lead

    // Payload bits carried by the lead byte, and the smallest code point that
    // legitimately needs a sequence of this length (used to reject overlongs).
    unsigned cp;
    unsigned min_cp;
    switch (len) {
    case 2:
        cp = lead & 0x1Fu;
        min_cp = 0x80;
        break;
    case 3:
        cp = lead & 0x0Fu;
        min_cp = 0x800;
        break;
    default:  // 4
        cp = lead & 0x07u;
        min_cp = 0x10000;
        break;
    }

    for (size_t i = 1; i < len; ++i) {
        // Truncated input: everything seen so far was part of this sequence.
        if (data_size == 0) return kReplacement;

        const unsigned char ch = static_cast<unsigned char>(*data);
        // Not a continuation byte: stop here WITHOUT consuming it, so a
        // following valid character is not swallowed by the bad sequence.
        if ((ch & 0xC0) != 0x80) return kReplacement;

        cp = (cp << 6) | (ch & 0x3Fu);
        ++data;
        --data_size;
    }

    // Structurally complete, but the value itself may still be ill-formed.
    const bool overlong = cp < min_cp;
    const bool surrogate = cp >= 0xD800 && cp <= 0xDFFF;
    const bool out_of_range = cp > 0x10FFFF;
    if (overlong || surrogate || out_of_range) return kReplacement;

    return cp;
}
// Holder for string utility functions; it declares a single static member
// and has no data members.
struct Helper {
// Returns a hex string for `input`: two hex digits per code point decoded
// from it, with code points above 0xFF replaced by '?'. Defined out of line
// in this file.
static std::string ToHex(const std::string &input);
};
// Renders `input` as a hex string. The bytes are decoded as UTF-8 one code
// point at a time with nextUtf8Codepoint; each code point is written as two
// zero-padded hex digits. Code points above 0xFF do not fit in two digits and
// are written as '?' instead.
//
// Returns the concatenated hex digits; an empty input yields an empty string.
std::string Helper::ToHex(const std::string &input) {
    // Value nextUtf8Codepoint returns once the buffer is exhausted.
    const unsigned kEndOfInput = static_cast<unsigned>(-1);

    const char *cursor = input.c_str();
    size_t remaining = input.size();

    std::ostringstream hex;
    // Base and fill character persist on the stream, so set them once;
    // the field width resets after every insertion and is set per value.
    hex << std::hex << std::setfill('0');

    for (;;) {
        unsigned value = nextUtf8Codepoint(cursor, remaining);
        if (value == kEndOfInput) {
            break;
        }
        if (value > 0xFF) {
            value = static_cast<unsigned>('?');
        }
        hex << std::setw(2) << value;
    }
    return hex.str();
}
// Demo entry point: writes the Helper::ToHex rendering of a one-character
// UTF-8 sample string to standard output.
int main() {
    const std::string sample = u8"É";
    const std::string rendered = Helper::ToHex(sample);
    std::cout << rendered;
    return 0;
}