fork(10) download
  1. #include <iostream>
  2. #include <sstream>
  3. #include <string>
  4. #include <iomanip>
  5.  
  /// @brief Reports the total length in bytes of the UTF-8 sequence that starts
  ///        at @p data, judged from its lead byte alone.
  ///
  /// Only the first byte is read; continuation bytes are not inspected.
  ///
  /// @param data Pointer to at least one readable byte.
  /// @return 1-4 for a byte that can begin a well-formed sequence, or 0 when it
  ///         cannot: a continuation byte (0x80-0xBF), the always-overlong leads
  ///         0xC0/0xC1, or 0xF5-0xFF, which would encode values above U+10FFFF.
  size_t nextUtf8CodepointLen(const char* data)
  {
      const unsigned char lead = static_cast<unsigned char>(*data);

      if (lead < 0x80) {
          return 1;  // 0xxxxxxx: single-byte (ASCII)
      }
      if (lead < 0xC2) {
          return 0;  // 10xxxxxx continuation byte, or 0xC0/0xC1 (overlong only)
      }
      if (lead < 0xE0) {
          return 2;  // 0xC2-0xDF
      }
      if (lead < 0xF0) {
          return 3;  // 0xE0-0xEF
      }
      if (lead < 0xF5) {
          return 4;  // 0xF0-0xF4
      }

      return 0;  // 0xF5-0xFF never appear in UTF-8
  }
  28.  
  /// @brief Decodes one UTF-8 code point from the front of a byte range and
  ///        advances the cursor past the bytes it consumed.
  ///
  /// Ill-formed input yields U+FFFD and always consumes at least the lead byte,
  /// so repeated calls make progress. A byte that is not a continuation byte is
  /// never consumed as part of a broken sequence; it is left in place so the
  /// next call can decode it on its own.
  ///
  /// The lead byte is classified here (rather than via a separate length query)
  /// because the payload mask and the smallest legal value for that length are
  /// needed alongside the length.
  ///
  /// @param data      In/out cursor into the byte range; advanced on return.
  /// @param data_size In/out number of bytes remaining at @p data; reduced by
  ///                  the number of bytes consumed.
  /// @return The decoded code point; 0xFFFD for an invalid lead byte, a
  ///         truncated or interrupted sequence, an overlong encoding, a
  ///         surrogate (U+D800-U+DFFF) or a value above U+10FFFF; or
  ///         static_cast<unsigned>(-1) when @p data_size is 0.
  unsigned nextUtf8Codepoint(const char* &data, size_t &data_size)
  {
      constexpr unsigned kReplacement = 0xFFFD;

      if (data_size == 0) {
          // End-of-input sentinel; callers compare the result against -1.
          return static_cast<unsigned>(-1);
      }

      const unsigned char lead = static_cast<unsigned char>(*data);
      ++data;
      --data_size;

      if (lead < 0x80) {
          return lead;  // ASCII
      }

      size_t trailing = 0;   // continuation bytes expected after the lead
      unsigned cp = 0;       // payload bits accumulated so far
      unsigned min_cp = 0;   // smallest value that legitimately needs this length

      if (lead >= 0xC2 && lead <= 0xDF) {
          trailing = 1;
          cp = lead & 0x1Fu;
          min_cp = 0x80;
      }
      else if ((lead & 0xF0) == 0xE0) {
          trailing = 2;
          cp = lead & 0x0Fu;
          min_cp = 0x800;
      }
      else if (lead >= 0xF0 && lead <= 0xF4) {
          trailing = 3;
          cp = lead & 0x07u;
          min_cp = 0x10000;
      }
      else {
          // Stray continuation byte, 0xC0/0xC1 (overlong only) or 0xF5-0xFF.
          return kReplacement;
      }

      for (size_t i = 0; i < trailing; ++i) {
          if (data_size == 0) {
              return kReplacement;  // input ended in the middle of the sequence
          }

          const unsigned char next = static_cast<unsigned char>(*data);
          if ((next & 0xC0) != 0x80) {
              // Not a continuation byte: leave it unconsumed so it is decoded
              // by the following call instead of being swallowed here.
              return kReplacement;
          }

          cp = (cp << 6) | (next & 0x3Fu);
          ++data;
          --data_size;
      }

      const bool overlong = cp < min_cp;
      const bool surrogate = cp >= 0xD800 && cp <= 0xDFFF;
      const bool out_of_range = cp > 0x10FFFF;
      if (overlong || surrogate || out_of_range) {
          return kReplacement;
      }

      return cp;
  }
  79.  
  80. struct Helper {
  81. static std::string ToHex(const std::string &input);
  82. };
  83.  
  84. std::string Helper::ToHex(const std::string &input) {
  85. const char *data = input.c_str();
  86. size_t data_size = input.size();
  87.  
  88. std::ostringstream oss;
  89. unsigned cp;
  90.  
  91. while ((cp = nextUtf8Codepoint(data, data_size)) != -1) {
  92. if (cp > 0xFF) {
  93. cp = static_cast<unsigned>('?');
  94. }
  95. oss << std::hex << std::setw(2) << std::setfill('0') << cp;
  96. }
  97.  
  98. return oss.str();
  99. }
  100.  
  101. int main() {
  102. std::cout << Helper::ToHex(u8"É");
  103. return 0;
  104. }
Success #stdin #stdout 0s 5464KB
stdin
Standard input is empty
stdout
c9