fork(2) download
  1. #include <cassert>
  2. #include <cwctype>
  3. #include <cstdlib>
  4. #include <iomanip>
  5. #include <iostream>
  6. #include <locale>
  7. #include <string>
  8.  
  9. #if _WIN32 || _WIN64
  10. // Windows needs a little non-standard magic for this to work.
  11. #include <io.h>
  12. #include <fcntl.h>
  13. #include <locale.h>
  14. #endif
  15.  
  16. using std::size_t;
  17.  
  18. void init_locale(void)
  19. // Does magic so that wcout can work.
  20. {
  21. #if _WIN32 || _WIN64
  22. // Windows needs a little non-standard magic.
  23. constexpr char cp_utf16le[] = ".1200";
  24. setlocale( LC_ALL, cp_utf16le );
  25. _setmode( _fileno(stdout), _O_U16TEXT );
  26. #else
  27. // The correct locale name may vary by OS, e.g., "en_US.utf8".
  28. constexpr char locale_name[] = "";
  29. std::locale::global(std::locale(locale_name));
  30. std::wcout.imbue(std::locale());
  31. #endif
  32. }
  33.  
  34. std::u16string make_u16string( const std::wstring& ws )
  35. /* Creates a UTF-16 string from a wide-character string. Any wide characters
  36.  * outside the allowed range of UTF-16 are mapped to the sentinel value U+FFFD,
  37.  * per the Unicode documentation. (http://w...content-available-to-author-only...e.org/faq/private_use.html
  38.  * retrieved 12 March 2017.) Unpaired surrogates in ws are also converted to
  39.  * sentinel values. Noncharacters, however, are left intact. As a fallback,
  40.  * if wide characters are the same size as char16_t, this does a more trivial
  41.  * construction using that implicit conversion.
  42.  */
  43. {
  44. /* We assume that, if this test passes, a wide-character string is already
  45.   * UTF-16, or at least converts to it implicitly without needing surrogate
  46.   * pairs.
  47.   */
  48. if ( sizeof(wchar_t) == sizeof(char16_t) ) {
  49. return std::u16string( ws.begin(), ws.end() );
  50. } else {
  51. /* The conversion from UTF-32 to UTF-16 might possibly require surrogates.
  52.   * A surrogate pair suffices to represent all wide characters, because all
  53.   * characters outside the range will be mapped to the sentinel value
  54.   * U+FFFD. Add one character for the terminating NUL.
  55.   */
  56. const size_t max_len = 2 * ws.length() + 1;
  57. // Our temporary UTF-16 string.
  58. std::u16string result;
  59.  
  60. result.reserve(max_len);
  61.  
  62. for ( const wchar_t& wc : ws ) {
  63. const std::wint_t chr = wc;
  64.  
  65. if ( chr < 0 || chr > 0x10FFFF || (chr >= 0xD800 && chr <= 0xDFFF) ) {
  66. // Invalid code point. Replace with sentinel, per Unicode standard:
  67. constexpr char16_t sentinel = u'\uFFFD';
  68. result.push_back(sentinel);
  69. } else if ( chr < 0x10000UL ) { // In the BMP.
  70. result.push_back(static_cast<char16_t>(wc));
  71. } else {
  72. const char16_t leading = static_cast<char16_t>(
  73. ((chr-0x10000UL) / 0x400U) + 0xD800U );
  74. const char16_t trailing = static_cast<char16_t>(
  75. ((chr-0x10000UL) % 0x400U) + 0xDC00U );
  76.  
  77. result.append({leading, trailing});
  78. } // end if
  79. } // end for
  80.  
  81. /* The returned string is shrunken to fit, which might not be the Right
  82.   * Thing if there is more to be added to the string.
  83.   */
  84. result.shrink_to_fit();
  85.  
  86. // We depend here on the compiler to optimize the move constructor.
  87. return result;
  88. } // end if
  89. // Not reached.
  90. }
  91.  
  92. int main(void)
  93. {
  94. static const std::wstring wtest(L"☪☮∈✡℩☯✝ \U0001F644");
  95. static const std::u16string u16test(u"☪☮∈✡℩☯✝ \U0001F644");
  96. const std::u16string converted = make_u16string(wtest);
  97.  
  98. init_locale();
  99.  
  100. std::wcout << L"sizeof(wchar_t) == " << sizeof(wchar_t) << L".\n";
  101.  
  102. for( size_t i = 0; i <= u16test.length(); ++i ) {
  103. if ( u16test[i] != converted[i] ) {
  104. std::wcout << std::hex << std::showbase
  105. << std::right << std::setfill(L'0')
  106. << std::setw(4) << (unsigned)converted[i] << L" ≠ "
  107. << std::setw(4) << (unsigned)u16test[i] << L" at "
  108. << i << L'.' << std::endl;
  109. return EXIT_FAILURE;
  110. } // end if
  111. } // end for
  112.  
  113. std::wcout << wtest << std::endl;
  114.  
  115. return EXIT_SUCCESS;
  116. }
Success #stdin #stdout 0s 16920KB
stdin
Standard input is empty
stdout
sizeof(wchar_t) == 4.
☪☮∈✡℩☯✝ 🙄