fork(3) download
  1. template <typename T>
  2. T reduce2(T v) {
  3. /* pre: ((short*)&v)[i] < 100 for all i
  4. * post:
  5. * ((char*)&v)[2i] = ((short*)&v)[i] / 10
  6. * ((char*)&v)[2i + 1] = ((short*)&v)[i] % 10
  7. *
  8. * That is, we split each short in v into its ones and tens digits
  9. */
  10.  
  11. /* t < 100 --> (t * 410) >> 12 == t / 10
  12. * && (t * 410) < 0x10000
  13. *
  14. * For the least order short that's all we need, for the others the
  15. * shift doesn't drop the remainder so we mask those out
  16. */
  17. T k = ((v * 410) >> 12) & 0x000F000F000F000Full;
  18.  
  19. /*
  20. * Then just subtract out the tens digit to get the ones digit and
  21. * shift them into the right place
  22. */
  23. return (((v - k * 10) << 8) + k);
  24. }
  25.  
  26. template <typename T>
  27. T reduce4(T v) {
  28. /* pre: ((unsigned*)&v)[i] < 10000 for all i
  29. *
  30. * preReduce2:
  31. * ((short*)&v)[2i] = ((unsigned*)&v)[i] / 100
  32. * ((short*)&v)[2i + 1] = ((unsigned*)&v)[i] % 100
  33. *
  34. * That is, we split each int in v into its one/ten and hundred/thousand
  35. * digit pairs. Put them into the corresponding short positions and then
  36. * call reduce2 to finish the splitting
  37. */
  38.  
  39. /* This is basically the same as reduce2 with different constants
  40. */
  41. T k = ((v * 10486) >> 20) & 0xFF000000FFull;
  42. return reduce2(((v - k * 100) << 16) + (k));
  43. }
  44.  
  45. typedef unsigned long long ull;
  46. inline ull reduce8(ull v) {
  47. /* pre: v < 100000000
  48. *
  49. * preReduce4:
  50. * ((unsigned*)&v)[0] = v / 10000
  51. * ((unsigned*)&v)[1] = v % 10000
  52. *
  53. * This should be familiar now, split v into the two 4-digit segments,
  54. * put them in the right place, and let reduce4 continue the splitting
  55. */
  56.  
  57. /* Again, use the same method as reduce4 and reduce2 with correctly tailored constants
  58. */
  59. ull k = ((v * 3518437209u) >> 45);
  60. return reduce4(((v - k * 10000) << 32) + (k));
  61. }
  62.  
  63. template <typename T>
  64. std::string itostr(T o) {
  65. /*
  66. * Use of this union is not strictly compliant, but, really,
  67. * who cares? This is just for fun :)
  68. *
  69. * Our ones digit will be in str[15]
  70. *
  71. * We don't actually need the first 6 bytes, but w/e
  72. */
  73. union {
  74. char str[16];
  75. unsigned short u2[8];
  76. unsigned u4[4];
  77. unsigned long long u8[2];
  78. };
  79.  
  80. /* Technically should be "... ? unsigned(~0) + 1 ..." to ensure correct behavior I think */
  81. /* Tends to compile to: v = (o ^ (o >> 31)) - (o >> 31); */
  82. unsigned v = o < 0 ? ~o + 1 : o;
  83.  
  84. /* We want u2[3] = v / 100000000 ... that is, the first 2 bytes of the decimal rep */
  85.  
  86. /* This is the same as in reduce8, that is divide by 10000. So u8[0] = v / 10000 */
  87. u8[0] = (ull(v) * 3518437209u) >> 45;
  88.  
  89. /* Now we want u2[3] = u8[0] / 10000.
  90. * If we added " >> 48 " to the end of the calculation below we would get u8[0] = u8[0] / 10000
  91. * Note though that in little endian byte ordering u2[3] is the top 2 bytes of u8[0]
  92. * and 64 - 16 == 48... Aha, We've got what we want, the rest of u8[0] is junk, but we don't care
  93. */
  94. u8[0] = (u8[0] * 28147497672ull);
  95.  
  96. /* Then just subtract out those digits from v so that u8[1] now holds
  97. * the low 8 decimal digits of v
  98. */
  99. u8[1] = v - u2[3] * 100000000;
  100.  
  101. /* Split u8[1] into its 8 decimal digits */
  102. u8[1] = reduce8(u8[1]);
  103.  
  104. /* f will point to the high order non-zero char */
  105. char* f;
  106.  
  107. /* branch post: f is at the correct short (but not necessarily the correct byte) */
  108. if (u2[3]) {
  109. /* split the top two digits into their respective chars */
  110. u2[3] = reduce2(u2[3]);
  111. f = str + 6;
  112. } else {
  113. /* a sort of binary search on first non-zero digit */
  114. unsigned short* k = u4[2] ? u2 + 4 : u2 + 6;
  115. f = *k ? (char*)k : (char*)(k + 1);
  116. }
  117. /* update f to its final position */
  118. if (!*f) f++;
  119.  
  120. /* '0' == 0x30 and i < 10 --> i <= 0xF ... that is, i | 0x30 = 'i' *
  121. * Note that we could do u8[0] |= ... u8[1] |= ... but the corresponding
  122. * x86-64 operation cannot use a 64 bit immediate value whereas the
  123. * 32 bit 'or' can use a 32 bit immediate.
  124. */
  125. u4[1] |= 0x30303030;
  126. u4[2] |= 0x30303030;
  127. u4[3] |= 0x30303030;
  128.  
  129. /* Add the negative sign... note that o is just the original parameter passed */
  130. if (o < 0) *--f = '-';
  131.  
  132. /* gcc basically forwards this to std::string(f, str + 16)
  133. * but msvc handles it way more efficiently
  134. */
  135. return std::string(f, (str + 16) - f);
  136. }
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty