fork download
  1. #pragma GCC optimize("O3")
  2. #include <bits/stdc++.h>
  3.  
  4. using namespace std;
  5.  
  6. typedef unsigned long long ull;
  7. struct timeit {
  8. decltype(chrono::high_resolution_clock::now()) begin;
  9. const string label;
  10. timeit(string label = "???") : label(label) { begin = chrono::high_resolution_clock::now(); }
  11. ~timeit() {
  12. auto end = chrono::high_resolution_clock::now();
  13. auto duration = chrono::duration_cast<chrono::milliseconds>(end - begin).count();
  14. cerr << duration << "ms elapsed [" << label << "]" << endl;
  15. }
  16. };
  17. mt19937 uni(time(0));
  18. namespace sixtyfournonasm {
  19. struct H {
  20. ull x;
  21. H(ull x = 0) : x(x) {}
  22. ull get() const { return x + !~x; }
  23. H operator+(H o) { return x + o.x + (x + o.x < x); }
  24. H operator*(H o) {
  25. ull a = x >> 32, b = x & -1u, c = o.x >> 32, d = o.x & -1u;
  26. ull y = (H(a * d) + H(b * c)).get();
  27. return H(a * c) + b * d + ((y & -1u) << 32 | y >> 32);
  28. }
  29. H operator-(H o) { return *this + ~o.x; }
  30. bool operator==(H o) const { return get() == o.get(); }
  31. bool operator<(H o) const { return get() < o.get(); }
  32. };
  33. } // namespace sixtyfournonasm
  34.  
  35. namespace sixtyfourasm {
  36. struct H {
  37. ull x;
  38. H(ull x = 0) : x(x) {}
  39. ull get() const { return x + !~x; }
  40. H operator+(H o) { return x + o.x + (x + o.x < x); }
  41. H operator*(H o) {
  42. ull r = x;
  43. asm("mul %1\n addq %%rdx, %0\n adcq $0,%0" : "+a"(r) : "r"(o.x) : "rdx");
  44. return r;
  45. }
  46. H operator-(H o) { return *this + ~o.x; }
  47. bool operator==(H o) const { return get() == o.get(); }
  48. bool operator<(H o) const { return get() < o.get(); }
  49. };
  50. } // namespace sixtyfourasm
  51.  
  52. namespace dacinsixtyone {
  53. struct H {
  54. ull x;
  55. H(ull x = 0) : x(x) {}
  56. ull get() const { return x + !~x; }
  57. const static ull M = (1ull << 61) - 1;
  58. H operator+(H o) {
  59. o.x += x + 1;
  60. o.x = (o.x & M) + (o.x >> 61);
  61. return o.x - 1;
  62. }
  63. H operator*(H o) {
  64. ull l1 = x & -1u, h1 = x >> 32, l2 = o.x & -1u, h2 = o.x >> 32;
  65. ull l = l1 * l2, m = l1 * h2 + l2 * h1, h = h1 * h2;
  66. ull ret = (l & M) + (l >> 61) + (h << 3) + (m >> 29) + (m << 35 >> 3) + 1;
  67. ret = (ret & M) + (ret >> 61);
  68. ret = (ret & M) + (ret >> 61);
  69. return ret - 1;
  70. }
  71. H operator-(H o) { return *this + ~o.x; }
  72. bool operator==(H o) const { return get() == o.get(); }
  73. bool operator<(H o) const { return get() < o.get(); }
  74. };
  75. } // namespace dacinsixtyone
  76. namespace mod1e9 {
  77. typedef long long ll;
  78. struct H {
  79. ll x;
  80. H(ll x = 0) : x(x) {}
  81. ll get() const { return x; }
  82. const static ll M = 1e9 + 7;
  83. H operator+(H o) { return o.x + x > M ? o.x + x - M : o.x + x; }
  84. H operator*(H o) { return x * o.x % M; }
  85. H operator-(H o) { return *this - o.x < 0 ? *this + M - o.x : *this - o.x; }
  86. bool operator==(H o) const { return get() == o.get(); }
  87. bool operator<(H o) const { return get() < o.get(); }
  88. };
  89. } // namespace mod1e9
  90.  
  91. namespace extensible {
  92. template <class h> struct H : array<h, 2> {
  93. H g() { return *this; }
  94. H() { *this = {0, 0}; }
  95. H(h a) { *this = {a, a}; }
  96. H(h a, h b) { (*this)[0] = a, (*this)[1] = b; }
  97. #define op(o) \
  98.   H operator o(H a) { return {g()[0] o a[0], g()[1] o a[1]}; }
  99. op(+) op(-) op(*);
  100. H operator+(int a) { return g() + H(a); }
  101. };
  102. } // namespace extensible
  103. const int MAXITERS = 5e7;
  104. template <class H>
  105. void benchSeq(
  106. string name, function<void(H)> f = [](H x) { cout << x.get() << endl; }) {
  107. timeit x(name + " seq");
  108. H h = H(1), v = H(uni());
  109. for (int i = 0; i < MAXITERS; i++) {
  110. h = h * v + 5;
  111. }
  112. f(h);
  113. }
  114. template <class H>
  115. void benchParallel(
  116. string name, function<void(H)> f = [](H x) { cout << x.get() << endl; }) {
  117. const int NUMPAR = 8;
  118. timeit x(name + " parallel");
  119. H v = H(uni());
  120. vector<H> hs(NUMPAR);
  121. for (int i = 0; i < hs.size(); i++)
  122. hs[i] = H(i + 1);
  123. for (int i = 0; i < MAXITERS / NUMPAR; i++) {
  124. for (int j = 0; j < NUMPAR; j++)
  125. hs[j] = hs[j] * v + 5;
  126. }
  127. H sm = H(1);
  128. for (auto j : hs)
  129. sm = sm + j;
  130. f(sm);
  131. }
  132.  
  133. signed main() {
  134. ios::sync_with_stdio(0);
  135. cin.tie(0);
  136. benchSeq<sixtyfournonasm::H>("2^64 non-asm");
  137. benchParallel<sixtyfournonasm::H>("2^64 non-asm");
  138. cerr << endl;
  139.  
  140. benchSeq<sixtyfourasm::H>("2^64 nasm");
  141. benchParallel<sixtyfourasm::H>("2^64 asm");
  142. cerr << endl;
  143.  
  144. benchSeq<ull>("64 overflow", [](auto x) { cout << x << endl; });
  145. benchParallel<ull>("64 overflow", [](auto x) { cout << x << endl; });
  146. cerr << endl;
  147.  
  148. benchSeq<dacinsixtyone::H>("2^61 dacin", [](auto x) { cout << x.get() << endl; });
  149. benchParallel<dacinsixtyone::H>("2^61 dacin", [](auto x) { cout << x.get() << endl; });
  150. cerr << endl;
  151.  
  152. benchSeq<mod1e9::H>("mod1e9", [](auto x) { cout << x.get() << endl; });
  153. benchParallel<mod1e9::H>("mod1e9", [](auto x) { cout << x.get() << endl; });
  154. cerr << endl;
  155.  
  156. benchSeq<extensible::H<sixtyfournonasm::H>>("double 2^64 non-asm", [](auto x) { cout << x[0].get() << endl; });
  157. benchParallel<extensible::H<sixtyfournonasm::H>>("double 2^64 non-asm", [](auto x) { cout << x[0].get() << endl; });
  158. cerr << endl;
  159.  
  160. benchSeq<extensible::H<sixtyfourasm::H>>("double 2^64 asm", [](auto x) { cout << x[0].get() << endl; });
  161. benchParallel<extensible::H<sixtyfourasm::H>>("double 2^64 asm", [](auto x) { cout << x[0].get() << endl; });
  162. cerr << endl;
  163.  
  164. benchSeq<extensible::H<ull>>("double 64 overflow asm", [](auto x) { cout << x[0] << endl; });
  165. benchParallel<extensible::H<ull>>("double 64 overflow asm", [](auto x) { cout << x[0] << endl; });
  166. cerr << endl;
  167.  
  168. benchSeq<extensible::H<mod1e9::H>>("double mod1e9", [](auto x) { cout << x[0].get() << endl; });
  169. benchParallel<extensible::H<mod1e9::H>>("double mod1e9", [](auto x) { cout << x[0].get() << endl; });
  170. cerr << endl;
  171. }
Success #stdin #stdout #stderr 2.72s 15280KB
stdin
Standard input is empty
stdout
1355501998133476376
5955998209830211837
17204860302772174936
501830744618003932
8700261127882000607
11764917994226794533
1490576299144534022
1808792341260402497
304863915
782648368
18414618995664905711
5929995228812464157
4540773920170125841
3984042508109819756
4006604661758824833
9246383853378530853
159611782
965602116
stderr
269ms elapsed [2^64 non-asm seq]
137ms elapsed [2^64 non-asm parallel]

118ms elapsed [2^64 nasm seq]
43ms elapsed [2^64 asm parallel]

51ms elapsed [64 overflow seq]
13ms elapsed [64 overflow parallel]

303ms elapsed [2^61 dacin seq]
143ms elapsed [2^61 dacin parallel]

237ms elapsed [mod1e9 seq]
83ms elapsed [mod1e9 parallel]

269ms elapsed [double 2^64 non-asm seq]
307ms elapsed [double 2^64 non-asm parallel]

136ms elapsed [double 2^64 asm seq]
101ms elapsed [double 2^64 asm parallel]

51ms elapsed [double 64 overflow asm seq]
26ms elapsed [double 64 overflow asm parallel]

237ms elapsed [double mod1e9 seq]
185ms elapsed [double mod1e9 parallel]