fork download
  1. enum {
  2. kMaxSize = 110000000,
  3. kMinSize = 1000000,
  4. kRadix = 1500,
  5. kMaxCounters = (kMaxSize + kRadix - 1) / kRadix,
  6. };
  7.  
  8. struct IndexValue
  9. {
  10. unsigned index;
  11. unsigned value;
  12. };
  13.  
  14. unsigned g_input[kMaxSize];
  15. unsigned g_index[kMaxSize];
  16. struct IndexValue g_sort[kMaxSize];
  17. unsigned g_counters[kMaxCounters];
  18. unsigned g_output[kMaxSize];
  19.  
  20. void reorder2(const unsigned size)
  21. {
  22. const unsigned min_bucket = size / kRadix;
  23. const unsigned large_buckets = size % kRadix;
  24. g_counters[0] = 0;
  25.  
  26. for (unsigned i = 1; i <= large_buckets; ++i)
  27. g_counters[i] = g_counters[i - 1] + min_bucket + 1;
  28.  
  29. for (unsigned i = large_buckets + 1; i < kRadix; ++i)
  30. g_counters[i] = g_counters[i - 1] + min_bucket;
  31.  
  32. for (unsigned i = 0; i < size; ++i)
  33. {
  34. const unsigned dst = g_counters[g_index[i] % kRadix]++;
  35. g_sort[dst].index = g_index[i] / kRadix;
  36. g_sort[dst].value = g_input[i];
  37. __builtin_prefetch(&g_sort[dst + 1].value, 1);
  38. }
  39.  
  40. g_counters[0] = 0;
  41.  
  42. for (unsigned i = 1; i < (size + kRadix - 1) / kRadix; ++i)
  43. g_counters[i] = g_counters[i - 1] + kRadix;
  44.  
  45. for (unsigned i = 0; i < size; ++i)
  46. {
  47. const unsigned dst = g_counters[g_sort[i].index]++;
  48. g_output[dst] = g_sort[i].value;
  49. __builtin_prefetch(&g_output[dst + 1], 1);
  50. }
  51. }
  52.  
  53. void reorder1(const unsigned size)
  54. {
  55. for (unsigned i = 0; i < size; ++i)
  56. g_output[g_index[i]] = g_input[i];
  57. }
  58.  
  59. #include <iostream>
  60. #include <random>
  61. #include <chrono>
  62. #include <algorithm>
  63. #include <numeric>
  64. using namespace std;
  65.  
  66. int main()
  67. {
  68. mt19937_64 rng(random_device{}());
  69.  
  70. for (unsigned size = kMaxSize; size > kMinSize; size -= size / 4)
  71. //for (unsigned size = 64*1024*kRadix; size > kMinSize; size -= size / 2)
  72. {
  73. iota(begin(g_index), begin(g_index) + size, 0);
  74. shuffle(begin(g_index), begin(g_index) + size, rng);
  75. copy(begin(g_index), begin(g_index) + size, begin(g_input));
  76.  
  77. const auto start = chrono::steady_clock::now();
  78. reorder2(size);
  79. const chrono::nanoseconds time = chrono::steady_clock::now() - start;
  80.  
  81. const auto b = begin(g_output);
  82. const auto e = begin(g_output) + size;
  83.  
  84. bool ok = adjacent_find(b, e, [&](auto l, auto r){return l + 1 != r;})
  85. == e;
  86.  
  87. cout << size << ' ' << time.count() / double(size)
  88. << ' ' << boolalpha << ok << '\n';
  89. }
  90. }
Runtime error #stdin #stdout 0s 148KB
stdin
Standard input is empty
stdout
Standard output is empty