enum {
kMaxSize = 110000000,
kMinSize = 1000000,
kRadix = 1500,
kMaxCounters = (kMaxSize + kRadix - 1) / kRadix,
};
struct IndexValue
{
unsigned index;
unsigned value;
};
unsigned g_input[kMaxSize];
unsigned g_index[kMaxSize];
struct IndexValue g_sort[kMaxSize];
unsigned g_counters[kMaxCounters];
unsigned g_output[kMaxSize];
void reorder2(const unsigned size)
{
const unsigned min_bucket = size / kRadix;
const unsigned large_buckets = size % kRadix;
g_counters[0] = 0;
for (unsigned i = 1; i <= large_buckets; ++i)
g_counters[i] = g_counters[i - 1] + min_bucket + 1;
for (unsigned i = large_buckets + 1; i < kRadix; ++i)
g_counters[i] = g_counters[i - 1] + min_bucket;
for (unsigned i = 0; i < size; ++i)
{
const unsigned dst = g_counters[g_index[i] % kRadix]++;
g_sort[dst].index = g_index[i] / kRadix;
g_sort[dst].value = g_input[i];
__builtin_prefetch(&g_sort[dst + 1].value, 1);
}
g_counters[0] = 0;
for (unsigned i = 1; i < (size + kRadix - 1) / kRadix; ++i)
g_counters[i] = g_counters[i - 1] + kRadix;
for (unsigned i = 0; i < size; ++i)
{
const unsigned dst = g_counters[g_sort[i].index]++;
g_output[dst] = g_sort[i].value;
__builtin_prefetch(&g_output[dst + 1], 1);
}
}
void reorder1(const unsigned size)
{
for (unsigned i = 0; i < size; ++i)
g_output[g_index[i]] = g_input[i];
}
#include <iostream>
#include <random>
#include <chrono>
#include <algorithm>
#include <numeric>
using namespace std;
int main()
{
mt19937_64 rng(random_device{}());
for (unsigned size = kMaxSize; size > kMinSize; size -= size / 4)
//for (unsigned size = 64*1024*kRadix; size > kMinSize; size -= size / 2)
{
iota(begin(g_index), begin(g_index) + size, 0);
shuffle(begin(g_index), begin(g_index) + size, rng);
copy(begin(g_index), begin(g_index) + size, begin(g_input));
const auto start = chrono::steady_clock::now();
reorder2(size);
const chrono::nanoseconds time = chrono::steady_clock::now() - start;
const auto b = begin(g_output);
const auto e = begin(g_output) + size;
bool ok = adjacent_find(b, e, [&](auto l, auto r){return l + 1 != r;})
== e;
cout << size << ' ' << time.count() / double(size)
<< ' ' << boolalpha << ok << '\n';
}
}