#include <iostream>
#include <random>
#include <chrono>
#include <vector>
#include <deque>
#include <array>
#include <algorithm>
#include <numeric>
using namespace std;
enum {
kPageW = 10000,
kSortingBits = 8,
kTooLarge = 1u << kSortingBits,
};
struct WidthPos
{
uint32_t width = 0;
uint32_t pos = 0;
WidthPos(uint32_t w = 0, uint32_t p = 0)
: width(w)
, pos(p)
{}
};
bool operator < (WidthPos l, WidthPos r)
{ return l.width > r.width; }
using Vec = vector<uint32_t>;
using SVec = vector<WidthPos>;
uint64_t g_complexity = 0ull;
constexpr uint64_t ceilDiv(uint64_t dividend, uint64_t divisor)
{
return (dividend + divisor - 1) / divisor;
}
uint32_t getMinRows(const Vec& input)
{
const auto sum = accumulate(begin(input), end(input), 0ull);
return max(1u, uint32_t(ceilDiv(sum, kPageW)));
}
uint32_t getMaxColumns(const Vec& input)
{
const auto sum = accumulate(begin(input), end(input), 0ull);
return uint32_t(input.size() / max(1ull, sum / kPageW));
}
uint32_t preproc(const Vec& input, SVec& sorted)
{
sorted.clear();
sorted.resize(input.size());
auto sum = 0ull;
array<uint32_t, 1 + kTooLarge> cnt_1 {};
for (uint32_t i = 0; i != input.size(); ++i)
{
sum += input[i];
if (input[i] >= kTooLarge)
sorted[cnt_1[0]++] = {input[i], i};
else
++cnt_1[kTooLarge - input[i]];
}
sort(begin(sorted), begin(sorted) + cnt_1[0]);
partial_sum(begin(cnt_1), end(cnt_1), begin(cnt_1));
for (uint32_t i = 0; i != input.size(); ++i)
{
if (input[i] < kTooLarge)
{
auto& cnt = cnt_1[kTooLarge - 1 - input[i]];
sorted[cnt++] = {input[i], i};
__builtin_prefetch(4 + &sorted[cnt], 1);
}
}
return uint32_t(input.size() / max(1ull, sum / kPageW));
}
uint32_t arrangeAcrossG(const Vec& input) // iterate by width
{
SVec sorted;
vector<bool> seen_columns;
for (auto columns = preproc(input, sorted); columns; --columns)
{
seen_columns.clear();
seen_columns.resize(columns);
uint32_t sum = 0;
uint32_t use_cnt = columns;
for (auto x: sorted)
{
++g_complexity;
const auto column = x.pos % columns;
if (!seen_columns[column])
{
sum += x.width;
--use_cnt;
if (sum > kPageW)
break;
}
seen_columns[column] = true;
if (!use_cnt)
return columns;
}
}
return 0;
}
uint32_t preprocX(const Vec& input, SVec& sorted)
{
sorted.clear();
sorted.resize(input.size());
auto sum = 0ull;
array<uint32_t, 1 + kTooLarge> cnt_1 {};
for (uint32_t i = 0; i != input.size(); ++i)
{
sum += input[i];
if (input[i] >= kTooLarge)
sorted[cnt_1[0]++] = {input[i], i};
else
++cnt_1[kTooLarge - input[i]];
}
sort(begin(sorted), begin(sorted) + cnt_1[0]);
partial_sum(begin(cnt_1), end(cnt_1), begin(cnt_1));
for (uint32_t i = 0; i != input.size(); ++i)
{
if (input[i] < kTooLarge)
{
auto& cnt = cnt_1[kTooLarge - 1 - input[i]];
sorted[cnt++] = {input[i], i};
__builtin_prefetch(4 + &sorted[cnt], 1);
}
}
return max(1u, uint32_t(ceilDiv(sum, kPageW)));
}
uint32_t arrangeDownG(const Vec& input) // iterate by width
{
SVec sorted;
vector<bool> seen_columns;
for (auto rows = preprocX(input, sorted); rows <= input.size(); ++rows)
{
auto columns = uint32_t(ceilDiv(input.size(), rows));
seen_columns.clear();
seen_columns.resize(columns);
uint32_t sum = 0;
uint32_t use_cnt = columns;
for (auto x: sorted)
{
++g_complexity;
const auto column = x.pos / rows;
if (!seen_columns[column])
{
sum += x.width;
--use_cnt;
if (sum > kPageW)
break;
}
seen_columns[column] = true;
if (!use_cnt)
return columns;
}
}
return 0;
}
uint32_t arrangeAcrossC(const Vec& input) // iterate by columns
{
for (auto columns = getMaxColumns(input); columns; --columns)
{
uint32_t sum = 0;
for (uint32_t c = 0; sum <= kPageW && c < columns; ++c)
{
uint32_t cw = 0;
for (uint32_t i = c; i < input.size(); i += columns)
{
cw = max(cw, input[i]);
++g_complexity;
}
sum += cw;
}
if (sum <= kPageW)
return columns;
}
return 0;
}
uint32_t arrangeAcrossR(const Vec& input) // iterate by rows
{
auto columns = getMaxColumns(input);
std::vector<uint32_t> widths(columns);
for (; columns; --columns)
{
uint32_t sum = 0;
size_t column = 0;
for (size_t i = 0; sum <= kPageW && i < input.size(); ++i, ++column)
{
if (column == columns)
{
column = 0;
}
if (input[i] > widths[column])
{
sum += input[i] - widths[column];
widths[column] = input[i];
}
++g_complexity;
}
if (sum <= kPageW)
{
return columns;
}
fill(begin(widths), begin(widths) + columns, 0u);
}
return 0;
}
uint32_t arrangeDownR(const Vec& input) // iterate by rows
{
auto rows = getMinRows(input);
auto columns = uint32_t(ceilDiv(input.size(), rows));
std::vector<uint32_t> widths(columns);
while (rows <= input.size())
{
uint32_t sum = 0;
for (uint32_t row = 0; row < rows; ++row)
{
uint32_t column = 0;
uint32_t i = row;
for (; sum <= kPageW && i < input.size(); i += rows, ++column)
{
if (input[i] > widths[column])
{
sum += input[i] - widths[column];
widths[column] = input[i];
}
++g_complexity;
}
if (sum > kPageW)
{
break;
}
}
if (sum <= kPageW)
{
return columns;
}
++rows;
columns = uint32_t(ceilDiv(input.size(), rows));
fill(begin(widths), begin(widths) + columns, 0u);
}
return 0;
}
uint32_t arrangeDownNaive(const Vec& input)
{
const size_t size = input.size();
for (size_t rows = 1; rows <= size; ++rows)
{
size_t index = 0;
uint32_t text_w = 0;
for (uint32_t c = 1; ; ++c)
{
uint32_t column_w = 0;
for (uint32_t r = 0; r < rows && index < size; ++r, ++index)
column_w = max(column_w, input[index]);
text_w += column_w;
if (text_w > kPageW)
break;
if (index >= size)
return c;
}
}
return 0;
}
uint32_t arrangeDownRMQ(Vec& input)
{
auto rows = getMinRows(input);
uint32_t ranges = 1;
while (rows <= input.size())
{
if (rows >= ranges * 2)
{ // lazily update RMQ data
transform(begin(input) + ranges, end(input), begin(input),
begin(input), [](auto l, auto r) {return max(l, r);}
);
ranges *= 2;
}
else
{ // use RMQ to get widths of columns and check if all columns fit
uint32_t sum = 0;
for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
{
++g_complexity;
auto j = i + rows - ranges;
if (j < input.size())
sum += max(input[i], input[j]);
else
sum += input[i];
}
if (sum <= kPageW)
{
return uint32_t(ceilDiv(input.size(), rows));
}
++rows;
}
}
return 0;
}
struct DeckEntry
{
uint32_t v;
uint32_t p;
DeckEntry(uint32_t value = 0, uint32_t pos = 0)
: v(value)
, p(pos)
{}
};
using Deck = deque<DeckEntry>;
void push(Deck& deck, uint32_t value, uint32_t pos)
{
while (!deck.empty() && deck.back().v <= value)
deck.pop_back();
deck.emplace_back(value, pos);
}
uint32_t maxAfterPos(Deck& deck, uint32_t pos)
{
if (pos == deck.front().p)
deck.pop_front();
return deck.front().v;
}
uint32_t preprocRMQ(Vec& input, uint32_t rows)
{
if (rows < 4)
return 1;
uint32_t back_pos = 0;
size_t front_pos = 0;
uint32_t ranges = 1;
Deck deck;
while (rows >>= 1)
ranges <<= 1;
for (; back_pos != ranges - 1; ++back_pos)
{
push(deck, input[back_pos], back_pos);
}
for (; back_pos != input.size(); ++front_pos, ++back_pos)
{
push(deck, input[back_pos], back_pos);
input[front_pos] = maxAfterPos(deck, back_pos - ranges);
}
for (; front_pos != input.size(); ++front_pos, ++back_pos)
{
input[front_pos] = maxAfterPos(deck, back_pos - ranges);
}
return ranges;
}
uint32_t arrangeDownRMQ2(Vec& input)
{
auto rows = getMinRows(input);
uint32_t ranges = preprocRMQ(input, rows);
while (rows <= input.size())
{
if (rows >= ranges * 2)
{ // lazily update RMQ data
transform(begin(input) + ranges, end(input), begin(input),
begin(input), [](auto l, auto r) {return max(l, r);}
);
ranges *= 2;
//g_complexity += input.size() - ranges;
}
else
{ // use RMQ to get widths of columns and check if all columns fit
uint32_t sum = 0;
for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
{
++g_complexity;
auto j = i + rows - ranges;
if (j < input.size())
sum += max(input[i], input[j]);
else
sum += input[i];
}
if (sum <= kPageW)
{
return uint32_t(ceilDiv(input.size(), rows));
}
++rows;
}
}
return 0;
}
class TwoStk
{
vector<uint32_t> in_;
vector<uint32_t> out_;
uint32_t in_max_;
void inout()
{
in_max_ = 0;
uint32_t out_max = 0;
while (!in_.empty())
{
const auto x = in_.back();
in_.pop_back();
out_max = max(out_max, x);
out_.push_back(out_max);
}
}
public:
TwoStk(uint32_t size)
: in_max_(0)
{
in_.reserve(size);
out_.reserve(size);
}
void push(uint32_t x)
{
in_.push_back(x);
in_max_ = max(in_max_, x);
}
uint32_t pop()
{
if (out_.empty())
{
inout();
}
const auto res = out_.back();
out_.pop_back();
return max(res, in_max_);
}
};
uint32_t preprocRMQ3(Vec& input, uint32_t rows)
{
if (rows < 4)
return 1;
uint32_t back_pos = 0;
size_t front_pos = 0;
uint32_t ranges = 1;
while (rows >>= 1)
ranges <<= 1;
TwoStk ts(ranges);
for (; back_pos != ranges - 1; ++back_pos)
{
ts.push(input[back_pos]);
}
for (; back_pos != input.size(); ++front_pos, ++back_pos)
{
ts.push(input[back_pos]);
input[front_pos] = ts.pop();
}
for (; front_pos != input.size(); ++front_pos, ++back_pos)
{
input[front_pos] = ts.pop();
}
return ranges;
}
uint32_t arrangeDownRMQ3(Vec& input)
{
auto rows = getMinRows(input);
uint32_t ranges = preprocRMQ3(input, rows);
while (rows <= input.size())
{
if (rows >= ranges * 2)
{ // lazily update RMQ data
transform(begin(input) + ranges, end(input), begin(input),
begin(input), [](auto l, auto r) {return max(l, r);}
);
ranges *= 2;
//g_complexity += input.size() - ranges;
}
else
{ // use RMQ to get widths of columns and check if all columns fit
uint32_t sum = 0;
for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
{
++g_complexity;
auto j = i + rows - ranges;
if (j < input.size())
sum += max(input[i], input[j]);
else
sum += input[i];
}
if (sum <= kPageW)
{
return uint32_t(ceilDiv(input.size(), rows));
}
++rows;
}
}
return 0;
}
int main()
{
mt19937_64 rng(random_device{}());
geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
for (unsigned size = 3; size < 1000000u; size *= 2)
{
const unsigned repeat = max(1u, 100000u / size);
chrono::duration<double> time {};
g_complexity = 0ull;
Vec input(size);
uint32_t res = 0u;
for (unsigned i = 0; i < repeat; ++i)
{
generate(begin(input), end(input), [&] {return 1 + distrib(rng);});
auto start = chrono::steady_clock::now();
res = arrangeDownG(input);
time += chrono::steady_clock::now() - start;
}
cout << size << ' '
<< time.count() / repeat << ' '
<< g_complexity / repeat << ' '
<< res << '\n';
}
}