#include <iostream>
#include <random>
#include <chrono>
#include <vector>
#include <deque>
#include <array>
#include <algorithm>
#include <numeric>
#include <iterator>
using namespace std;
enum {
kPageW = 10000,
kSortingBits = 10,
kTooLarge = 1u << kSortingBits,
};
using Data = uint16_t;
using Index = uint32_t;
using Vec = vector<Data>;
using SVec = vector<Index>;
using Counters = array<Index, 1 + kTooLarge>;
uint64_t g_complexity = 0ull;
constexpr uint64_t ceilDiv(uint64_t dividend, uint64_t divisor)
{
return (dividend + divisor - 1) / divisor;
}
Index getMinRows(const Vec& input)
{
const auto sum = accumulate(begin(input), end(input), 0ull);
return max<Index>(1u, Index(ceilDiv(sum, kPageW)));
}
uint32_t getMaxColumns(const Vec& input)
{
const auto sum = accumulate(begin(input), end(input), 0ull);
return uint32_t(input.size() / max(1ull, sum / kPageW));
}
uint64_t preproc(const Vec& input, SVec& sorted)
{
const auto size = input.size();
sorted.clear();
sorted.resize(size);
auto sum = 0ull;
Counters counters{};
for (Index i = 0; i != size; ++i)
{
++counters[1 + input[i]];
}
partial_sum(begin(counters), end(counters), begin(counters));
for (Index i = 0; i != size; ++i)
{
sum += input[i];
auto& cnt = counters[input[i]];
sorted[cnt] = i;
__builtin_prefetch(4 + &sorted[cnt], 1);
++cnt;
}
return sum;
}
uint32_t arrangeAcrossG(const Vec& input) // iterate by width
{
SVec sorted;
vector<bool> seen_columns;
const auto total = preproc(input, sorted);
auto columns = static_cast<uint32_t>(
input.size() / max<uint64_t>(1u, total / kPageW));
for (; columns; --columns)
{
seen_columns.clear();
seen_columns.resize(columns);
uint32_t sum = 0;
uint32_t use_cnt = columns;
Index i_sorted = static_cast<Index>(sorted.size() - 1);
do {
++g_complexity;
const auto index = sorted[i_sorted];
const auto width = input[index];
const auto column = index % columns;
if (!seen_columns[column])
{
seen_columns[column] = true;
sum += width;
--use_cnt;
if (sum > kPageW)
break;
}
if (!use_cnt)
{
return columns;
}
} while (i_sorted--);
}
return 0;
}
uint32_t arrangeDownG(const Vec& input) // iterate by width
{
SVec sorted;
vector<bool> seen_columns;
const auto total = preproc(input, sorted);
auto rows = max<Index>(1u, static_cast<Index>(ceilDiv(total, kPageW)));
for (; rows <= input.size(); ++rows)
{
auto columns = uint32_t(ceilDiv(input.size(), rows));
seen_columns.clear();
seen_columns.resize(columns);
uint32_t sum = 0;
uint32_t use_cnt = columns;
Index i_sorted = static_cast<Index>(sorted.size() - 1);
do {
++g_complexity;
const auto index = sorted[i_sorted];
const auto width = input[index];
const auto column = index / rows;
if (!seen_columns[column])
{
seen_columns[column] = true;
sum += width;
--use_cnt;
if (sum > kPageW)
break;
}
if (!use_cnt)
{
return columns;
}
} while (i_sorted--);
}
return 0;
}
uint32_t arrangeAcrossC(const Vec& input) // iterate by columns
{
for (auto columns = getMaxColumns(input); columns; --columns)
{
uint32_t sum = 0;
for (uint32_t c = 0; sum <= kPageW && c < columns; ++c)
{
Data cw = 0;
for (uint32_t i = c; i < input.size(); i += columns)
{
cw = max(cw, input[i]);
++g_complexity;
}
sum += cw;
}
if (sum <= kPageW)
return columns;
}
return 0;
}
uint32_t arrangeAcrossR(const Vec& input) // iterate by rows
{
auto columns = getMaxColumns(input);
std::vector<uint32_t> widths(columns);
for (; columns; --columns)
{
uint32_t sum = 0;
uint32_t column = 0;
for (size_t i = 0; sum <= kPageW && i < input.size(); ++i, ++column)
{
if (column == columns)
{
column = 0;
}
if (input[i] > widths[column])
{
sum += input[i] - widths[column];
widths[column] = input[i];
}
++g_complexity;
}
if (sum <= kPageW)
{
return columns;
}
fill(begin(widths), begin(widths) + columns, 0u);
}
return 0;
}
Data maximum(Data l, Data r)
{
return max(l, r);
}
template<typename I, typename Op>
auto transform_partial_sum(I lbegin, I lend, I rbegin, I out, Op op)
{ // maximum of the element in first enterval and prefix of second interval
auto sum = typename I::value_type{};
for (; lbegin != lend; ++lbegin, ++rbegin, ++out)
{
sum = op(sum, *rbegin);
*lbegin = op(*lbegin, sum);
}
return sum;
}
template<typename I>
void reverse_max(I b, I e)
{ // for each element: maximum of the suffix starting from this element
partial_sum(make_reverse_iterator(e),
make_reverse_iterator(b),
make_reverse_iterator(e),
maximum);
}
struct PreprocRMQ
{
Index operator () (Vec& input, Index rows)
{
if (rows < 4)
{ // no preprocessing needed
return 1;
}
Index ranges = 1;
auto b = begin(input);
while (rows >>= 1)
{
ranges <<= 1;
}
for (; b + 2 * ranges <= end(input); b += ranges)
{
reverse_max(b, b + ranges);
transform_partial_sum(b, b + ranges, b + ranges, b, maximum);
}
// preprocess inconvenient tail part of the array
reverse_max(b, b + ranges);
const auto d = end(input) - b - ranges;
const auto z = transform_partial_sum(b, b + d, b + ranges, b, maximum);
transform(b + d, b + ranges, b + d, [&](Data x){return max(x, z);});
reverse_max(b + ranges, end(input));
return ranges;
}
};
struct NoPreproc
{
Index operator () (Vec&, Index)
{
return 1;
}
};
template<class PP>
uint32_t arrangeDownRMQ(Vec& input)
{
auto rows = getMinRows(input);
auto ranges = PP{}(input, rows);
while (rows <= input.size())
{
if (rows >= ranges * 2)
{ // lazily update RMQ data
transform(begin(input) + ranges, end(input), begin(input),
begin(input), maximum
);
ranges *= 2;
}
else
{ // use RMQ to get widths of columns and check if all columns fit
uint32_t sum = 0;
for (Index i = 0; sum <= kPageW && i < input.size(); i += rows)
{
++g_complexity;
auto j = i + rows - ranges;
if (j < input.size())
sum += max(input[i], input[j]);
else
sum += input[i];
}
if (sum <= kPageW)
{
return uint32_t(ceilDiv(input.size(), rows));
}
++rows;
}
}
return 0;
}
//#define TESTING
#ifndef TESTING
int main()
{
mt19937_64 rng(random_device{}());
geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
for (unsigned size = 3; size < 1000000u; size *= 2)
{
const unsigned repeat = max(5u, 1000u / size);
chrono::duration<double> time {};
g_complexity = 0ull;
Vec input(size);
uint32_t res = 0u;
for (unsigned i = 0; i < repeat; ++i)
{
generate(begin(input), end(input),
[&]{return 1 + min(distrib(rng), kTooLarge - 2u);});
auto start = chrono::steady_clock::now();
res = arrangeDownRMQ<PreprocRMQ>(input);
time += chrono::steady_clock::now() - start;
}
cout << size << ' '
<< time.count() / repeat << ' '
<< g_complexity / repeat << ' '
<< res << '\n';
}
}
#else // TESTING
int main()
{
mt19937_64 rng(random_device{}());
geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
for (unsigned size = 12; size < 10000000u; size *= 4)
{
Vec input(size);
generate(begin(input), end(input),
[&]{return 1 + min(distrib(rng), kTooLarge - 2u);});
cout << arrangeAcrossR(input) << ' ' << arrangeAcrossG(input) << '\n';
cout << arrangeAcrossR(input) << ' ' << arrangeAcrossC(input) << '\n';
cout << arrangeDownG(input) << ' ';
cout << arrangeDownRMQ<PreprocRMQ>(input) << '\n';
}
}
#endif // TESTING