#include <iostream>
#include <random>
#include <chrono>
#include <vector>
#include <deque>
#include <array>
#include <algorithm>
#include <numeric>
#include <iterator>
using namespace std;

enum {
    kPageW = 10000,
    kSortingBits = 10,
    kTooLarge = 1u << kSortingBits,
};

using Data = uint16_t;
using Index = uint32_t;
using Vec = vector<Data>;
using SVec = vector<Index>;
using Counters = array<Index, 1 + kTooLarge>;

uint64_t g_complexity = 0ull;

constexpr uint64_t ceilDiv(uint64_t dividend, uint64_t divisor)
{
    return (dividend + divisor - 1) / divisor;
}

Index getMinRows(const Vec& input)
{
    const auto sum = accumulate(begin(input), end(input), 0ull);
    return max<Index>(1u, Index(ceilDiv(sum, kPageW)));
}

uint32_t getMaxColumns(const Vec& input)
{
    const auto sum = accumulate(begin(input), end(input), 0ull);
    return uint32_t(input.size() / max(1ull, sum / kPageW));
}

uint64_t preproc(const Vec& input, SVec& sorted)
{
    const auto size = input.size();
    sorted.clear();
    sorted.resize(size);
    auto sum = 0ull;
    Counters counters{};

    for (Index i = 0; i != size; ++i)
    {
        ++counters[1 + input[i]];
    }

    partial_sum(begin(counters), end(counters), begin(counters));

    for (Index i = 0; i != size; ++i)
    {
        sum += input[i];
        auto& cnt = counters[input[i]];
        sorted[cnt] = i;
        __builtin_prefetch(4 + &sorted[cnt], 1);
        ++cnt;
    }

    return sum;

}

uint32_t arrangeAcrossG(const Vec& input) // iterate by width
{
    SVec sorted;
    vector<bool> seen_columns;
    const auto total = preproc(input, sorted);

    auto columns = static_cast<uint32_t>(
            input.size() / max<uint64_t>(1u, total / kPageW));

    for (; columns; --columns)
    {
        seen_columns.clear();
        seen_columns.resize(columns);
        uint32_t sum = 0;
        uint32_t use_cnt = columns;
        Index i_sorted = static_cast<Index>(sorted.size() - 1);

        do {
            ++g_complexity;
            const auto index = sorted[i_sorted];
            const auto width = input[index];
            const auto column = index % columns;

            if (!seen_columns[column])
            {
                seen_columns[column] = true;
                sum += width;
                --use_cnt;

                if (sum > kPageW)
                    break;
            }

            if (!use_cnt)
            {
                return columns;
            }
        } while (i_sorted--);
    }

    return  0;
}

uint32_t arrangeDownG(const Vec& input) // iterate by width
{
    SVec sorted;
    vector<bool> seen_columns;
    const auto total = preproc(input, sorted);
    auto rows = max<Index>(1u, static_cast<Index>(ceilDiv(total, kPageW)));

    for (; rows <= input.size(); ++rows)
    {
        auto columns = uint32_t(ceilDiv(input.size(), rows));
        seen_columns.clear();
        seen_columns.resize(columns);
        uint32_t sum = 0;
        uint32_t use_cnt = columns;
        Index i_sorted = static_cast<Index>(sorted.size() - 1);

        do {
            ++g_complexity;
            const auto index = sorted[i_sorted];
            const auto width = input[index];
            const auto column = index / rows;

            if (!seen_columns[column])
            {
                seen_columns[column] = true;
                sum += width;
                --use_cnt;

                if (sum > kPageW)
                    break;
            }

            if (!use_cnt)
            {
                return columns;
            }
        } while (i_sorted--);
    }

    return  0;
}

uint32_t arrangeAcrossC(const Vec& input) // iterate by columns
{
    for (auto columns = getMaxColumns(input); columns; --columns)
    {
        uint32_t sum = 0;

        for (uint32_t c = 0; sum <= kPageW && c < columns; ++c)
        {
            Data cw = 0;

            for (uint32_t i = c; i < input.size(); i += columns)
            {
                cw = max(cw, input[i]);
                ++g_complexity;
            }

            sum += cw;
        }

        if (sum <= kPageW)
            return columns;
    }

    return 0;
}

uint32_t arrangeAcrossR(const Vec& input) // iterate by rows
{
    auto columns = getMaxColumns(input);
    std::vector<uint32_t> widths(columns);

    for (; columns; --columns)
    {
        uint32_t sum = 0;
        uint32_t column = 0;

        for (size_t i = 0; sum <= kPageW && i < input.size(); ++i, ++column)
        {
            if (column == columns)
            {
                column = 0;
            }

            if (input[i] > widths[column])
            {
                sum += input[i] - widths[column];
                widths[column] = input[i];
            }

            ++g_complexity;
        }

        if (sum <= kPageW)
        {
            return columns;
        }

        fill(begin(widths), begin(widths) + columns, 0u);
    }

    return 0;
}

Data maximum(Data l, Data r)
{
    return max(l, r);
}

template<typename I, typename Op>
auto transform_partial_sum(I lbegin, I lend, I rbegin, I out, Op op)
{ // maximum of the element in first enterval and prefix of second interval
    auto sum = typename I::value_type{};

    for (; lbegin != lend; ++lbegin, ++rbegin, ++out)
    {
        sum = op(sum, *rbegin);
        *lbegin = op(*lbegin, sum);
    }

    return sum;
}

template<typename I>
void reverse_max(I b, I e)
{ // for each element: maximum of the suffix starting from this element
    partial_sum(make_reverse_iterator(e),
                make_reverse_iterator(b),
                make_reverse_iterator(e),
                maximum);
}

struct PreprocRMQ
{
    Index operator () (Vec& input, Index rows)
    {
        if (rows < 4)
        { // no preprocessing needed
            return 1;
        }

        Index ranges = 1;
        auto b = begin(input);

        while (rows >>= 1)
        {
            ranges <<= 1;
        }

        for (; b + 2 * ranges <= end(input); b += ranges)
        {
            reverse_max(b, b + ranges);
            transform_partial_sum(b, b + ranges, b + ranges, b, maximum);
        }

        // preprocess inconvenient tail part of the array
        reverse_max(b, b + ranges);
        const auto d = end(input) - b - ranges;
        const auto z = transform_partial_sum(b, b + d, b + ranges, b, maximum);
        transform(b + d, b + ranges, b + d, [&](Data x){return max(x, z);});
        reverse_max(b + ranges, end(input));
        return ranges;
    }
};

struct NoPreproc
{
    Index operator () (Vec&, Index)
    {
        return 1;
    }
};

template<class PP>
uint32_t arrangeDownRMQ(Vec& input)
{
    auto rows = getMinRows(input);
    auto ranges = PP{}(input, rows);

    while (rows <= input.size())
    {
        if (rows >= ranges * 2)
        { // lazily update RMQ data
            transform(begin(input) + ranges, end(input), begin(input),
                      begin(input), maximum
            );

            ranges *= 2;
        }
        else
        { // use RMQ to get widths of columns and check if all columns fit
            uint32_t sum = 0;

            for (Index i = 0; sum <= kPageW && i < input.size(); i += rows)
            {
                ++g_complexity;
                auto j = i + rows - ranges;

                if (j < input.size())
                    sum += max(input[i], input[j]);
                else
                    sum += input[i];
            }

            if (sum <= kPageW)
            {
                return uint32_t(ceilDiv(input.size(), rows));
            }

            ++rows;
        }
    }

    return 0;
}

//#define TESTING
#ifndef TESTING
int main()
{
    mt19937_64 rng(random_device{}());
    geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000

    for (unsigned size = 3; size < 1000000u; size *= 2)
    {
        const unsigned repeat = max(5u, 1000u / size);
        chrono::duration<double> time {};
        g_complexity = 0ull;
        Vec input(size);
        uint32_t res = 0u;

        for (unsigned i = 0; i < repeat; ++i)
        {
            generate(begin(input), end(input),
                     [&]{return 1 + min(distrib(rng), kTooLarge - 2u);});

            auto start = chrono::steady_clock::now();
            res = arrangeDownRMQ<PreprocRMQ>(input);
            time += chrono::steady_clock::now() - start;
        }

        cout << size << ' '
        << time.count() / repeat << ' '
        << g_complexity / repeat << ' '
        << res << '\n';
    }
}
#else // TESTING
int main()
{
    mt19937_64 rng(random_device{}());
    geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000

    for (unsigned size = 12; size < 10000000u; size *= 4)
    {
        Vec input(size);

        generate(begin(input), end(input),
                 [&]{return 1 + min(distrib(rng), kTooLarge - 2u);});

        cout << arrangeAcrossR(input) << ' ' << arrangeAcrossG(input) << '\n';

        cout << arrangeAcrossR(input) << ' ' << arrangeAcrossC(input) << '\n';

        cout << arrangeDownG(input) << ' ';
        cout << arrangeDownRMQ<PreprocRMQ>(input) << '\n';
    }
}
#endif // TESTING