#include <algorithm>
#include <utility>
#include <iostream>

struct my_cmp
{
    bool operator()(unsigned char a, unsigned char b)
    {
        return a < b;
    }
};

struct my_cmp2
{
    bool operator()(unsigned char a, unsigned char b)
    {
        std::cerr << (int) a << ' ' << (int) b << '\n';
        return a < b;
    }
};

template<unsigned int Len>
struct reverse_if
{
    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        using std::swap;
        if (pred(*(a + Len - 1), *a)) swap(*a, *(a + Len - 1));
        reverse_if<Len - 2> rif;
        rif(a + 1, pred);
    }
};

template<>
struct reverse_if<0>
{
    template<typename T, typename P>
    void operator()(T* a, P pred) {}
};


template<unsigned int Len, unsigned int Count>
struct shift_swap_if_help
{
    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        using std::swap;
        if (pred(*(a + Len), *a)) swap(*a, *(a + Len));
        shift_swap_if_help<Len, Count - 1> ssif;
        ssif(a + 1, pred);
    }
};

template<unsigned int Len>
struct shift_swap_if_help<Len, 0>
{
    template<typename T, typename P>
    void operator()(T* a, P Pred) {}
};

template<unsigned int Len>
struct shift_swap_if
{
    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        shift_swap_if_help<Len, Len> ssif;
        ssif(a, pred);
    }
};

template<unsigned int Len>
struct shift_chain
{
    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        shift_swap_if<Len/2> ssif;
        ssif(a, pred);

        shift_chain<Len / 2> upper;
        shift_chain<Len / 2> lower;
        upper(a, pred);
        lower(a + Len/2, pred);
    }
};

template<>
struct shift_chain<0>
{
    template<typename T, typename P>
    void operator()(T* a, P pred) {}
};

template<unsigned int Len>
struct block
{
    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        reverse_if<Len> rif;
        rif(a, pred);

        shift_chain<Len / 2> upper;
        shift_chain<Len / 2> lower;
        upper(a, pred);
        lower(a + Len/2, pred);
    }
};

template<unsigned int Depth>
struct bitonic_sort
{

    template<typename T, typename P>
    void operator()(T* a, P pred)
    {
        bitonic_sort<Depth / 2> upper;
        bitonic_sort<Depth / 2> lower;
        upper(a, pred);
        lower(a + (Depth / 2), pred);

        block<Depth> blk;
        blk(a, pred);
    }
};

template<>
struct bitonic_sort<1>
{

    template<typename T, typename P>
    void operator()(T* a, P pred) {}
};

template<unsigned int Len>
void test_bitonic(unsigned int idx)
{
    unsigned char a[Len];
    for (auto& e : a) e = 0;
    a[idx] = 1;

    for (auto e : a) std::cerr << (int) e << ' ';
    std::cerr << " ->  ";

    bitonic_sort<Len> bsort;
    bsort(a, my_cmp{});

    for (auto e : a) std::cerr << (int) e << ' ';
    std::cerr << '\n';
}

template<unsigned int Len>
void test_bitonic_index()
{
    unsigned char a[Len];
    for (unsigned int i = 0; i < Len; ++i)
        a[i] = i;

    for (auto e : a) std::cerr << (int) e << ' ';
    std::cerr << " ->  ";

    bitonic_sort<Len> bsort;
    bsort(a, my_cmp2{});

    for (auto e : a) std::cerr << (int) e << ' ';
    std::cerr << '\n';
}



int main()
{
    //test_bitonic<4>(2);
    //std::cerr << '\n';
    //test_bitonic_index<4>();

    test_bitonic<1>(0);

    std::cerr << '\n';

    test_bitonic<2>(0);
    test_bitonic<2>(1);

    std::cerr << '\n';

    for(unsigned int i = 0; i < 4; ++i)
        test_bitonic<4>(i);
    std::cerr << '\n';

    for(unsigned int i = 0; i < 8; ++i)
        test_bitonic<8>(i);
    std::cerr << '\n';

    for(unsigned int i = 0; i < 16; ++i)
        test_bitonic<16>(i);
    std::cerr << '\n';

    return 0;
}