#pragma GCC optimize("Ofast")
#include <bits/stdc++.h>

uint32_t xorshift32(uint32_t x) {
	x ^= x << 13;
	x ^= x >> 17;
	return x ^= x << 5;
}

using namespace std;

const int n = (1<<20);
const int block_size = 16;
alignas(64) int a[n], b[n * 2];

int eytzinger(int i = 0, int k = 1) {
    if (k <= n) {
        i = eytzinger(i, 2 * k);
        b[k] = a[i++];
        i = eytzinger(i, 2 * k + 1);
    }
    return i;
}

int search(int x) {
    int k = 1;
    while (k <= n) {
        __builtin_prefetch(b + k * block_size);
        k = 2 * k + (b[k] < x);
    }
    return k - n + 1;
}

int main() {
	std::cout << std::fixed << std::setprecision(3);
	for (int i = 0; i < n; i++) {
		a[i] = i;
	}
	eytzinger();
	{
		double tin = (double)clock();
		uint32_t x = 1, check = 0;
		for (int i = 0; i < (1 << 20); i++) {
			x = xorshift32(x);
			check ^= search(x % n);
		}
		double tout = (double)clock();
		std::cout << "eytzinger = " << (tout - tin) / CLOCKS_PER_SEC << "s, check = " << check << std::endl;
	}
	{
		double tin = (double)clock();
		uint32_t x = 1, check = 0;
		for (int i = 0; i < (1 << 20); i++) {
			x = xorshift32(x);
			check ^= int(std::lower_bound(a, a + n, x % n) - a);
		}
		double tout = (double)clock();
		std::cout << "lower_bound = " << (tout - tin) / CLOCKS_PER_SEC << "s, check = " << check << std::endl;
	}
	return 0;
}