#include <vector>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <cmath>

// https://g...content-available-to-author-only...b.com/Cyan4973/FiniteStateEntropy
#include "fse.h"

using namespace std;

// Optimized for the case of many duplicates
template<class ForwardIt>
ForwardIt rem_dupl(ForwardIt first, ForwardIt last)
{
  if (first == last)
    return last;

  auto value = *first;
  bool dupl = false;
  ForwardIt result = first;
  while (++first != last) {
    if (value == *first) {
      dupl = true;
    }
    else {
      if (!dupl) {
        *result++ = value;
      }
      value = std::move(*first);
      dupl = false;
    }
  }
  if (!dupl) {
    *result++ = value;
  }
  return result;
}

double est_compr_bits(const vector<char>& vc) {
  vector<char> compressed(2 * vc.size());

  auto compr_res = FSE_compress(
    compressed.data(), compressed.size(),
    vc.data(), vc.size()
  );
  if (FSE_isError(compr_res)) {
    std::cout << "FSE error: " << FSE_getErrorName(compr_res) << "\n";
    return 0.;
  }
  return 8. * compr_res;
}

// https://g...content-available-to-author-only...b.com/badboy/6267743
uint64_t hash64shift(uint64_t key)
{
  key = (~key) + (key << 21); // key = (key << 21) - key - 1;
  key = key ^ (key >> 24);
  key = (key + (key << 3)) + (key << 8); // key * 265
  key = key ^ (key >> 14);
  key = (key + (key << 2)) + (key << 4); // key * 21
  key = key ^ (key >> 28);
  key = key + (key << 31);
  return key;
}

using Ty = uint32_t;
constexpr double h_factor = 2.1;
constexpr double passes = 10. - 2.;
constexpr size_t key_count = 2'000'000'000;
constexpr double key_bytes = 50.;
constexpr double index_bytes = 32.;
constexpr double avail_bytes = 1024 * 1024 * 1024;
constexpr double MB = 1024 * 1024;

int main() {
  // Partial algorithm for hash compression
  vector<Ty> v(key_count);
  iota(begin(v), end(v), 0);
  const auto h = 1 | static_cast<Ty>(key_count * h_factor);
  transform(begin(v), end(v), begin(v), [](Ty x) {
    return hash64shift(x) % h;
  });
  sort(begin(v), end(v));
  const auto uniq_end = rem_dupl(begin(v), end(v));
  v.erase(uniq_end, end(v));
  adjacent_difference(begin(v), end(v), begin(v));
  const auto max_diff = *max_element(begin(v), end(v));
  vector<char> vc(v.size());
  copy(begin(v), end(v), begin(vc));
  const auto bits = est_compr_bits(vc);

  // Calculations
  const auto uniq_keys = uniq_end - begin(v);
  const auto uniq_frac = static_cast<double>(uniq_keys) / key_count;
  const auto dupl_frac = pow((1 - uniq_frac), passes);
  const auto uncompr_bytes = dupl_frac * key_count * key_bytes;
  const auto bits_per_key = bits / uniq_keys;
  const auto compressed_bytes = bits_per_key * key_count / 8;
  const auto bitmap_bytes = key_count / 8;
  const auto total_bytes = compressed_bytes + uncompr_bytes + bitmap_bytes;
  const auto index_entries = (avail_bytes - total_bytes) / index_bytes;
  const auto keys_per_ind = key_count / index_entries;

  // Printing results
  //for (auto x: vc) { cout << (int)x << '\n'; }
  cout << "Bits per key:            " << bits_per_key << '\n';
  cout << "Compressed MB:           " << compressed_bytes / MB << '\n';
  cout << "Uncompressed MB:         " << uncompr_bytes / MB << '\n';
  cout << "Bitmap MB:               " << bitmap_bytes / MB << '\n';
  cout << "MB used:                 " << total_bytes / MB << '\n';
  cout << "Index entries:           " << index_entries << '\n';
  cout << "Indexed fragment size:   " << keys_per_ind << '\n';
  cout << "Unique hash fraction:    " << uniq_frac << '\n';
  cout << "Maxumum hash difference: " << max_diff << '\n';
}