#include <iostream>
#include <vector>
#include <functional>
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <random>

using namespace std;

/**
 * @brief Computes the Shannon entropy (in bits) of a range of probabilities.
 *
 * Evaluates -sum(p * log2(p)) over every non-zero element of [first, last).
 * Zero elements are skipped, following the convention 0 * log2(0) == 0.
 * The values are used as given: they are neither normalized nor validated.
 *
 * @tparam T  Input iterator whose value type is convertible to double.
 * @param first  Start of the probability range.
 * @param last   One past the end of the probability range.
 * @return The entropy in bits, or -1.0 if the range holds more than 256
 *         non-zero elements. An empty or degenerate distribution yields
 *         +0.0 (never -0.0).
 */
template <typename T>
double shannon_entropy(T first, T last)
{
    // Upper bound on the number of distinct symbols (one per byte value).
    constexpr std::size_t max_symbols = 256;

    std::size_t nonzero_count = 0;
    double entropy = 0.0;

    for (; first != last; ++first) {
        const double p = static_cast<double>(*first);
        if (p == 0.0) {
            continue;
        }
        // Subtract each term instead of negating the final sum, so that an
        // all-zero-contribution input yields +0.0 rather than -0.0.
        entropy -= p * std::log2(p);
        ++nonzero_count;
    }

    if (nonzero_count > max_symbols) {
        return -1.0;
    }

    return entropy;
}

/**
 * @brief Produces a sequence of uniformly distributed random bytes.
 *
 * Seeds a Mersenne Twister engine from std::random_device and draws
 * `sequence_size` values in [0, 255]. As a side effect, the entropy estimate
 * reported by the random device is printed to std::cout.
 *
 * @param sequence_size  Number of bytes to generate.
 * @return A vector holding `sequence_size` random bytes.
 */
std::vector<uint8_t> generate_random_sequence(size_t sequence_size)
{
    std::random_device rnd_device;

    std::cout << "Random device entropy: " << rnd_device.entropy() << '\n';

    std::mt19937 mersenne_engine(rnd_device());
    // uniform_int_distribution is not specified for 8-bit types, so draw an
    // unsigned value in the byte range and narrow it explicitly.
    std::uniform_int_distribution<unsigned> dist(0, 255);

    std::vector<uint8_t> random_sequence(sequence_size);
    std::generate(random_sequence.begin(), random_sequence.end(),
        [&dist, &mersenne_engine] {
            return static_cast<uint8_t>(dist(mersenne_engine));
        });

    // Return the local by value: wrapping it in std::move would inhibit NRVO.
    return random_sequence;
}

/**
 * @brief Builds the empirical byte-value distribution of a random sequence.
 *
 * Generates `sequence_size` random bytes via generate_random_sequence(),
 * counts the occurrences of each of the 256 possible byte values, and divides
 * every count by the sequence length.
 *
 * @param sequence_size  Number of random bytes to sample.
 * @return A vector of 256 relative frequencies, indexed by byte value. If the
 *         generated sequence is empty, all 256 entries are 0.0.
 */
std::vector<double> read_random_probabilities(size_t sequence_size)
{
    const std::vector<uint8_t> random_sequence = generate_random_sequence(sequence_size);

    std::vector<double> bytes_frequencies(256);
    if (random_sequence.empty()) {
        // Avoid 0/0: without this guard every frequency would become NaN.
        return bytes_frequencies;
    }

    // Histogram: occurrences of each byte value.
    std::vector<size_t> bytes_distribution(256);
    for (const uint8_t b : random_sequence) {
        ++bytes_distribution[b];
    }

    const double total = static_cast<double>(random_sequence.size());
    std::transform(bytes_distribution.begin(), bytes_distribution.end(), bytes_frequencies.begin(),
        [total](size_t count) {
            return static_cast<double>(count) / total;
        });

    // Return the local by value: wrapping it in std::move would inhibit NRVO.
    return bytes_frequencies;
}

/// Entry point: samples 1 MiB of random bytes, prints the entropy computed
/// from their byte-value frequencies, and prints that entropy multiplied by
/// the byte count, in bits and in bytes.
int main(int argc, char* argv[]) {

    const size_t total_bytes = 1024 * 1024;

    const std::vector<double> probabilities = read_random_probabilities(total_bytes);
    const double bits_per_byte = shannon_entropy(probabilities.begin(), probabilities.end());

    std::cout << "Sequence entropy: " << std::setprecision(16) << bits_per_byte << std::endl;

    // Total information content of the sequence, in bits.
    const double min_bits = bits_per_byte * total_bytes;

    std::cout << "Min possible file size assuming max theoretical compression efficiency:\n"
              << min_bits << " in bits\n"
              << (min_bits / 8) << " in bytes\n";

    return EXIT_SUCCESS;
}
