fork download
  1. #include <iostream>
  2. #include <vector>
  3. #include <functional>
  4. #include <algorithm>
  5. #include <cmath>
  6. #include <iomanip>
  7. #include <random>
  8.  
  9. using namespace std;
  10.  
  11. template <typename T>
  12. double shannon_entropy(T first, T last)
  13. {
  14. size_t frequencies_count{};
  15. double entropy = 0.0;
  16.  
  17. std::for_each(first, last, [&entropy, &frequencies_count] (auto item) mutable {
  18.  
  19. if (0. == item) return;
  20. double fp_item = static_cast<double>(item);
  21. entropy += fp_item * log2(fp_item);
  22. ++frequencies_count;
  23. });
  24.  
  25. if (frequencies_count > 256) {
  26. return -1.0;
  27. }
  28.  
  29. return -entropy;
  30. }
  31.  
  32. std::vector<uint8_t> generate_random_sequence(size_t sequence_size)
  33. {
  34. std::vector<uint8_t> random_sequence;
  35. std::random_device rnd_device;
  36.  
  37. std::cout << "Random device entropy: " << rnd_device.entropy() << '\n';
  38.  
  39. std::mt19937 mersenne_engine(rnd_device());
  40. std::uniform_int_distribution<unsigned> dist(0, 255);
  41.  
  42. auto gen = std::bind(dist, mersenne_engine);
  43. random_sequence.resize(sequence_size);
  44. std::generate(random_sequence.begin(), random_sequence.end(), gen);
  45. return std::move(random_sequence);
  46. }
  47.  
  48. std::vector<double> read_random_probabilities(size_t sequence_size)
  49. {
  50. std::vector<size_t> bytes_distribution(256);
  51. std::vector<double> bytes_frequencies(256);
  52.  
  53. std::vector<uint8_t> random_sequence = generate_random_sequence(sequence_size);
  54.  
  55. size_t rnd_seq_size = random_sequence.size();
  56. std::for_each(random_sequence.begin(), random_sequence.end(), [&](uint8_t b) mutable {
  57. ++bytes_distribution[b];
  58. });
  59.  
  60. std::transform(bytes_distribution.begin(), bytes_distribution.end(), bytes_frequencies.begin(),
  61. [&rnd_seq_size](size_t item) {
  62. return static_cast<double>(item) / rnd_seq_size;
  63. });
  64. return std::move(bytes_frequencies);
  65. }
  66.  
  67. int main(int argc, char* argv[]) {
  68.  
  69. size_t sequence_size = 1024 * 1024;
  70. std::vector<double> bytes_frequencies = read_random_probabilities(sequence_size);
  71. double entropy = shannon_entropy(bytes_frequencies.begin(), bytes_frequencies.end());
  72.  
  73. std::cout << "Sequence entropy: " << std::setprecision(16) << entropy << std::endl;
  74.  
  75. std::cout << "Min possible file size assuming max theoretical compression efficiency:\n";
  76. std::cout << (entropy * sequence_size) << " in bits\n";
  77. std::cout << ((entropy * sequence_size) / 8) << " in bytes\n";
  78.  
  79. return EXIT_SUCCESS;
  80. }
  81.  
Success #stdin #stdout 0.01s 3472KB
stdin
Standard input is empty
stdout
Random device entropy: 0
Sequence entropy: 7.999833679669222
Min possible file size assuming max theoretical compression efficiency:
8388433.600492834 in bits
1048554.200061604 in bytes