fork download
  1. #include <vector>
  2. #include <algorithm>
  3. #include <iostream>
  4. #include <numeric>
  5. #include <cmath>
  6.  
  7. // https://g...content-available-to-author-only...b.com/Cyan4973/FiniteStateEntropy
  8. #include "fse.h"
  9.  
  10. using namespace std;
  11.  
  12. // Optimized for the case of many duplicates
  13. template<class ForwardIt>
  14. ForwardIt rem_dupl(ForwardIt first, ForwardIt last)
  15. {
  16. if (first == last)
  17. return last;
  18.  
  19. auto value = *first;
  20. bool dupl = false;
  21. ForwardIt result = first;
  22. while (++first != last) {
  23. if (value == *first) {
  24. dupl = true;
  25. }
  26. else {
  27. if (!dupl) {
  28. *result++ = value;
  29. }
  30. value = std::move(*first);
  31. dupl = false;
  32. }
  33. }
  34. if (!dupl) {
  35. *result++ = value;
  36. }
  37. return result;
  38. }
  39.  
  40. double est_compr_bits(const vector<char>& vc) {
  41. vector<char> compressed(2 * vc.size());
  42.  
  43. auto compr_res = FSE_compress(
  44. compressed.data(), compressed.size(),
  45. vc.data(), vc.size()
  46. );
  47. if (FSE_isError(compr_res)) {
  48. std::cout << "FSE error: " << FSE_getErrorName(compr_res) << "\n";
  49. return 0.;
  50. }
  51. return 8. * compr_res;
  52. }
  53.  
  54. // https://g...content-available-to-author-only...b.com/badboy/6267743
  55. uint64_t hash64shift(uint64_t key)
  56. {
  57. key = (~key) + (key << 21); // key = (key << 21) - key - 1;
  58. key = key ^ (key >> 24);
  59. key = (key + (key << 3)) + (key << 8); // key * 265
  60. key = key ^ (key >> 14);
  61. key = (key + (key << 2)) + (key << 4); // key * 21
  62. key = key ^ (key >> 28);
  63. key = key + (key << 31);
  64. return key;
  65. }
  66.  
  67. using Ty = uint32_t;
  68. constexpr double h_factor = 2.1;
  69. constexpr double passes = 10. - 2.;
  70. constexpr size_t key_count = 2'000'000'000;
  71. constexpr double key_bytes = 50.;
  72. constexpr double index_bytes = 32.;
  73. constexpr double avail_bytes = 1024 * 1024 * 1024;
  74. constexpr double MB = 1024 * 1024;
  75.  
  76. int main() {
  77. // Partial algorithm for hash compression
  78. vector<Ty> v(key_count);
  79. iota(begin(v), end(v), 0);
  80. const auto h = 1 | static_cast<Ty>(key_count * h_factor);
  81. transform(begin(v), end(v), begin(v), [](Ty x) {
  82. return hash64shift(x) % h;
  83. });
  84. sort(begin(v), end(v));
  85. const auto uniq_end = rem_dupl(begin(v), end(v));
  86. v.erase(uniq_end, end(v));
  87. adjacent_difference(begin(v), end(v), begin(v));
  88. const auto max_diff = *max_element(begin(v), end(v));
  89. vector<char> vc(v.size());
  90. copy(begin(v), end(v), begin(vc));
  91. const auto bits = est_compr_bits(vc);
  92.  
  93. // Calculations
  94. const auto uniq_keys = uniq_end - begin(v);
  95. const auto uniq_frac = static_cast<double>(uniq_keys) / key_count;
  96. const auto dupl_frac = pow((1 - uniq_frac), passes);
  97. const auto uncompr_bytes = dupl_frac * key_count * key_bytes;
  98. const auto bits_per_key = bits / uniq_keys;
  99. const auto compressed_bytes = bits_per_key * key_count / 8;
  100. const auto bitmap_bytes = key_count / 8;
  101. const auto total_bytes = compressed_bytes + uncompr_bytes + bitmap_bytes;
  102. const auto index_entries = (avail_bytes - total_bytes) / index_bytes;
  103. const auto keys_per_ind = key_count / index_entries;
  104.  
  105. // Printing results
  106. //for (auto x: vc) { cout << (int)x << '\n'; }
  107. cout << "Bits per key: " << bits_per_key << '\n';
  108. cout << "Compressed MB: " << compressed_bytes / MB << '\n';
  109. cout << "Uncompressed MB: " << uncompr_bytes / MB << '\n';
  110. cout << "Bitmap MB: " << bitmap_bytes / MB << '\n';
  111. cout << "MB used: " << total_bytes / MB << '\n';
  112. cout << "Index entries: " << index_entries << '\n';
  113. cout << "Indexed fragment size: " << keys_per_ind << '\n';
  114. cout << "Unique hash fraction: " << uniq_frac << '\n';
  115. cout << "Maxumum hash difference: " << max_diff << '\n';
  116. }
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.cpp:8:17: fatal error: fse.h: No such file or directory
 #include "fse.h"
                 ^
compilation terminated.
stdout
Standard output is empty