fork download
  1. #include <algorithm>
  2. #include <vector>
  3. #include <stdint.h>
  4. #include <chrono>
  5. #include <stdio.h>
  6.  
  7. using namespace std::chrono;
  8.  
  9. /// XorShift32 generator; extremely fast, 2^32-1 period, way better quality
  10. /// than LCG but fail some test suites
  11. struct XorShift32 {
  12. /// This stuff allows to use this class wherever a library function
  13. /// requires a UniformRandomBitGenerator (e.g. std::shuffle)
  14. typedef uint32_t result_type;
  15. static uint32_t min() { return 1; }
  16. static uint32_t max() { return uint32_t(-1); }
  17.  
  18. /// PRNG state
  19. uint32_t y;
  20.  
  21. /// Initializes with seed
  22. XorShift32(uint32_t seed = 0) : y(seed) {
  23. if(y == 0) y = 2463534242UL;
  24. }
  25.  
  26. /// Returns a value in the range [1, 1<<32)
  27. uint32_t operator()() {
  28. y ^= (y<<13);
  29. y ^= (y>>17);
  30. y ^= (y<<15);
  31. return y;
  32. }
  33.  
  34. /// Returns a value in the range [0, limit); this conforms to the RandomFunc
  35. /// requirements for std::random_shuffle
  36. uint32_t operator()(uint32_t limit) {
  37. return (*this)()%limit;
  38. }
  39. };
  40.  
  41. std::vector<uint8_t> main_arr;
  42. std::vector<uint32_t> sec_arr;
  43.  
  44. uint8_t lookup(unsigned idx) {
  45. // extract the 2 bits of our interest from the main array
  46. uint8_t v = (main_arr[idx>>2]>>(2*(idx&3)))&3;
  47. // usual (likely) case: value between 0 and 2
  48. if(v != 3) return v;
  49. // bad case: lookup the index<<8 in the secondary array
  50. // lower_bound finds the first >=, so we don't need to mask out the value
  51. auto ptr = std::lower_bound(sec_arr.begin(), sec_arr.end(), idx<<8);
  52. #ifdef _DEBUG
  53. // some coherency checks
  54. if(ptr == sec_arr.end()) std::abort();
  55. if((*ptr >> 8) != idx) std::abort();
  56. #endif
  57. // extract our 8-bit value from the 32 bit (index, value) thingie
  58. return (*ptr) & 0xff;
  59. }
  60.  
  61. void populate(uint8_t *source, size_t size) {
  62. main_arr.clear(); sec_arr.clear();
  63. // size the main storage (round up)
  64. main_arr.resize((size+3)/4);
  65. for(size_t idx = 0; idx < size; ++idx) {
  66. uint8_t in = source[idx];
  67. uint8_t &target = main_arr[idx>>2];
  68. // if the input doesn't fit, cap to 3 and put in secondary storage
  69. if(in >= 3) {
  70. // top 24 bits: index; low 8 bit: value
  71. sec_arr.push_back((idx << 8) | in);
  72. in = 3;
  73. }
  74. // store in the target according to the position
  75. target |= in << ((idx & 3)*2);
  76. }
  77. }
  78.  
  79. volatile unsigned out;
  80.  
  81. int main() {
  82. XorShift32 xs;
  83. std::vector<uint8_t> vec;
  84. int size = 10000000;
  85. for(int i = 0; i<size; ++i) {
  86. uint32_t v = xs();
  87. if(v < 1825361101) v = 0; // 42.5%
  88. else if(v < 4080218931) v = 1; // 95.0%
  89. else if(v < 4252017623) v = 2; // 99.0%
  90. else {
  91. while((v & 0xff) < 3) v = xs();
  92. }
  93. vec.push_back(v);
  94. }
  95. populate(vec.data(), vec.size());
  96. for(int i = 0; i<10; ++i) {
  97. unsigned o = 0;
  98. auto beg = high_resolution_clock::now();
  99. for(int i = 0; i < size; ++i) {
  100. o += lookup(xs() % size);
  101. }
  102. out += o;
  103. printf("lookup: %10d µs\n",int((high_resolution_clock::now()-beg)/microseconds(1)));
  104. o = 0;
  105. beg = high_resolution_clock::now();
  106. for(int i = 0; i < size; ++i) {
  107. o += vec[xs() % size];
  108. }
  109. out += o;
  110. printf("array: %10d µs\n",int((high_resolution_clock::now()-beg)/microseconds(1)));
  111. }
  112. return 0;
  113. }
  114.  
Success #stdin #stdout 1.79s 18568KB
stdin
Standard input is empty
stdout
lookup:      77196 µs
array:      107298 µs
lookup:      60230 µs
array:      105703 µs
lookup:      60290 µs
array:      105632 µs
lookup:      60216 µs
array:      105717 µs
lookup:      60226 µs
array:      105777 µs
lookup:      60221 µs
array:      105629 µs
lookup:      60238 µs
array:      105666 µs
lookup:      60251 µs
array:      105689 µs
lookup:      60247 µs
array:      105634 µs
lookup:      60278 µs
array:      105829 µs