#include #include #include #include #include #include #include #include template struct cast_to_uint { // This is identical to std::hash, but it optimizes differently! constexpr std::size_t operator()(T const* k) const { return reinterpret_cast(k); } }; template struct right_shift { // This actually makes sense for N = 2 or N = 3, since the low 3 bits // are always zero, for larger N this only discards entropy. // Surprisingly, this is not at all a bad hash, most of the time // among the fastest three! constexpr std::size_t operator()(T const* k) const { return (size_t) k>>N; } }; template struct simple_xorshift { // Marsaglia PRNG used to hash bits, large shifts used // to fill the constant low zero-bits with entropy // Mixes bits very well considering it's about 2x the speed of murmur, // but no overall advantage in hash table performance std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; x ^= x >> 11; x ^= x << 7; x ^= x >> 12; return x; } }; template struct double_xorshift { // Same as above, but combining two different generators // Sadly 50% slower on x86 due to lack of registers // (amd64 implementation should be +/- 1 clock tick of simple version) std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; unsigned int y = x; x ^= x << 14; x ^= x >> 13; x ^= x << 15; y ^= y << 11; y ^= y >> 19; y ^= y << 8; return x+y; } }; template struct djb2 { // standard implementation, // hardcoded input size = 4 std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; unsigned int h = 5381; h = h*33 + (x & 0xff); h = h*33 + (x>>8 & 0xff); h = h*33 + (x>>16 & 0xff); h = h*33 + (x>>24 & 0xff); return h; } }; template struct djb2_mod { // Same as above, except for using larger seed so // overflow happens earlier (since only 4 bytes of input) std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; unsigned int h = 715225739; h = h*33 + (x & 0xff); h = h*33 + (x>>8 & 0xff); h = h*33 + (x>>16 & 0xff); h = h*33 + (x>>24 & 0xff); return h; } }; template struct sdbm { // Standard implementation, hardcoded input size = 4 std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; unsigned int h = 0; h = h * 65599 + (x & 0xff); h = h * 65599 + (x>>8 & 0xff); h = h * 65599 + (x>>16 & 0xff); h = h * 65599 + (x>>24 & 0xff); return h; } }; template struct yet_another_lc { // Same design as djb and sdbm, using different prime multiplier. // This is the last one of about 50 different primes beween 7 and 17 bits // that I've tried. None truly performed any better (though some did worse). // This proves the common knowledge that finding a good (or better) hash function // is very hard. std::size_t operator()(T const* k) const { unsigned int x = (unsigned int) k; unsigned int h = 174440041; h = h * 257 + (x & 0xff); h = h * 257 + (x>>8 & 0xff); h = h * 257 + (x>>16 & 0xff); h = h * 257 + (x>>24 & 0xff); return h; } }; template struct murmur2 { // Hardcoded input size = 4 std::size_t operator()(T const* k) const { const unsigned int m = 0x5bd1e995; uint32_t h = 4; unsigned int d = (unsigned int) k; d *= m; d ^= d >> 24; d *= m; h *= m; h ^= d; h ^= h >> 13; h *= m; h ^= h >> 15; return h; } }; template struct murmur3 { // Hardcoded input size = 4. // Omitted xor with input size in the final mix step, as that's a constant, // and this only flips a single bit (waste of CPU cycles). template constexpr uint32_t rotl(uint32_t x) { return (x << r) | (x >> (32 - r)); } unsigned int fmix32( unsigned int h ) const { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } std::size_t operator()(T const* k) const { constexpr unsigned int c1 = 0xcc9e2d51; constexpr unsigned int c2 = 0x1b873593; unsigned int h = (unsigned int) k; h *= c1; h = rotl<15>(h); h *= c2; h = rotl<13>(h); h = h*5+0xe6546b64; return fmix32(h); } }; // Copy-paste as posted on SO template struct MyTemplatePointerHash1 { size_t operator()(const Tval* val) const { static const size_t shift = (size_t)log2(1 + sizeof(Tval)); return (size_t)(val) >> shift; } }; // Copy-paste as posted on SO struct BasileStarynkevitch { size_t operator()(const void* val) const { uintptr_t ad = (uintptr_t) val; return (size_t) ((13*ad) ^ (ad >> 15)); } }; template void benchmark_hash(const char* name) { using namespace std::chrono; printf("%-25s ", name); H hash; { auto start = system_clock::now(); int unused = 0; for(unsigned int i = 0; i < 4*1000*1000*1000; ++i) unused += hash((int*)i); __attribute__ (( unused )) volatile int optimizer_sink = unused; auto stop = system_clock::now(); printf("%-10lld\n", duration_cast(stop - start).count()); } } template void test(std::vector const& vec, unsigned int bucket_count, const char* name) { using namespace std::chrono; printf("%-25s ", name); { auto start = system_clock::now(); for(unsigned int iterations = 0; iterations < 50; ++iterations) { std::unordered_map map(bucket_count); for(auto& elem : vec) map.insert(std::pair(elem, 1)); } auto stop = system_clock::now(); printf("%-10lld ", duration_cast(stop - start).count()); } { std::unordered_map map(bucket_count); for(auto& elem : vec) map.insert(std::pair(elem, 1)); auto start = system_clock::now(); int unused = 0; for(unsigned int i = 0; i < 100*1000*1000; ++i) { auto it = map.find(vec[(rand() ^ (rand()<<16)) % vec.size()]); unused += it->second; } __attribute__ (( unused )) volatile int optimizer_sink = unused; auto stop = system_clock::now(); printf("%-10lld ", duration_cast(stop - start).count()); } puts(""); fflush(stdout); } template void do_tests(unsigned int num_elements, bool shuffle) { puts ("--------------------------------"); printf("sizeof(T) = %u\n", sizeof(T)); printf("sizeof(T*) = %u\n", sizeof(T*)); printf("insertion order = %s\n\n", (shuffle ? "random" : "sequential")); printf("dataset size = %8u elements\n", num_elements); std::vector vec; vec.reserve(num_elements); for(unsigned int i = 0; i < num_elements; ++i) vec.push_back(new T); for(auto& elem : vec) delete elem; // don't actually need these, only need the addresses if(shuffle) std::random_shuffle(vec.begin(), vec.end()); unsigned int seed = time(0); // random, but identical access pattern for all tests printf("%-25s %-10s %-10s\n", "name", "insert", "access"); srand(seed); test> (vec, num_elements, "std::hash"); srand(seed); test> (vec, num_elements, "reinterpret_cast"); srand(seed); test> (vec, num_elements, "djb2"); srand(seed); test> (vec, num_elements, "djb2_mod"); srand(seed); test> (vec, num_elements, "sdbm"); srand(seed); test> (vec, num_elements, "yet_another_lc"); srand(seed); test> (vec, num_elements, "murmur2"); srand(seed); test> (vec, num_elements, "murmur3"); srand(seed); test> (vec, num_elements, "simple_xorshift"); srand(seed); test> (vec, num_elements, "double_xorshift"); srand(seed); test> (vec, num_elements, "right_shift_2"); srand(seed); test> (vec, num_elements, "right_shift_3"); srand(seed); test> (vec, num_elements, "right_shift_4"); srand(seed); test> (vec, num_elements, "right_shift_5"); srand(seed); test> (vec, num_elements, "right_shift_8"); srand(seed); test> (vec, num_elements, "right_shift_12"); srand(seed); test> (vec, num_elements, "MyTemplatePointerHash1"); srand(seed); test (vec, num_elements, "BasileStarynkevitch"); puts(""); } #include struct object64 { char x[64]; }; struct object256 { char x[256]; }; struct object1024 { char x[1024]; }; int main() { timeBeginPeriod(1); puts("Benchmarking hash funcs..."); benchmark_hash> ("std::hash"); benchmark_hash> ("reinterpret_cast"); benchmark_hash> ("djb2"); benchmark_hash> ("djb2_mod"); benchmark_hash> ("sdbm"); benchmark_hash> ("yet_another_lc"); benchmark_hash> ("murmur2"); benchmark_hash> ("murmur3"); benchmark_hash> ("simple_xorshift"); benchmark_hash> ("double_xorshift"); benchmark_hash> ("right_shift_2"); benchmark_hash> ("right_shift_3"); benchmark_hash> ("right_shift_4"); benchmark_hash> ("right_shift_5"); benchmark_hash> ("right_shift_8"); benchmark_hash> ("right_shift_12"); benchmark_hash> ("MyTemplatePointerHash1"); benchmark_hash ("BasileStarynkevitch"); puts(""); do_tests(50000, false); do_tests(50000, true); do_tests(1000000, false); do_tests(1000000, true); do_tests(50000, false); do_tests(50000, true); do_tests(1000000, false); do_tests(1000000, true); do_tests(50000, false); do_tests(50000, true); do_tests(1000000, false); do_tests(1000000, true); do_tests(50000, false); do_tests(50000, true); do_tests(1000000, false); do_tests(1000000, true); timeEndPeriod(1); return 0; }