#include <unordered_map>
#include <vector>
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <cmath>
template<typename T> struct cast_to_uint
{
// This is identical to std::hash, but it optimizes differently!
constexpr std::size_t operator()(T const* k) const
{
return reinterpret_cast<size_t>(k);
}
};
template<typename T, unsigned int N> struct right_shift
{
// This actually makes sense for N = 2 or N = 3, since the low 3 bits
// are always zero, for larger N this only discards entropy.
// Surprisingly, this is not at all a bad hash, most of the time
// among the fastest three!
constexpr std::size_t operator()(T const* k) const
{
return (size_t) k>>N;
}
};
template<typename T> struct simple_xorshift
{
// Marsaglia PRNG used to hash bits, large shifts used
// to fill the constant low zero-bits with entropy
// Mixes bits very well considering it's about 2x the speed of murmur,
// but no overall advantage in hash table performance
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
x ^= x >> 11;
x ^= x << 7;
x ^= x >> 12;
return x;
}
};
template<typename T> struct double_xorshift
{
// Same as above, but combining two different generators
// Sadly 50% slower on x86 due to lack of registers
// (amd64 implementation should be +/- 1 clock tick of simple version)
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
unsigned int y = x;
x ^= x << 14;
x ^= x >> 13;
x ^= x << 15;
y ^= y << 11;
y ^= y >> 19;
y ^= y << 8;
return x+y;
}
};
template<typename T> struct djb2
{
// standard implementation,
// hardcoded input size = 4
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
unsigned int h = 5381;
h = h*33 + (x & 0xff);
h = h*33 + (x>>8 & 0xff);
h = h*33 + (x>>16 & 0xff);
h = h*33 + (x>>24 & 0xff);
return h;
}
};
template<typename T> struct djb2_mod
{
// Same as above, except for using larger seed so
// overflow happens earlier (since only 4 bytes of input)
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
unsigned int h = 715225739;
h = h*33 + (x & 0xff);
h = h*33 + (x>>8 & 0xff);
h = h*33 + (x>>16 & 0xff);
h = h*33 + (x>>24 & 0xff);
return h;
}
};
template<typename T> struct sdbm
{
// Standard implementation, hardcoded input size = 4
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
unsigned int h = 0;
h = h * 65599 + (x & 0xff);
h = h * 65599 + (x>>8 & 0xff);
h = h * 65599 + (x>>16 & 0xff);
h = h * 65599 + (x>>24 & 0xff);
return h;
}
};
template<typename T> struct yet_another_lc
{
// Same design as djb and sdbm, using different prime multiplier.
// This is the last one of about 50 different primes beween 7 and 17 bits
// that I've tried. None truly performed any better (though some did worse).
// This proves the common knowledge that finding a good (or better) hash function
// is very hard.
std::size_t operator()(T const* k) const
{
unsigned int x = (unsigned int) k;
unsigned int h = 174440041;
h = h * 257 + (x & 0xff);
h = h * 257 + (x>>8 & 0xff);
h = h * 257 + (x>>16 & 0xff);
h = h * 257 + (x>>24 & 0xff);
return h;
}
};
template<typename T> struct murmur2
{
// Hardcoded input size = 4
std::size_t operator()(T const* k) const
{
const unsigned int m = 0x5bd1e995;
uint32_t h = 4;
unsigned int d = (unsigned int) k;
d *= m;
d ^= d >> 24;
d *= m;
h *= m;
h ^= d;
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
};
template<typename T> struct murmur3
{
// Hardcoded input size = 4.
// Omitted xor with input size in the final mix step, as that's a constant,
// and this only flips a single bit (waste of CPU cycles).
template<int r> constexpr uint32_t rotl(uint32_t x) { return (x << r) | (x >> (32 - r)); }
unsigned int fmix32( unsigned int h ) const
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
std::size_t operator()(T const* k) const
{
constexpr unsigned int c1 = 0xcc9e2d51;
constexpr unsigned int c2 = 0x1b873593;
unsigned int h = (unsigned int) k;
h *= c1;
h = rotl<15>(h);
h *= c2;
h = rotl<13>(h);
h = h*5+0xe6546b64;
return fmix32(h);
}
};
// Copy-paste as posted on SO
template<typename Tval>
struct MyTemplatePointerHash1 {
size_t operator()(const Tval* val) const {
static const size_t shift = (size_t)log2(1 + sizeof(Tval));
return (size_t)(val) >> shift;
}
};
// Copy-paste as posted on SO
struct BasileStarynkevitch {
size_t operator()(const void* val) const {
uintptr_t ad = (uintptr_t) val;
return (size_t) ((13*ad) ^ (ad >> 15));
}
};
template<typename H> void benchmark_hash(const char* name)
{
using namespace std::chrono;
printf("%-25s ", name);
H hash;
{
auto start = system_clock::now();
int unused = 0;
for(unsigned int i = 0; i < 4*1000*1000*1000; ++i)
unused += hash((int*)i);
__attribute__ (( unused )) volatile int optimizer_sink = unused;
auto stop = system_clock::now();
printf("%-10lld\n", duration_cast<milliseconds>(stop - start).count());
}
}
template<typename T, typename H> void test(std::vector<T*> const& vec, unsigned int bucket_count, const char* name)
{
using namespace std::chrono;
printf("%-25s ", name);
{
auto start = system_clock::now();
for(unsigned int iterations = 0; iterations < 50; ++iterations)
{
std::unordered_map<T*, int, H> map(bucket_count);
for(auto& elem : vec) map.insert(std::pair<T*, int>(elem, 1));
}
auto stop = system_clock::now();
printf("%-10lld ", duration_cast<milliseconds>(stop - start).count());
}
{
std::unordered_map<T*, int, H> map(bucket_count);
for(auto& elem : vec) map.insert(std::pair<T*, int>(elem, 1));
auto start = system_clock::now();
int unused = 0;
for(unsigned int i = 0; i < 100*1000*1000; ++i)
{
auto it = map.find(vec[(rand() ^ (rand()<<16)) % vec.size()]);
unused += it->second;
}
__attribute__ (( unused )) volatile int optimizer_sink = unused;
auto stop = system_clock::now();
printf("%-10lld ", duration_cast<milliseconds>(stop - start).count());
}
puts("");
fflush(stdout);
}
template<typename T> void do_tests(unsigned int num_elements, bool shuffle)
{
puts ("--------------------------------");
printf("sizeof(T) = %u\n", sizeof(T));
printf("sizeof(T*) = %u\n", sizeof(T*));
printf("insertion order = %s\n\n", (shuffle ? "random" : "sequential"));
printf("dataset size = %8u elements\n", num_elements);
std::vector<T*> vec;
vec.reserve(num_elements);
for(unsigned int i = 0; i < num_elements; ++i)
vec.push_back(new T);
for(auto& elem : vec)
delete elem; // don't actually need these, only need the addresses
if(shuffle) std::random_shuffle(vec.begin(), vec.end());
unsigned int seed = time(0); // random, but identical access pattern for all tests
printf("%-25s %-10s %-10s\n", "name", "insert", "access");
srand(seed); test<T, std::hash<T*>> (vec, num_elements, "std::hash");
srand(seed); test<T, cast_to_uint<T>> (vec, num_elements, "reinterpret_cast");
srand(seed); test<T, djb2<T>> (vec, num_elements, "djb2");
srand(seed); test<T, djb2_mod<T>> (vec, num_elements, "djb2_mod");
srand(seed); test<T, sdbm<T>> (vec, num_elements, "sdbm");
srand(seed); test<T, yet_another_lc<T>> (vec, num_elements, "yet_another_lc");
srand(seed); test<T, murmur2<T>> (vec, num_elements, "murmur2");
srand(seed); test<T, murmur3<T>> (vec, num_elements, "murmur3");
srand(seed); test<T, simple_xorshift<T>> (vec, num_elements, "simple_xorshift");
srand(seed); test<T, double_xorshift<T>> (vec, num_elements, "double_xorshift");
srand(seed); test<T, right_shift<T, 2>> (vec, num_elements, "right_shift_2");
srand(seed); test<T, right_shift<T, 3>> (vec, num_elements, "right_shift_3");
srand(seed); test<T, right_shift<T, 4>> (vec, num_elements, "right_shift_4");
srand(seed); test<T, right_shift<T, 5>> (vec, num_elements, "right_shift_5");
srand(seed); test<T, right_shift<T, 8>> (vec, num_elements, "right_shift_8");
srand(seed); test<T, right_shift<T, 12>> (vec, num_elements, "right_shift_12");
srand(seed); test<T, MyTemplatePointerHash1<T>> (vec, num_elements, "MyTemplatePointerHash1");
srand(seed); test<T, BasileStarynkevitch> (vec, num_elements, "BasileStarynkevitch");
puts("");
}
#include <windows.h>
struct object64 { char x[64]; };
struct object256 { char x[256]; };
struct object1024 { char x[1024]; };
int main()
{
timeBeginPeriod(1);
puts("Benchmarking hash funcs...");
benchmark_hash<std::hash<int*>> ("std::hash");
benchmark_hash<cast_to_uint<int>> ("reinterpret_cast");
benchmark_hash<djb2<int>> ("djb2");
benchmark_hash<djb2_mod<int>> ("djb2_mod");
benchmark_hash<sdbm<int>> ("sdbm");
benchmark_hash<yet_another_lc<int>> ("yet_another_lc");
benchmark_hash<murmur2<int>> ("murmur2");
benchmark_hash<murmur3<int>> ("murmur3");
benchmark_hash<simple_xorshift<int>> ("simple_xorshift");
benchmark_hash<double_xorshift<int>> ("double_xorshift");
benchmark_hash<right_shift<int, 2>> ("right_shift_2");
benchmark_hash<right_shift<int, 3>> ("right_shift_3");
benchmark_hash<right_shift<int, 4>> ("right_shift_4");
benchmark_hash<right_shift<int, 5>> ("right_shift_5");
benchmark_hash<right_shift<int, 8>> ("right_shift_8");
benchmark_hash<right_shift<int, 12>> ("right_shift_12");
benchmark_hash<MyTemplatePointerHash1<int>> ("MyTemplatePointerHash1");
benchmark_hash<BasileStarynkevitch> ("BasileStarynkevitch");
puts("");
do_tests<int>(50000, false);
do_tests<int>(50000, true);
do_tests<int>(1000000, false);
do_tests<int>(1000000, true);
do_tests<object64>(50000, false);
do_tests<object64>(50000, true);
do_tests<object64>(1000000, false);
do_tests<object64>(1000000, true);
do_tests<object256>(50000, false);
do_tests<object256>(50000, true);
do_tests<object256>(1000000, false);
do_tests<object256>(1000000, true);
do_tests<object1024>(50000, false);
do_tests<object1024>(50000, true);
do_tests<object1024>(1000000, false);
do_tests<object1024>(1000000, true);
timeEndPeriod(1);
return 0;
}