#ifndef BLOOM_FILTER_H
#define BLOOM_FILTER_H
#include <vector>
#include <array>
//#include "MurmurHash3.h"
void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out)
{
// emulate MurmurHash3_x64_128
size_t h = std::hash<std::string>()(std::string((const char*)key, len));
size_t *s = reinterpret_cast<size_t*>(out);
for (int i = 0; i < 128; i += 8*sizeof(size_t))
*s++ = h;
}
//basic structure of a bloom filter object
struct BloomFilter {
BloomFilter(size_t size, uint8_t numHashes);
void add(const uint8_t *data, std::size_t len);
bool possiblyContains(const uint8_t *data, std::size_t len) const;
private:
uint8_t m_numHashes;
std::vector<bool> m_bits;
};
//Bloom filter constructor
BloomFilter::BloomFilter(size_t size, uint8_t numHashes)
: m_bits(size),
m_numHashes(numHashes) {}
//Hash array created using the MurmurHash3 code
static std::array<uint64_t, 2> myhash(const uint8_t *data, std::size_t len)
{
std::array<uint64_t, 2> hashValue;
MurmurHash3_x64_128(data, len, 0, hashValue.data());
return hashValue;
}
//Hash array created using the MurmurHash3 code
inline size_t nthHash(int n,
uint64_t hashA,
uint64_t hashB,
size_t filterSize) {
return (hashA + n * hashB) % filterSize; // <- not sure if that is OK, perhaps it is.
}
//Adds an element to the array
void BloomFilter::add(const uint8_t *data, std::size_t len) {
auto hashValues = myhash(data, len);
for (int n = 0; n < m_numHashes; n++)
{
m_bits[nthHash(n, hashValues[0], hashValues[1], m_bits.size())] = true;
}
}
//Returns true or false based on a probabilistic assesment of the array using MurmurHash3
bool BloomFilter::possiblyContains(const uint8_t *data, std::size_t len) const {
auto hashValues = myhash(data, len);
for (int n = 0; n < m_numHashes; n++)
{
if (!m_bits[nthHash(n, hashValues[0], hashValues[1], m_bits.size())])
{
return false;
}
}
return true;
}
#endif
#include <functional>
#include <iostream>
#include <assert.h>
using namespace std;
int main()
{
BloomFilter bf(1024 * 1024, 5);
std::string s = "12345";
bf.add((uint8_t*)s.c_str(), s.size());
bool possiblyContains = bf.possiblyContains((uint8_t*)s.c_str(), s.size());
cout << "possible: " << possiblyContains << endl;
assert(possiblyContains);
}
I2lmbmRlZiBCTE9PTV9GSUxURVJfSAojZGVmaW5lIEJMT09NX0ZJTFRFUl9ICgojaW5jbHVkZSA8dmVjdG9yPgojaW5jbHVkZSA8YXJyYXk+CgovLyNpbmNsdWRlICJNdXJtdXJIYXNoMy5oIgp2b2lkIE11cm11ckhhc2gzX3g2NF8xMjgoY29uc3Qgdm9pZCAqa2V5LCBpbnQgbGVuLCB1aW50MzJfdCBzZWVkLCB2b2lkICpvdXQpCnsKICAgIC8vIGVtdWxhdGUgTXVybXVySGFzaDNfeDY0XzEyOAogICAgc2l6ZV90IGggPSBzdGQ6Omhhc2g8c3RkOjpzdHJpbmc+KCkoc3RkOjpzdHJpbmcoKGNvbnN0IGNoYXIqKWtleSwgbGVuKSk7CiAgICBzaXplX3QgKnMgPSByZWludGVycHJldF9jYXN0PHNpemVfdCo+KG91dCk7CiAgICBmb3IgKGludCBpID0gMDsgaSA8IDEyODsgaSArPSA4KnNpemVvZihzaXplX3QpKQogICAgICAgICpzKysgPSBoOwp9CgovL2Jhc2ljIHN0cnVjdHVyZSBvZiBhIGJsb29tIGZpbHRlciBvYmplY3QKc3RydWN0IEJsb29tRmlsdGVyIHsKICAgIEJsb29tRmlsdGVyKHNpemVfdCBzaXplLCB1aW50OF90IG51bUhhc2hlcyk7CiAgICB2b2lkIGFkZChjb25zdCB1aW50OF90ICpkYXRhLCBzdGQ6OnNpemVfdCBsZW4pOwogICAgYm9vbCBwb3NzaWJseUNvbnRhaW5zKGNvbnN0IHVpbnQ4X3QgKmRhdGEsIHN0ZDo6c2l6ZV90IGxlbikgY29uc3Q7CnByaXZhdGU6CiAgICB1aW50OF90IG1fbnVtSGFzaGVzOwogICAgc3RkOjp2ZWN0b3I8Ym9vbD4gbV9iaXRzOwp9OwovL0Jsb29tIGZpbHRlciBjb25zdHJ1Y3RvcgpCbG9vbUZpbHRlcjo6Qmxvb21GaWx0ZXIoc2l6ZV90IHNpemUsIHVpbnQ4X3QgbnVtSGFzaGVzKQogICAgOiBtX2JpdHMoc2l6ZSksCiAgICBtX251bUhhc2hlcyhudW1IYXNoZXMpIHt9Ci8vSGFzaCBhcnJheSBjcmVhdGVkIHVzaW5nIHRoZSBNdXJtdXJIYXNoMyBjb2RlCnN0YXRpYyBzdGQ6OmFycmF5PHVpbnQ2NF90LCAyPiBteWhhc2goY29uc3QgdWludDhfdCAqZGF0YSwgc3RkOjpzaXplX3QgbGVuKQp7CiAgICBzdGQ6OmFycmF5PHVpbnQ2NF90LCAyPiBoYXNoVmFsdWU7CiAgICBNdXJtdXJIYXNoM194NjRfMTI4KGRhdGEsIGxlbiwgMCwgaGFzaFZhbHVlLmRhdGEoKSk7CiAgICByZXR1cm4gaGFzaFZhbHVlOwp9Ci8vSGFzaCBhcnJheSBjcmVhdGVkIHVzaW5nIHRoZSBNdXJtdXJIYXNoMyBjb2RlCmlubGluZSBzaXplX3QgbnRoSGFzaChpbnQgbiwKICAgIHVpbnQ2NF90IGhhc2hBLAogICAgdWludDY0X3QgaGFzaEIsCiAgICBzaXplX3QgZmlsdGVyU2l6ZSkgewogICAgcmV0dXJuIChoYXNoQSArIG4gKiBoYXNoQikgJSBmaWx0ZXJTaXplOyAvLyA8LSBub3Qgc3VyZSBpZiB0aGF0IGlzIE9LLCBwZXJoYXBzIGl0IGlzLgp9Ci8vQWRkcyBhbiBlbGVtZW50IHRvIHRoZSBhcnJheQp2b2lkIEJsb29tRmlsdGVyOjphZGQoY29uc3QgdWludDhfdCAqZGF0YSwgc3RkOjpzaXplX3QgbGVuKSB7CiAgICBhdXRvIGhhc2hWYWx1ZXMgPSBteWhhc2goZGF0YSwgbGVuKTsKICAgIGZvciAoaW50IG4gPSAwOyBuIDwgbV9udW1IYXNoZXM7IG4rKykKICAgIHsKICAgICAgICBtX2JpdHNbbnRoSGFzaChuLCBoYXNoVmFsdWVzWzBdLCBoYXNoVmFsdWVzWzFdLCBtX2JpdHMuc2l6ZSgpKV0gPSB0cnVlOwogICAgfQp9Ci8vUmV0dXJucyB0cnVlIG9yIGZhbHNlIGJhc2VkIG9uIGEgcHJvYmFiaWxpc3RpYyBhc3Nlc21lbnQgb2YgdGhlIGFycmF5ICAgICAgICAgdXNpbmcgTXVybXVySGFzaDMKYm9vbCBCbG9vbUZpbHRlcjo6cG9zc2libHlDb250YWlucyhjb25zdCB1aW50OF90ICpkYXRhLCBzdGQ6OnNpemVfdCAgIGxlbikgY29uc3QgewogICAgYXV0byBoYXNoVmFsdWVzID0gbXloYXNoKGRhdGEsIGxlbik7CiAgICBmb3IgKGludCBuID0gMDsgbiA8IG1fbnVtSGFzaGVzOyBuKyspCiAgICB7CiAgICAgICAgaWYgKCFtX2JpdHNbbnRoSGFzaChuLCBoYXNoVmFsdWVzWzBdLCBoYXNoVmFsdWVzWzFdLCBtX2JpdHMuc2l6ZSgpKV0pCiAgICAgICAgewogICAgICAgICAgICByZXR1cm4gZmFsc2U7CiAgICAgICAgfQogICAgfQogICAgcmV0dXJuIHRydWU7Cn0KI2VuZGlmCgojaW5jbHVkZSA8ZnVuY3Rpb25hbD4KI2luY2x1ZGUgPGlvc3RyZWFtPgojaW5jbHVkZSA8YXNzZXJ0Lmg+Cgp1c2luZyBuYW1lc3BhY2Ugc3RkOwoKaW50IG1haW4oKQp7CiAgICBCbG9vbUZpbHRlciBiZigxMDI0ICogMTAyNCwgNSk7CiAgICBzdGQ6OnN0cmluZyBzID0gIjEyMzQ1IjsKICAgIGJmLmFkZCgodWludDhfdCopcy5jX3N0cigpLCBzLnNpemUoKSk7CiAgICBib29sIHBvc3NpYmx5Q29udGFpbnMgID0gYmYucG9zc2libHlDb250YWlucygodWludDhfdCopcy5jX3N0cigpLCBzLnNpemUoKSk7CiAgICBjb3V0IDw8ICJwb3NzaWJsZTogIiA8PCBwb3NzaWJseUNvbnRhaW5zIDw8IGVuZGw7CiAgICBhc3NlcnQocG9zc2libHlDb250YWlucyk7Cn0K