fork(13) download
  1. #ifndef BLOOM_FILTER_H
  2. #define BLOOM_FILTER_H
  3.  
  4. #include <vector>
  5. #include <array>
  6.  
  7. //#include "MurmurHash3.h"
  8. void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out)
  9. {
  10. // emulate MurmurHash3_x64_128
  11. size_t h = std::hash<std::string>()(std::string((const char*)key, len));
  12. size_t *s = reinterpret_cast<size_t*>(out);
  13. for (int i = 0; i < 128; i += 8*sizeof(size_t))
  14. *s++ = h;
  15. }
  16.  
  17. //basic structure of a bloom filter object
  18. struct BloomFilter {
  19. BloomFilter(size_t size, uint8_t numHashes);
  20. void add(const uint8_t *data, std::size_t len);
  21. bool possiblyContains(const uint8_t *data, std::size_t len) const;
  22. private:
  23. uint8_t m_numHashes;
  24. std::vector<bool> m_bits;
  25. };
  26. //Bloom filter constructor
  27. BloomFilter::BloomFilter(size_t size, uint8_t numHashes)
  28. : m_bits(size),
  29. m_numHashes(numHashes) {}
  30. //Hash array created using the MurmurHash3 code
  31. static std::array<uint64_t, 2> myhash(const uint8_t *data, std::size_t len)
  32. {
  33. std::array<uint64_t, 2> hashValue;
  34. MurmurHash3_x64_128(data, len, 0, hashValue.data());
  35. return hashValue;
  36. }
  37. //Hash array created using the MurmurHash3 code
  38. inline size_t nthHash(int n,
  39. uint64_t hashA,
  40. uint64_t hashB,
  41. size_t filterSize) {
  42. return (hashA + n * hashB) % filterSize; // <- not sure if that is OK, perhaps it is.
  43. }
  44. //Adds an element to the array
  45. void BloomFilter::add(const uint8_t *data, std::size_t len) {
  46. auto hashValues = myhash(data, len);
  47. for (int n = 0; n < m_numHashes; n++)
  48. {
  49. m_bits[nthHash(n, hashValues[0], hashValues[1], m_bits.size())] = true;
  50. }
  51. }
  52. //Returns true or false based on a probabilistic assesment of the array using MurmurHash3
  53. bool BloomFilter::possiblyContains(const uint8_t *data, std::size_t len) const {
  54. auto hashValues = myhash(data, len);
  55. for (int n = 0; n < m_numHashes; n++)
  56. {
  57. if (!m_bits[nthHash(n, hashValues[0], hashValues[1], m_bits.size())])
  58. {
  59. return false;
  60. }
  61. }
  62. return true;
  63. }
  64. #endif
  65.  
  66. #include <functional>
  67. #include <iostream>
  68. #include <assert.h>
  69.  
  70. using namespace std;
  71.  
  72. int main()
  73. {
  74. BloomFilter bf(1024 * 1024, 5);
  75. std::string s = "12345";
  76. bf.add((uint8_t*)s.c_str(), s.size());
  77. bool possiblyContains = bf.possiblyContains((uint8_t*)s.c_str(), s.size());
  78. cout << "possible: " << possiblyContains << endl;
  79. assert(possiblyContains);
  80. }
  81.  
Success #stdin #stdout 0s 15376KB
stdin
Standard input is empty
stdout
possible: 1