Add an nTweak to bloom filters to tweak the seed.

2025-03-13 06:01:45 +00:00 · 2012-11-02 18:33:50 -04:00 · 2012-11-02 18:33:50 -04:00 · b1f99bed6f
commit b1f99bed6f
parent 4c8fc1a588
3 changed files with 57 additions and 24 deletions
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@ -15,7 +15,7 @@ using namespace std;

 static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};

-CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate) :
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
 // The ideal size for a bloom filter with a given number of elements and false positive rate is:
 // - nElements * log(fp rate) / ln(2)^2
 // We ignore filter parameters which will create a bloom filter larger than the protocol limits
@ -23,14 +23,15 @@ vData(min((unsigned int)(-1  / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM
 // The ideal number of hash functions is filter size * ln(2) / number of elements
 // Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
 // See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
-nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS))
+nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+nTweak(nTweakIn)
 {
 }

 inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
 {
    // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
-    return MurmurHash3(nHashNum * 0xFBA4C795, vDataToHash) % (vData.size() * 8);
+    return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8);
 }

 void CBloomFilter::insert(const vector<unsigned char>& vKey)
--- a/src/bloom.h
+++ b/src/bloom.h
@ -33,6 +33,7 @@ class CBloomFilter
 private:
    std::vector<unsigned char> vData;
    unsigned int nHashFuncs;
+    unsigned int nTweak;

    unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;

@ -41,7 +42,9 @@ public:
    // Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
    // the filter created will be as close to the given parameters as possible within the protocol limits.
    // This will apply if nFPRate is very low or nElements is unreasonably high.
-    CBloomFilter(unsigned int nElements, double nFPRate);
+    // nTweak is a constant which is added to the seed value passed to the hash function
+    // It should generally always be a random value (and is largely only exposed for unit testing)
+    CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
    // Using a filter initialized with this results in undefined behavior
    // Should only be used for deserialization
    CBloomFilter() {}
@ -50,6 +53,7 @@ public:
    (
        READWRITE(vData);
        READWRITE(nHashFuncs);
+        READWRITE(nTweak);
    )

    void insert(const std::vector<unsigned char>& vKey);
--- a/src/test/bloom_tests.cpp
+++ b/src/test/bloom_tests.cpp