Browse Source

Merge pull request #6064

f46a680 Better mruset unit test (Pieter Wuille)
d4d5022 Use ring buffer of set iterators instead of deque of copies in mruset (Pieter Wuille)
d81cff3 Replace mruset setAddrKnown with CRollingBloomFilter addrKnown (Gavin Andresen)
69a5f8b Rolling bloom filter class (Gavin Andresen)
0.13
Wladimir J. van der Laan 10 years ago
parent
commit
b46e7c24e5
No known key found for this signature in database
GPG Key ID: 74810B012346C9A6
  1. 51
      src/bloom.cpp
  2. 28
      src/bloom.h
  3. 10
      src/main.cpp
  4. 36
      src/mruset.h
  5. 6
      src/net.cpp
  6. 6
      src/net.h
  7. 78
      src/test/bloom_tests.cpp
  8. 108
      src/test/mruset_tests.cpp

51
src/bloom.cpp

@ -40,6 +40,17 @@ nFlags(nFlagsIn) @@ -40,6 +40,17 @@ nFlags(nFlagsIn)
{
}
// Private constructor used by CRollingBloomFilter.
// Sizes the filter directly from nElements and nFPRate; the declaration in
// bloom.h describes this overload as having "no restrictions on size".
// The filter is created in the "empty" state with nFlags fixed to
// BLOOM_UPDATE_NONE.
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
// Bit count is -1/LN2SQUARED * nElements * log(nFPRate), truncated to
// unsigned int by the cast; the integer division by 8 then yields bytes.
vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
isFull(false),
isEmpty(true),
// (vData.size() * 8 / nElements) is an integer division performed before the
// multiplication by LN2; the product is then truncated to unsigned int.
// NOTE(review): reads vData.size(), so this relies on vData being declared
// before nHashFuncs in the class -- confirm in bloom.h.
// NOTE(review): nElements == 0 would make this an integer division by zero;
// callers presumably always pass a non-zero count -- verify.
nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
nTweak(nTweakIn),
nFlags(BLOOM_UPDATE_NONE)
{
}
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
{
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull() @@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
isFull = full;
isEmpty = empty;
}
/**
 * Construct a rolling filter that tracks the most recent nElements insertions.
 *
 * Two bloom filters, each sized for 2 * nElements entries, back the rolling
 * filter. They are filled together and cleared alternately, half a cycle
 * apart (every nElements insertions), so at any moment at least one of them
 * holds the last nElements inserted keys.
 *
 * @param nElements number of recent insertions that must be remembered
 * @param fpRate    false-positive rate handed to each underlying filter
 * @param nTweak    tweak value handed to each underlying filter
 */
CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
    nBloomSize(nElements * 2),
    nInsertions(0),
    b1(nElements * 2, fpRate, nTweak),
    b2(nElements * 2, fpRate, nTweak)
{
}
/**
 * Record vKey in the rolling filter.
 *
 * The key is always added to both underlying filters. Before adding, one of
 * them is wiped if the insertion counter sits on its reset point: b1 at the
 * start of a cycle, b2 at the half-way mark. The counter then advances and
 * wraps back to zero once it reaches nBloomSize.
 */
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
{
    const unsigned int nHalfCycle = nBloomSize / 2;

    // Staggered reset: at most one filter is cleared per insertion.
    if (nInsertions == 0)
        b1.clear();
    else if (nInsertions == nHalfCycle)
        b2.clear();

    b1.insert(vKey);
    b2.insert(vKey);

    // Advance the position within the cycle, wrapping at nBloomSize.
    nInsertions += 1;
    if (nInsertions == nBloomSize)
        nInsertions = 0;
}
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
{
if (nInsertions < nBloomSize / 2) {
return b2.contains(vKey);
}
return b1.contains(vKey);
}
/**
 * Forget everything: restart the insertion cycle and wipe both underlying
 * filters.
 */
void CRollingBloomFilter::clear()
{
    nInsertions = 0;
    b2.clear();
    b1.clear();
}

28
src/bloom.h

@ -53,6 +53,10 @@ private: @@ -53,6 +53,10 @@ private:
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
// Private constructor for CRollingBloomFilter, no restrictions on size
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
friend class CRollingBloomFilter;
public:
/**
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
@ -97,4 +101,28 @@ public: @@ -97,4 +101,28 @@ public:
void UpdateEmptyFull();
};
/**
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
* Construct it with the number of items to keep track of, and a false-positive rate.
*
* contains(item) will always return true if item was one of the last N things
* insert()'ed ... but may also return true for items that were not inserted.
*
* Implementation (see bloom.cpp): two CBloomFilters, each sized for 2 * N
* elements, receive every insertion and are cleared alternately, N insertions
* apart.
*/
class CRollingBloomFilter
{
public:
// nElements: number of most recent insertions to remember (N above).
// nFPRate:   false-positive rate passed to both underlying filters.
// nTweak:    tweak value passed to both underlying filters.
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
// Adds vKey to both underlying filters, clearing one of them first when the
// insertion counter is at that filter's reset point.
void insert(const std::vector<unsigned char>& vKey);
// Returns the answer of whichever underlying filter was cleared least recently.
bool contains(const std::vector<unsigned char>& vKey) const;
// Clears both underlying filters and resets the insertion counter to zero.
void clear();
private:
// Element capacity each underlying filter is built for (2 * nElements);
// also the length of one insertion cycle.
unsigned int nBloomSize;
// Position within the current cycle; wraps to 0 when it reaches nBloomSize.
unsigned int nInsertions;
// b1 is cleared when nInsertions == 0, b2 when nInsertions == nBloomSize / 2.
CBloomFilter b1, b2;
};
#endif // BITCOIN_BLOOM_H

10
src/main.cpp

@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, @@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
{
LOCK(cs_vNodes);
// Use deterministic randomness to send to the same nodes for 24 hours
// at a time so the setAddrKnowns of the chosen nodes prevent repeats
// at a time so the addrKnowns of the chosen nodes prevent repeats
static uint256 hashSalt;
if (hashSalt.IsNull())
hashSalt = GetRandHash();
@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle) @@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
LOCK(cs_vNodes);
BOOST_FOREACH(CNode* pnode, vNodes)
{
// Periodically clear setAddrKnown to allow refresh broadcasts
// Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
pnode->setAddrKnown.clear();
pnode->addrKnown.clear();
// Rebroadcast our address
AdvertizeLocal(pnode);
@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle) @@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
vAddr.reserve(pto->vAddrToSend.size());
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
{
// returns true if wasn't already contained in the set
if (pto->setAddrKnown.insert(addr).second)
if (!pto->addrKnown.contains(addr.GetKey()))
{
pto->addrKnown.insert(addr.GetKey());
vAddr.push_back(addr);
// receiver rejects addr messages larger than 1000
if (vAddr.size() >= 1000)

36
src/mruset.h

@ -1,12 +1,12 @@ @@ -1,12 +1,12 @@
// Copyright (c) 2012 The Bitcoin Core developers
// Copyright (c) 2012-2015 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_MRUSET_H
#define BITCOIN_MRUSET_H
#include <deque>
#include <set>
#include <vector>
#include <utility>
/** STL-like set container that only keeps the most recent N elements. */
@ -22,11 +22,13 @@ public: @@ -22,11 +22,13 @@ public:
protected:
std::set<T> set;
std::deque<T> queue;
size_type nMaxSize;
std::vector<iterator> order;
size_type first_used;
size_type first_unused;
const size_type nMaxSize;
public:
mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
iterator begin() const { return set.begin(); }
iterator end() const { return set.end(); }
size_type size() const { return set.size(); }
@ -36,7 +38,9 @@ public: @@ -36,7 +38,9 @@ public:
void clear()
{
set.clear();
queue.clear();
order.assign(nMaxSize, set.end());
first_used = 0;
first_unused = 0;
}
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
@ -45,25 +49,17 @@ public: @@ -45,25 +49,17 @@ public:
{
std::pair<iterator, bool> ret = set.insert(x);
if (ret.second) {
if (nMaxSize && queue.size() == nMaxSize) {
set.erase(queue.front());
queue.pop_front();
if (set.size() == nMaxSize + 1) {
set.erase(order[first_used]);
order[first_used] = set.end();
if (++first_used == nMaxSize) first_used = 0;
}
queue.push_back(x);
order[first_unused] = ret.first;
if (++first_unused == nMaxSize) first_unused = 0;
}
return ret;
}
size_type max_size() const { return nMaxSize; }
size_type max_size(size_type s)
{
if (s)
while (queue.size() > s) {
set.erase(queue.front());
queue.pop_front();
}
nMaxSize = s;
return nMaxSize;
}
};
#endif // BITCOIN_MRUSET_H

6
src/net.cpp

@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr) @@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr)
unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000); }
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(5000)
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
addrKnown(5000, 0.001, insecure_rand()),
setInventoryKnown(SendBufferSize() / 1000)
{
nServices = 0;
hSocket = hSocketIn;
@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn @@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
nStartingHeight = -1;
fGetAddr = false;
fRelayTxes = false;
setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = new CBloomFilter();
nPingNonceSent = 0;
nPingUsecStart = 0;

6
src/net.h

@ -300,7 +300,7 @@ public: @@ -300,7 +300,7 @@ public:
// flood relay
std::vector<CAddress> vAddrToSend;
mruset<CAddress> setAddrKnown;
CRollingBloomFilter addrKnown;
bool fGetAddr;
std::set<uint256> setKnown;
@ -380,7 +380,7 @@ public: @@ -380,7 +380,7 @@ public:
void AddAddressKnown(const CAddress& addr)
{
setAddrKnown.insert(addr);
addrKnown.insert(addr.GetKey());
}
void PushAddress(const CAddress& addr)
@ -388,7 +388,7 @@ public: @@ -388,7 +388,7 @@ public:
// Known checking here is only to save space from duplicates.
// SendMessages will filter it again for knowns that were added
// after addresses were pushed.
if (addr.IsValid() && !setAddrKnown.count(addr)) {
if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
} else {

78
src/test/bloom_tests.cpp

@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
#include "clientversion.h"
#include "key.h"
#include "merkleblock.h"
#include "random.h"
#include "serialize.h"
#include "streams.h"
#include "uint256.h"
@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none) @@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
}
static std::vector<unsigned char> RandomData()
{
uint256 r = GetRandHash();
return std::vector<unsigned char>(r.begin(), r.end());
}
// Exercises CRollingBloomFilter: the most recent nElements insertions must
// always be reported as present, the observed false-positive count must stay
// within a loose band, clear() must forget everything, and old entries must
// eventually roll out of the filter.
BOOST_AUTO_TEST_CASE(rolling_bloom)
{
// last-100-entry, 1% false positive:
CRollingBloomFilter rb1(100, 0.01, 0);
// Overfill:
static const int DATASIZE=399;
std::vector<unsigned char> data[DATASIZE];
for (int i = 0; i < DATASIZE; i++) {
data[i] = RandomData();
rb1.insert(data[i]);
}
// Last 100 guaranteed to be remembered:
for (int i = 299; i < DATASIZE; i++) {
BOOST_CHECK(rb1.contains(data[i]));
}
// false positive rate is 1%, so we should get about 100 hits if
// testing 10,000 random keys. We get worst-case false positive
// behavior when the filter is as full as possible, which is
// when we've inserted one less than an integer multiple of nElements*2
// (here DATASIZE = 399 = 2 * 200 - 1).
unsigned int nHits = 0;
for (int i = 0; i < 10000; i++) {
if (rb1.contains(RandomData()))
++nHits;
}
// Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");
// Insanely unlikely to get a fp count outside this range:
BOOST_CHECK(nHits > 25);
BOOST_CHECK(nHits < 175);
// clear() must make a previously-present key disappear:
BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
rb1.clear();
BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
// Now roll through data, make sure last 100 entries
// are always remembered:
for (int i = 0; i < DATASIZE; i++) {
if (i >= 100)
BOOST_CHECK(rb1.contains(data[i-100]));
rb1.insert(data[i]);
}
// Insert 999 more random entries:
for (int i = 0; i < 999; i++) {
rb1.insert(RandomData());
}
// Sanity check to make sure the filter isn't just filling up:
nHits = 0;
for (int i = 0; i < DATASIZE; i++) {
if (rb1.contains(data[i]))
++nHits;
}
// Expect about 5 false positives, more than 100 means
// something is definitely broken.
BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
BOOST_CHECK(nHits < 100);
// last-1000-entry, 0.1% false positive:
CRollingBloomFilter rb2(1000, 0.001, 0);
for (int i = 0; i < DATASIZE; i++) {
rb2.insert(data[i]);
}
// ... room for all of them:
for (int i = 0; i < DATASIZE; i++) {
BOOST_CHECK(rb2.contains(data[i]));
}
}
BOOST_AUTO_TEST_SUITE_END()

108
src/test/mruset_tests.cpp

@ -17,82 +17,64 @@ @@ -17,82 +17,64 @@
using namespace std;
class mrutester
{
private:
mruset<int> mru;
std::set<int> set;
public:
mrutester() { mru.max_size(MAX_SIZE); }
int size() const { return set.size(); }
BOOST_FIXTURE_TEST_SUITE(mruset_tests, BasicTestingSetup)
void insert(int n)
BOOST_AUTO_TEST_CASE(mruset_test)
{
mru.insert(n);
set.insert(n);
BOOST_CHECK(mru == set);
// The mruset being tested.
mruset<int> mru(5000);
// Run the test 10 times.
for (int test = 0; test < 10; test++) {
// Reset mru.
mru.clear();
// A deque + set to simulate the mruset.
std::deque<int> rep;
std::set<int> all;
// Insert 10000 random integers below 15000.
for (int j=0; j<10000; j++) {
int add = GetRandInt(15000);
mru.insert(add);
// Add the number to rep/all as well.
if (all.count(add) == 0) {
all.insert(add);
rep.push_back(add);
if (all.size() == 5001) {
all.erase(rep.front());
rep.pop_front();
}
}
};
BOOST_FIXTURE_TEST_SUITE(mruset_tests, BasicTestingSetup)
// Do a full comparison between mru and the simulated mru every 1000 and every 5001 elements.
if (j % 1000 == 0 || j % 5001 == 0) {
mruset<int> mru2 = mru; // Also try making a copy
// Test that an mruset behaves like a set, as long as no more than MAX_SIZE elements are in it
BOOST_AUTO_TEST_CASE(mruset_like_set)
{
for (int nTest=0; nTest<NUM_TESTS; nTest++)
{
mrutester tester;
while (tester.size() < MAX_SIZE)
tester.insert(GetRandInt(2 * MAX_SIZE));
// Check that all elements that should be in there, are in there.
BOOST_FOREACH(int x, rep) {
BOOST_CHECK(mru.count(x));
BOOST_CHECK(mru2.count(x));
}
// Check that all elements that are in there, should be in there.
BOOST_FOREACH(int x, mru) {
BOOST_CHECK(all.count(x));
}
// Test that an mruset's size never exceeds its max_size
BOOST_AUTO_TEST_CASE(mruset_limited_size)
{
for (int nTest=0; nTest<NUM_TESTS; nTest++)
{
mruset<int> mru(MAX_SIZE);
for (int nAction=0; nAction<3*MAX_SIZE; nAction++)
{
int n = GetRandInt(2 * MAX_SIZE);
mru.insert(n);
BOOST_CHECK(mru.size() <= MAX_SIZE);
// Check that all elements that are in there, should be in there.
BOOST_FOREACH(int x, mru2) {
BOOST_CHECK(all.count(x));
}
for (int t = 0; t < 10; t++) {
int r = GetRandInt(15000);
BOOST_CHECK(all.count(r) == mru.count(r));
BOOST_CHECK(all.count(r) == mru2.count(r));
}
}
// 16-bit permutation function
int static permute(int n)
{
// hexadecimals of pi; verified to be linearly independent
static const int table[16] = {0x243F, 0x6A88, 0x85A3, 0x08D3, 0x1319, 0x8A2E, 0x0370, 0x7344,
0xA409, 0x3822, 0x299F, 0x31D0, 0x082E, 0xFA98, 0xEC4E, 0x6C89};
int ret = 0;
for (int bit=0; bit<16; bit++)
if (n & (1<<bit))
ret ^= table[bit];
return ret;
}
// Test that an mruset acts like a moving window, if no duplicate elements are added
BOOST_AUTO_TEST_CASE(mruset_window)
{
mruset<int> mru(MAX_SIZE);
for (int n=0; n<10*MAX_SIZE; n++)
{
mru.insert(permute(n));
set<int> tester;
for (int m=max(0,n-MAX_SIZE+1); m<=n; m++)
tester.insert(permute(m));
BOOST_CHECK(mru == tester);
}
}

Loading…
Cancel
Save