From bd21612c37cf4f2df3a6926beff8a7f89714235e Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 13 Aug 2012 05:26:27 +0200
Subject: [PATCH] Add a CBloomFilter class for use as a transaction filter.

---
 bitcoin-qt.pro           |   2 +
 src/bloom.cpp            | 156 +++++++++++++++++++++++++++++++++++++++
 src/bloom.h              |  74 +++++++++++++++++++
 src/makefile.linux-mingw |   1 +
 src/makefile.mingw       |   1 +
 src/makefile.osx         |   1 +
 src/makefile.unix        |   1 +
 7 files changed, 236 insertions(+)
 create mode 100644 src/bloom.cpp
 create mode 100644 src/bloom.h

diff --git a/bitcoin-qt.pro b/bitcoin-qt.pro
index 852879f8c..50ca80cb0 100644
--- a/bitcoin-qt.pro
+++ b/bitcoin-qt.pro
@@ -155,6 +155,7 @@ HEADERS += src/qt/bitcoingui.h \
     src/script.h \
     src/init.h \
     src/irc.h \
+    src/bloom.h \
     src/mruset.h \
     src/json/json_spirit_writer_template.h \
     src/json/json_spirit_writer.h \
@@ -223,6 +224,7 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \
     src/init.cpp \
     src/net.cpp \
     src/irc.cpp \
+    src/bloom.cpp \
     src/checkpoints.cpp \
     src/addrman.cpp \
     src/db.cpp \
diff --git a/src/bloom.cpp b/src/bloom.cpp
new file mode 100644
index 000000000..5fac1d06a
--- /dev/null
+++ b/src/bloom.cpp
@@ -0,0 +1,156 @@
+// Copyright (c) 2012 The Bitcoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#include <math.h>
+#include <stdlib.h>
+
+#include "bloom.h"
+#include "main.h"
+#include "script.h"
+
+#define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
+#define LN2 0.6931471805599453094172321214581765680755001343602552
+
+using namespace std;
+
+// bit_mask[n] has only bit n set; used to address a single bit inside a filter byte
+static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
+
+// Sizes the filter from the expected number of elements and the target false positive rate.
+// nElements must be non-zero: it is used as a divisor when choosing the number of hash functions.
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate) :
+// The ideal size for a bloom filter with a given number of elements and false positive rate is:
+// - nElements * log(fp rate) / ln(2)^2
+// We ignore filter parameters which will create a bloom filter larger than the protocol limits
+vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+// The ideal number of hash functions is filter size * ln(2) / number of elements
+// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS))
+{
+}
+
+// Maps vDataToHash to a bit index in [0, vData.size() * 8) using hash function number nHashNum.
+// vData must not be empty: the modulo below would otherwise divide by zero.
+inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
+{
+    // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
+    return MurmurHash3(nHashNum * 0xFBA4C795, vDataToHash) % (vData.size() * 8);
+}
+
+// Adds vKey to the filter by setting the bit selected by each of the nHashFuncs hash functions.
+void CBloomFilter::insert(const vector<unsigned char>& vKey)
+{
+    // A zero-length filter (e.g. one that was just deserialized) has no bits to set,
+    // and Hash() would divide by zero on it
+    if (vData.empty())
+        return;
+
+    for (unsigned int i = 0; i < nHashFuncs; i++)
+    {
+        unsigned int nIndex = Hash(i, vKey);
+        // Sets bit nIndex of vData
+        vData[nIndex >> 3] |= bit_mask[7 & nIndex];
+    }
+}
+
+// Adds the network serialization of outpoint to the filter.
+void CBloomFilter::insert(const COutPoint& outpoint)
+{
+    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
+    stream << outpoint;
+    vector<unsigned char> data(stream.begin(), stream.end());
+    insert(data);
+}
+
+// Adds the bytes of hash to the filter.
+void CBloomFilter::insert(const uint256& hash)
+{
+    vector<unsigned char> data(hash.begin(), hash.end());
+    insert(data);
+}
+
+// Returns false if vKey was definitely not inserted, true if it may have been (false positives are possible).
+bool CBloomFilter::contains(const vector<unsigned char>& vKey) const
+{
+    // A zero-length filter matches everything, exactly as a filter with nHashFuncs == 0 does;
+    // returning early also keeps Hash() from dividing by zero
+    if (vData.empty())
+        return true;
+
+    for (unsigned int i = 0; i < nHashFuncs; i++)
+    {
+        unsigned int nIndex = Hash(i, vKey);
+        // Checks bit nIndex of vData
+        if (!(vData[nIndex >> 3] & bit_mask[7 & nIndex]))
+            return false;
+    }
+    return true;
+}
+
+// Tests the filter for the network serialization of outpoint.
+bool CBloomFilter::contains(const COutPoint& outpoint) const
+{
+    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
+    stream << outpoint;
+    vector<unsigned char> data(stream.begin(), stream.end());
+    return contains(data);
+}
+
+// Tests the filter for the bytes of hash.
+bool CBloomFilter::contains(const uint256& hash) const
+{
+    vector<unsigned char> data(hash.begin(), hash.end());
+    return contains(data);
+}
+
+// True if both the filter size and the number of hash functions are within the protocol limits.
+bool CBloomFilter::IsWithinSizeConstraints() const
+{
+    return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;
+}
+
+// True if the filter matches the hash of tx, a data element in one of its scripts, or an outpoint it spends.
+bool CBloomFilter::IsTransactionRelevantToFilter(const CTransaction& tx) const
+{
+    // Match if the filter contains the hash of tx
+    // for finding tx when they appear in a block
+    if (contains(tx.GetHash()))
+        return true;
+
+    BOOST_FOREACH(const CTxOut& txout, tx.vout)
+    {
+        // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
+        CScript::const_iterator pc = txout.scriptPubKey.begin();
+        vector<unsigned char> data;
+        while (pc < txout.scriptPubKey.end())
+        {
+            opcodetype opcode;
+            if (!txout.scriptPubKey.GetOp(pc, opcode, data))
+                break;
+            if (data.size() != 0 && contains(data))
+                return true;
+        }
+    }
+
+    BOOST_FOREACH(const CTxIn& txin, tx.vin)
+    {
+        // Match if the filter contains an outpoint tx spends
+        if (contains(txin.prevout))
+            return true;
+
+        // Match if the filter contains any arbitrary script data element in any scriptSig in tx
+        CScript::const_iterator pc = txin.scriptSig.begin();
+        vector<unsigned char> data;
+        while (pc < txin.scriptSig.end())
+        {
+            opcodetype opcode;
+            if (!txin.scriptSig.GetOp(pc, opcode, data))
+                break;
+            if (data.size() != 0 && contains(data))
+                return true;
+        }
+    }
+
+    return false;
+}
diff --git a/src/bloom.h b/src/bloom.h
new file mode 100644
index 000000000..ce84e6655
--- /dev/null
+++ b/src/bloom.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2012 The Bitcoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#ifndef BITCOIN_BLOOM_H
+#define BITCOIN_BLOOM_H
+
+#include <vector>
+
+#include "uint256.h"
+#include "serialize.h"
+
+class COutPoint;
+class CTransaction;
+
+// 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
+static const unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
+static const unsigned int MAX_HASH_FUNCS = 50;
+
+
+/**
+ * BloomFilter is a probabilistic filter which SPV clients provide
+ * so that we can filter the transactions we send them.
+ *
+ * This allows for significantly more efficient transaction and block downloads.
+ *
+ * Because bloom filters are probabilistic, an SPV node can increase the false-
+ * positive rate, making us send them transactions which aren't actually theirs,
+ * allowing clients to trade more bandwidth for more privacy by obfuscating which
+ * keys are owned by them.
+ */
+class CBloomFilter
+{
+private:
+    std::vector<unsigned char> vData;
+    unsigned int nHashFuncs;
+
+    unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
+
+public:
+    // Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
+    // Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
+    // the filter created will be as close to the given parameters as possible within the protocol limits.
+    // This will apply if nFPRate is very low or nElements is unreasonably high.
+    // nElements must be non-zero (it is used as a divisor when choosing the number of hash functions).
+    CBloomFilter(unsigned int nElements, double nFPRate);
+    // Creates a zero-length filter with no hash functions: insert() has no effect on it and contains() matches everything
+    // Should only be used for deserialization
+    CBloomFilter() : nHashFuncs(0) {}
+
+    IMPLEMENT_SERIALIZE
+    (
+        READWRITE(vData);
+        READWRITE(nHashFuncs);
+    )
+
+    // Adds a key to the filter; has no effect on a zero-length filter
+    void insert(const std::vector<unsigned char>& vKey);
+    void insert(const COutPoint& outpoint);
+    void insert(const uint256& hash);
+
+    // False if the key was definitely not inserted, true if it may have been; a zero-length filter matches everything
+    bool contains(const std::vector<unsigned char>& vKey) const;
+    bool contains(const COutPoint& outpoint) const;
+    bool contains(const uint256& hash) const;
+
+    // True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
+    // (catch a filter which was just deserialized which was too big)
+    bool IsWithinSizeConstraints() const;
+
+    // True if the filter matches the hash of tx, a data element in one of its scripts, or an outpoint it spends
+    bool IsTransactionRelevantToFilter(const CTransaction& tx) const;
+};
+
+#endif /* BITCOIN_BLOOM_H */
diff --git a/src/makefile.linux-mingw b/src/makefile.linux-mingw
index 95e7e83cd..ff565f2a2 100644
--- a/src/makefile.linux-mingw
+++ b/src/makefile.linux-mingw
@@ -84,6 +84,7 @@ OBJS= \
     obj/walletdb.o \
     obj/noui.o \
     obj/hash.o \
+    obj/bloom.o \
     obj/leveldb.o \
     obj/txdb.o
 
diff --git a/src/makefile.mingw b/src/makefile.mingw
index 2abc34478..9a6680bf4 100644
--- a/src/makefile.mingw
+++ b/src/makefile.mingw
@@ -79,6 +79,7 @@ OBJS= \
     obj/wallet.o \
     obj/walletdb.o \
     obj/hash.o \
+    obj/bloom.o \
     obj/noui.o \
     obj/leveldb.o \
     obj/txdb.o
diff --git a/src/makefile.osx b/src/makefile.osx
index 474f17b73..8b7c559fa 100644
--- a/src/makefile.osx
+++ b/src/makefile.osx
@@ -97,6 +97,7 @@ OBJS= \
     obj/wallet.o \
     obj/walletdb.o \
     obj/hash.o \
+    obj/bloom.o \
     obj/noui.o \
     obj/leveldb.o \
     obj/txdb.o
diff --git a/src/makefile.unix b/src/makefile.unix
index 979a4a652..14cf1b8fa 100644
--- a/src/makefile.unix
+++ b/src/makefile.unix
@@ -128,6 +128,7 @@ OBJS= \
     obj/wallet.o \
     obj/walletdb.o \
     obj/hash.o \
+    obj/bloom.o \
     obj/noui.o \
     obj/leveldb.o \
     obj/txdb.o