Merge pull request #1795 from TheBlueMatt/bloom

Bloom filters
2025-09-02 18:02:01 +00:00 · 2013-01-17 10:04:08 -08:00 · 2013-01-17 10:04:08 -08:00 · 91f70a75da
commit 91f70a75da
parent a1f4909e0b c51694eb9b
21 changed files with 1315 additions and 62 deletions
--- a/bitcoin-qt.pro
+++ b/bitcoin-qt.pro
@ -155,6 +155,7 @@ HEADERS += src/qt/bitcoingui.h \
    src/script.h \
    src/init.h \
    src/irc.h \
+    src/bloom.h \
    src/mruset.h \
    src/json/json_spirit_writer_template.h \
    src/json/json_spirit_writer.h \
@ -215,6 +216,7 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \
    src/version.cpp \
    src/sync.cpp \
    src/util.cpp \
+    src/hash.cpp \
    src/netbase.cpp \
    src/key.cpp \
    src/script.cpp \
@ -222,6 +224,7 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \
    src/init.cpp \
    src/net.cpp \
    src/irc.cpp \
+    src/bloom.cpp \
    src/checkpoints.cpp \
    src/addrman.cpp \
    src/db.cpp \
--- a/src/bloom.cpp
+++ b/src/bloom.cpp
@ -0,0 +1,156 @@
+// Copyright (c) 2012 The Bitcoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#include <math.h>
+#include <stdlib.h>
+
+#include "bloom.h"
+#include "main.h"
+#include "script.h"
+
+#define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
+#define LN2 0.6931471805599453094172321214581765680755001343602552
+
+using namespace std;
+
+static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
+
+CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
+// The ideal size for a bloom filter with a given number of elements and false positive rate is:
+// - nElements * log(fp rate) / ln(2)^2
+// We ignore filter parameters which will create a bloom filter larger than the protocol limits
+vData(min((unsigned int)(-1  / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
+// The ideal number of hash functions is filter size * ln(2) / number of elements
+// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
+// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
+nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
+nTweak(nTweakIn),
+nFlags(nFlagsIn)
+{
+}
+
+inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
+{
+    // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
+    return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8);
+}
+
+void CBloomFilter::insert(const vector<unsigned char>& vKey)
+{
+    for (unsigned int i = 0; i < nHashFuncs; i++)
+    {
+        unsigned int nIndex = Hash(i, vKey);
+        // Sets bit nIndex of vData
+        vData[nIndex >> 3] |= bit_mask[7 & nIndex];
+    }
+}
+
+void CBloomFilter::insert(const COutPoint& outpoint)
+{
+    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
+    stream << outpoint;
+    vector<unsigned char> data(stream.begin(), stream.end());
+    insert(data);
+}
+
+void CBloomFilter::insert(const uint256& hash)
+{
+    vector<unsigned char> data(hash.begin(), hash.end());
+    insert(data);
+}
+
+bool CBloomFilter::contains(const vector<unsigned char>& vKey) const
+{
+    for (unsigned int i = 0; i < nHashFuncs; i++)
+    {
+        unsigned int nIndex = Hash(i, vKey);
+        // Checks bit nIndex of vData
+        if (!(vData[nIndex >> 3] & bit_mask[7 & nIndex]))
+            return false;
+    }
+    return true;
+}
+
+bool CBloomFilter::contains(const COutPoint& outpoint) const
+{
+    CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
+    stream << outpoint;
+    vector<unsigned char> data(stream.begin(), stream.end());
+    return contains(data);
+}
+
+bool CBloomFilter::contains(const uint256& hash) const
+{
+    vector<unsigned char> data(hash.begin(), hash.end());
+    return contains(data);
+}
+
+bool CBloomFilter::IsWithinSizeConstraints() const
+{
+    return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;
+}
+
+bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash)
+{
+    bool fFound = false;
+    // Match if the filter contains the hash of tx
+    //  for finding tx when they appear in a block
+    if (contains(hash))
+        fFound = true;
+
+    for (unsigned int i = 0; i < tx.vout.size(); i++)
+    {
+        const CTxOut& txout = tx.vout[i];
+        // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
+        // If this matches, also add the specific output that was matched.
+        // This means clients don't have to update the filter themselves when a new relevant tx 
+        // is discovered in order to find spending transactions, which avoids round-tripping and race conditions.
+        CScript::const_iterator pc = txout.scriptPubKey.begin();
+        vector<unsigned char> data;
+        while (pc < txout.scriptPubKey.end())
+        {
+            opcodetype opcode;
+            if (!txout.scriptPubKey.GetOp(pc, opcode, data))
+                break;
+            if (data.size() != 0 && contains(data))
+            {
+                fFound = true;
+                if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_ALL)
+                    insert(COutPoint(hash, i));
+                else if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_P2PUBKEY_ONLY)
+                {
+                    txnouttype type;
+                    vector<vector<unsigned char> > vSolutions;
+                    if (Solver(txout.scriptPubKey, type, vSolutions) &&
+                            (type == TX_PUBKEY || type == TX_MULTISIG))
+                        insert(COutPoint(hash, i));
+                }
+                break;
+            }
+        }
+    }
+
+    if (fFound)
+        return true;
+
+    BOOST_FOREACH(const CTxIn& txin, tx.vin)
+    {
+        // Match if the filter contains an outpoint tx spends
+        if (contains(txin.prevout))
+            return true;
+
+        // Match if the filter contains any arbitrary script data element in any scriptSig in tx
+        CScript::const_iterator pc = txin.scriptSig.begin();
+        vector<unsigned char> data;
+        while (pc < txin.scriptSig.end())
+        {
+            opcodetype opcode;
+            if (!txin.scriptSig.GetOp(pc, opcode, data))
+                break;
+            if (data.size() != 0 && contains(data))
+                return true;
+        }
+    }
+
+    return false;
+}
--- a/src/bloom.h
+++ b/src/bloom.h
@ -0,0 +1,88 @@
+// Copyright (c) 2012 The Bitcoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#ifndef BITCOIN_BLOOM_H
+#define BITCOIN_BLOOM_H
+
+#include <vector>
+
+#include "uint256.h"
+#include "serialize.h"
+
+class COutPoint;
+class CTransaction;
+
+// 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
+static const unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
+static const unsigned int MAX_HASH_FUNCS = 50;
+
+// First two bits of nFlags control how much IsRelevantAndUpdate actually updates
+// The remaining bits are reserved
+enum bloomflags
+{
+    BLOOM_UPDATE_NONE = 0,
+    BLOOM_UPDATE_ALL = 1,
+    // Only adds outpoints to the filter if the output is a pay-to-pubkey/pay-to-multisig script
+    BLOOM_UPDATE_P2PUBKEY_ONLY = 2,
+    BLOOM_UPDATE_MASK = 3,
+};
+
+/**
+ * BloomFilter is a probabilistic filter which SPV clients provide
+ * so that we can filter the transactions we sends them.
+ * 
+ * This allows for significantly more efficient transaction and block downloads.
+ * 
+ * Because bloom filters are probabilistic, an SPV node can increase the false-
+ * positive rate, making us send them transactions which aren't actually theirs, 
+ * allowing clients to trade more bandwidth for more privacy by obfuscating which
+ * keys are owned by them.
+ */
+class CBloomFilter
+{
+private:
+    std::vector<unsigned char> vData;
+    unsigned int nHashFuncs;
+    unsigned int nTweak;
+    unsigned char nFlags;
+
+    unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
+
+public:
+    // Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
+    // Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
+    // the filter created will be as close to the given parameters as possible within the protocol limits.
+    // This will apply if nFPRate is very low or nElements is unreasonably high.
+    // nTweak is a constant which is added to the seed value passed to the hash function
+    // It should generally always be a random value (and is largely only exposed for unit testing)
+    // nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK)
+    CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak, unsigned char nFlagsIn);
+    // Using a filter initialized with this results in undefined behavior
+    // Should only be used for deserialization
+    CBloomFilter() {}
+
+    IMPLEMENT_SERIALIZE
+    (
+        READWRITE(vData);
+        READWRITE(nHashFuncs);
+        READWRITE(nTweak);
+        READWRITE(nFlags);
+    )
+
+    void insert(const std::vector<unsigned char>& vKey);
+    void insert(const COutPoint& outpoint);
+    void insert(const uint256& hash);
+
+    bool contains(const std::vector<unsigned char>& vKey) const;
+    bool contains(const COutPoint& outpoint) const;
+    bool contains(const uint256& hash) const;
+
+    // True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
+    // (catch a filter which was just deserialized which was too big)
+    bool IsWithinSizeConstraints() const;
+
+    // Also adds any outputs which match the filter to the filter (to match their spending txes)
+    bool IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash);
+};
+
+#endif /* BITCOIN_BLOOM_H */
--- a/src/hash.cpp
+++ b/src/hash.cpp
@ -0,0 +1,58 @@
+#include "hash.h"
+
+inline uint32_t ROTL32 ( uint32_t x, int8_t r )
+{
+    return (x << r) | (x >> (32 - r));
+}
+
+unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash)
+{
+    // The following is MurmurHash3 (x86_32), see http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+    uint32_t h1 = nHashSeed;
+    const uint32_t c1 = 0xcc9e2d51;
+    const uint32_t c2 = 0x1b873593;
+
+    const int nblocks = vDataToHash.size() / 4;
+
+    //----------
+    // body
+    const uint32_t * blocks = (const uint32_t *)(&vDataToHash[0] + nblocks*4);
+
+    for(int i = -nblocks; i; i++)
+    {
+        uint32_t k1 = blocks[i];
+
+        k1 *= c1;
+        k1 = ROTL32(k1,15);
+        k1 *= c2;
+
+        h1 ^= k1;
+        h1 = ROTL32(h1,13); 
+        h1 = h1*5+0xe6546b64;
+    }
+
+    //----------
+    // tail
+    const uint8_t * tail = (const uint8_t*)(&vDataToHash[0] + nblocks*4);
+
+    uint32_t k1 = 0;
+
+    switch(vDataToHash.size() & 3)
+    {
+    case 3: k1 ^= tail[2] << 16;
+    case 2: k1 ^= tail[1] << 8;
+    case 1: k1 ^= tail[0];
+            k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+    };
+
+    //----------
+    // finalization
+    h1 ^= vDataToHash.size();
+    h1 ^= h1 >> 16;
+    h1 *= 0x85ebca6b;
+    h1 ^= h1 >> 13;
+    h1 *= 0xc2b2ae35;
+    h1 ^= h1 >> 16;
+
+    return h1;
+}
--- a/src/hash.h
+++ b/src/hash.h
@ -10,6 +10,7 @@

 #include <openssl/sha.h>
 #include <openssl/ripemd.h>
+#include <vector>

 template<typename T1>
 inline uint256 Hash(const T1 pbegin, const T1 pend)
@ -113,4 +114,6 @@ inline uint160 Hash160(const std::vector<unsigned char>& vch)
    return hash2;
 }

+unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash);
+
 #endif
--- a/src/main.cpp
+++ b/src/main.cpp
@ -2239,6 +2239,160 @@ bool ProcessBlock(CNode* pfrom, CBlock* pblock, CDiskBlockPos *dbp)



+CMerkleBlock::CMerkleBlock(const CBlock& block, CBloomFilter& filter)
+{
+    header = block.GetBlockHeader();
+
+    vector<bool> vMatch;
+    vector<uint256> vHashes;
+
+    vMatch.reserve(block.vtx.size());
+    vHashes.reserve(block.vtx.size());
+
+    for (unsigned int i = 0; i < block.vtx.size(); i++)
+    {
+        uint256 hash = block.vtx[i].GetHash();
+        if (filter.IsRelevantAndUpdate(block.vtx[i], hash))
+        {
+            vMatch.push_back(true);
+            vMatchedTxn.push_back(make_pair(i, hash));
+        }
+        else
+            vMatch.push_back(false);
+        vHashes.push_back(hash);
+    }
+
+    txn = CPartialMerkleTree(vHashes, vMatch);
+}
+
+
+
+
+
+
+
+
+uint256 CPartialMerkleTree::CalcHash(int height, unsigned int pos, const std::vector<uint256> &vTxid) {
+    if (height == 0) {
+        // hash at height 0 is the txids themself
+        return vTxid[pos];
+    } else {
+        // calculate left hash
+        uint256 left = CalcHash(height-1, pos*2, vTxid), right;
+        // calculate right hash if not beyong the end of the array - copy left hash otherwise1
+        if (pos*2+1 < CalcTreeWidth(height-1))
+            right = CalcHash(height-1, pos*2+1, vTxid);
+        else
+            right = left;
+        // combine subhashes
+        return Hash(BEGIN(left), END(left), BEGIN(right), END(right));
+    }
+}
+
+void CPartialMerkleTree::TraverseAndBuild(int height, unsigned int pos, const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) {
+    // determine whether this node is the parent of at least one matched txid
+    bool fParentOfMatch = false;
+    for (unsigned int p = pos << height; p < (pos+1) << height && p < nTransactions; p++)
+        fParentOfMatch |= vMatch[p];
+    // store as flag bit
+    vBits.push_back(fParentOfMatch);
+    if (height==0 || !fParentOfMatch) {
+        // if at height 0, or nothing interesting below, store hash and stop
+        vHash.push_back(CalcHash(height, pos, vTxid));
+    } else {
+        // otherwise, don't store any hash, but descend into the subtrees
+        TraverseAndBuild(height-1, pos*2, vTxid, vMatch);
+        if (pos*2+1 < CalcTreeWidth(height-1))
+            TraverseAndBuild(height-1, pos*2+1, vTxid, vMatch);
+    }
+}
+
+uint256 CPartialMerkleTree::TraverseAndExtract(int height, unsigned int pos, unsigned int &nBitsUsed, unsigned int &nHashUsed, std::vector<uint256> &vMatch) {
+    if (nBitsUsed >= vBits.size()) {
+        // overflowed the bits array - failure
+        fBad = true;
+        return 0;
+    }
+    bool fParentOfMatch = vBits[nBitsUsed++];
+    if (height==0 || !fParentOfMatch) {
+        // if at height 0, or nothing interesting below, use stored hash and do not descend
+        if (nHashUsed >= vHash.size()) {
+            // overflowed the hash array - failure
+            fBad = true;
+            return 0;
+        }
+        const uint256 &hash = vHash[nHashUsed++];
+        if (height==0 && fParentOfMatch) // in case of height 0, we have a matched txid
+            vMatch.push_back(hash);
+        return hash;
+    } else {
+        // otherwise, descend into the subtrees to extract matched txids and hashes
+        uint256 left = TraverseAndExtract(height-1, pos*2, nBitsUsed, nHashUsed, vMatch), right;
+        if (pos*2+1 < CalcTreeWidth(height-1))
+            right = TraverseAndExtract(height-1, pos*2+1, nBitsUsed, nHashUsed, vMatch);
+        else
+            right = left;
+        // and combine them before returning
+        return Hash(BEGIN(left), END(left), BEGIN(right), END(right));
+    }
+}
+
+CPartialMerkleTree::CPartialMerkleTree(const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) : nTransactions(vTxid.size()), fBad(false) {
+    // reset state
+    vBits.clear();
+    vHash.clear();
+
+    // calculate height of tree
+    int nHeight = 0;
+    while (CalcTreeWidth(nHeight) > 1)
+        nHeight++;
+
+    // traverse the partial tree
+    TraverseAndBuild(nHeight, 0, vTxid, vMatch);
+}
+
+CPartialMerkleTree::CPartialMerkleTree() : nTransactions(0), fBad(true) {}
+
+uint256 CPartialMerkleTree::ExtractMatches(std::vector<uint256> &vMatch) {
+    vMatch.clear();
+    // An empty set will not work
+    if (nTransactions == 0)
+        return 0;
+    // check for excessively high numbers of transactions
+    if (nTransactions > MAX_BLOCK_SIZE / 60) // 60 is the lower bound for the size of a serialized CTransaction
+        return 0;
+    // there can never be more hashes provided than one for every txid
+    if (vHash.size() > nTransactions)
+        return 0;
+    // there must be at least one bit per node in the partial tree, and at least one node per hash
+    if (vBits.size() < vHash.size())
+        return 0;
+    // calculate height of tree
+    int nHeight = 0;
+    while (CalcTreeWidth(nHeight) > 1)
+        nHeight++;
+    // traverse the partial tree
+    unsigned int nBitsUsed = 0, nHashUsed = 0;
+    uint256 hashMerkleRoot = TraverseAndExtract(nHeight, 0, nBitsUsed, nHashUsed, vMatch);
+    // verify that no problems occured during the tree traversal
+    if (fBad)
+        return 0;
+    // verify that all bits were consumed (except for the padding caused by serializing it as a byte sequence)
+    if ((nBitsUsed+7)/8 != (vBits.size()+7)/8)
+        return 0;
+    // verify that all hashes were consumed
+    if (nHashUsed != vHash.size())
+        return 0;
+    return hashMerkleRoot;
+}
+
+
+
+
+
+
+
+
 bool CheckDiskSpace(uint64 nAdditionalBytes)
 {
    uint64 nFreeBytesAvailable = filesystem::space(GetDataDir()).available;
@ -2815,6 +2969,10 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
            vRecv >> pfrom->strSubVer;
        if (!vRecv.empty())
            vRecv >> pfrom->nStartingHeight;
+        if (!vRecv.empty())
+            vRecv >> pfrom->fRelayTxes; // set to true after we get the first filter* message
+        else
+            pfrom->fRelayTxes = true;

        if (pfrom->fInbound && addrMe.IsRoutable())
        {
@ -3045,7 +3203,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
            if (fDebugNet || (vInv.size() == 1))
                printf("received getdata for: %s\n", inv.ToString().c_str());

-            if (inv.type == MSG_BLOCK)
+            if (inv.type == MSG_BLOCK || inv.type == MSG_FILTERED_BLOCK)
            {
                // Send block from disk
                map<uint256, CBlockIndex*>::iterator mi = mapBlockIndex.find(inv.hash);
@ -3053,7 +3211,29 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
                {
                    CBlock block;
                    block.ReadFromDisk((*mi).second);
-                    pfrom->PushMessage("block", block);
+                    if (inv.type == MSG_BLOCK)
+                        pfrom->PushMessage("block", block);
+                    else // MSG_FILTERED_BLOCK)
+                    {
+                        LOCK(pfrom->cs_filter);
+                        if (pfrom->pfilter)
+                        {
+                            CMerkleBlock merkleBlock(block, *pfrom->pfilter);
+                            // CMerkleBlock just contains hashes, so also push any transactions in the block the client did not see 
+                            // This avoids hurting performance by pointlessly requiring a round-trip
+                            // Note that there is currently no way for a node to request any single transactions we didnt send here -
+                            // they must either disconnect and retry or request the full block.
+                            // Thus, the protocol spec specified allows for us to provide duplicate txn here,
+                            // however we MUST always provide at least what the remote peer needs
+                            typedef std::pair<unsigned int, uint256> PairType;
+                            BOOST_FOREACH(PairType& pair, merkleBlock.vMatchedTxn)
+                                if (!pfrom->setInventoryKnown.count(CInv(MSG_TX, pair.second)))
+                                    pfrom->PushMessage("tx", block.vtx[pair.first]);
+                            pfrom->PushMessage("merkleblock", merkleBlock);
+                        }
+                        // else
+                            // no response
+                    }

                    // Trigger them to send a getblocks request for the next batch of inventory
                    if (inv.hash == pfrom->hashContinue)
@ -3184,7 +3364,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
        if (tx.AcceptToMemoryPool(true, &fMissingInputs))
        {
            SyncWithWallets(inv.hash, tx, NULL, true);
-            RelayMessage(inv, vMsg);
+            RelayTransaction(tx, inv.hash, vMsg);
            mapAlreadyAskedFor.erase(inv);
            vWorkQueue.push_back(inv.hash);
            vEraseQueue.push_back(inv.hash);
@ -3207,7 +3387,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
                    {
                        printf("   accepted orphan tx %s\n", inv.hash.ToString().substr(0,10).c_str());
                        SyncWithWallets(inv.hash, tx, NULL, true);
-                        RelayMessage(inv, vMsg);
+                        RelayTransaction(tx, inv.hash, vMsg);
                        mapAlreadyAskedFor.erase(inv);
                        vWorkQueue.push_back(inv.hash);
                        vEraseQueue.push_back(inv.hash);
@ -3266,13 +3446,16 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
    else if (strCommand == "mempool")
    {
        std::vector<uint256> vtxid;
+        LOCK2(mempool.cs, pfrom->cs_filter);
        mempool.queryHashes(vtxid);
        vector<CInv> vInv;
-        for (unsigned int i = 0; i < vtxid.size(); i++) {
-            CInv inv(MSG_TX, vtxid[i]);
-            vInv.push_back(inv);
-            if (i == (MAX_INV_SZ - 1))
-                    break;
+        BOOST_FOREACH(uint256& hash, vtxid) {
+            CInv inv(MSG_TX, hash);
+            if ((pfrom->pfilter && pfrom->pfilter->IsRelevantAndUpdate(mempool.lookup(hash), hash)) ||
+               (!pfrom->pfilter))
+                vInv.push_back(inv);
+            if (vInv.size() == MAX_INV_SZ)
+                break;
        }
        if (vInv.size() > 0)
            pfrom->PushMessage("inv", vInv);
@ -3332,6 +3515,53 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
    }


+    else if (strCommand == "filterload")
+    {
+        CBloomFilter filter;
+        vRecv >> filter;
+
+        if (!filter.IsWithinSizeConstraints())
+            // There is no excuse for sending a too-large filter
+            pfrom->Misbehaving(100);
+        else
+        {
+            LOCK(pfrom->cs_filter);
+            delete pfrom->pfilter;
+            pfrom->pfilter = new CBloomFilter(filter);
+        }
+        pfrom->fRelayTxes = true;
+    }
+
+
+    else if (strCommand == "filteradd")
+    {
+        vector<unsigned char> vData;
+        vRecv >> vData;
+
+        // Nodes must NEVER send a data item > 520 bytes (the max size for a script data object,
+        // and thus, the maximum size any matched object can have) in a filteradd message
+        if (vData.size() > 520)
+        {
+            pfrom->Misbehaving(100);
+        } else {
+            LOCK(pfrom->cs_filter);
+            if (pfrom->pfilter)
+                pfrom->pfilter->insert(vData);
+            else
+                pfrom->Misbehaving(100);
+        }
+    }
+
+
+    else if (strCommand == "filterclear")
+    {
+        LOCK(pfrom->cs_filter);
+        delete pfrom->pfilter;
+        pfrom->pfilter = NULL;
+        pfrom->fRelayTxes = true;
+    }
+
+
    else
    {
        // Ignore unknown commands for extensibility
--- a/src/main.h
+++ b/src/main.h
@ -1110,11 +1110,101 @@ public:



+/** Data structure that represents a partial merkle tree.
+ *
+ * It respresents a subset of the txid's of a known block, in a way that
+ * allows recovery of the list of txid's and the merkle root, in an
+ * authenticated way.
+ *
+ * The encoding works as follows: we traverse the tree in depth-first order,
+ * storing a bit for each traversed node, signifying whether the node is the
+ * parent of at least one matched leaf txid (or a matched txid itself). In
+ * case we are at the leaf level, or this bit is 0, its merkle node hash is
+ * stored, and its children are not explorer further. Otherwise, no hash is
+ * stored, but we recurse into both (or the only) child branch. During
+ * decoding, the same depth-first traversal is performed, consuming bits and
+ * hashes as they written during encoding.
+ *
+ * The serialization is fixed and provides a hard guarantee about the
+ * encoded size:
+ *
+ *   SIZE <= 10 + ceil(32.25*N)
+ *
+ * Where N represents the number of leaf nodes of the partial tree. N itself
+ * is bounded by:
+ *
+ *   N <= total_transactions
+ *   N <= 1 + matched_transactions*tree_height
+ *
+ * The serialization format:
+ *  - uint32     total_transactions (4 bytes)
+ *  - varint     number of hashes   (1-3 bytes)
+ *  - uint256[]  hashes in depth-first order (<= 32*N bytes)
+ *  - varint     number of bytes of flag bits (1-3 bytes)
+ *  - byte[]     flag bits, packed per 8 in a byte, least significant bit first (<= 2*N-1 bits)
+ * The size constraints follow from this.
+ */
+class CPartialMerkleTree
+{
+protected:
+    // the total number of transactions in the block
+    unsigned int nTransactions;

+    // node-is-parent-of-matched-txid bits
+    std::vector<bool> vBits;

+    // txids and internal hashes
+    std::vector<uint256> vHash;

+    // flag set when encountering invalid data
+    bool fBad;

+    // helper function to efficiently calculate the number of nodes at given height in the merkle tree
+    unsigned int CalcTreeWidth(int height) {
+        return (nTransactions+(1 << height)-1) >> height;
+    }

+    // calculate the hash of a node in the merkle tree (at leaf level: the txid's themself)
+    uint256 CalcHash(int height, unsigned int pos, const std::vector<uint256> &vTxid);
+
+    // recursive function that traverses tree nodes, storing the data as bits and hashes
+    void TraverseAndBuild(int height, unsigned int pos, const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch);
+
+    // recursive function that traverses tree nodes, consuming the bits and hashes produced by TraverseAndBuild.
+    // it returns the hash of the respective node.
+    uint256 TraverseAndExtract(int height, unsigned int pos, unsigned int &nBitsUsed, unsigned int &nHashUsed, std::vector<uint256> &vMatch);
+
+public:
+
+    // serialization implementation
+    IMPLEMENT_SERIALIZE(
+        READWRITE(nTransactions);
+        READWRITE(vHash);
+        std::vector<unsigned char> vBytes;
+        if (fRead) {
+            READWRITE(vBytes);
+            CPartialMerkleTree &us = *(const_cast<CPartialMerkleTree*>(this));
+            us.vBits.resize(vBytes.size() * 8);
+            for (unsigned int p = 0; p < us.vBits.size(); p++)
+                us.vBits[p] = (vBytes[p / 8] & (1 << (p % 8))) != 0;
+            us.fBad = false;
+        } else {
+            vBytes.resize((vBits.size()+7)/8);
+            for (unsigned int p = 0; p < vBits.size(); p++)
+                vBytes[p / 8] |= vBits[p] << (p % 8);
+            READWRITE(vBytes);
+        }
+    )
+
+    // Construct a partial merkle tree from a list of transaction id's, and a mask that selects a subset of them
+    CPartialMerkleTree(const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch);
+
+    CPartialMerkleTree();
+
+    // extract the matching txid's represented by this partial merkle tree.
+    // returns the merkle root, or 0 in case of failure
+    uint256 ExtractMatches(std::vector<uint256> &vMatch);
+};


 /** Nodes collect new transactions into a block, hash them into a hash tree,
@ -1218,6 +1308,18 @@ public:
        nDoS = 0;
    }

+    CBlockHeader GetBlockHeader() const
+    {
+        CBlockHeader block;
+        block.nVersion       = nVersion;
+        block.hashPrevBlock  = hashPrevBlock;
+        block.hashMerkleRoot = hashMerkleRoot;
+        block.nTime          = nTime;
+        block.nBits          = nBits;
+        block.nNonce         = nNonce;
+        return block;
+    }
+
    uint256 BuildMerkleTree() const
    {
        vMerkleTree.clear();
@ -2027,4 +2129,36 @@ struct CBlockTemplate
    std::vector<int64_t> vTxSigOps;
 };

+
+
+
+
+
+/** Used to relay blocks as header + vector<merkle branch>
+ * to filtered nodes.
+ */
+class CMerkleBlock
+{
+public:
+    // Public only for unit testing
+    CBlockHeader header;
+    CPartialMerkleTree txn;
+
+public:
+    // Public only for unit testing and relay testing
+    // (not relayed)
+    std::vector<std::pair<unsigned int, uint256> > vMatchedTxn;
+
+    // Create from a CBlock, filtering transactions according to filter
+    // Note that this will call IsRelevantAndUpdate on the filter for each transaction,
+    // thus the filter will likely be modified.
+    CMerkleBlock(const CBlock& block, CBloomFilter& filter);
+
+    IMPLEMENT_SERIALIZE
+    (
+        READWRITE(header);
+        READWRITE(txn);
+    )
+};
+
 #endif
--- a/src/makefile.linux-mingw
+++ b/src/makefile.linux-mingw
@ -83,6 +83,8 @@ OBJS= \
    obj/wallet.o \
    obj/walletdb.o \
    obj/noui.o \
+    obj/hash.o \
+    obj/bloom.o \
    obj/leveldb.o \
    obj/txdb.o

--- a/src/makefile.mingw
+++ b/src/makefile.mingw
@ -78,6 +78,8 @@ OBJS= \
    obj/util.o \
    obj/wallet.o \
    obj/walletdb.o \
+    obj/hash.o \
+    obj/bloom.o \
    obj/noui.o \
    obj/leveldb.o \
    obj/txdb.o
--- a/src/makefile.osx
+++ b/src/makefile.osx
@ -96,6 +96,8 @@ OBJS= \
    obj/util.o \
    obj/wallet.o \
    obj/walletdb.o \
+    obj/hash.o \
+    obj/bloom.o \
    obj/noui.o \
    obj/leveldb.o \
    obj/txdb.o
--- a/src/makefile.unix
+++ b/src/makefile.unix
@ -127,6 +127,8 @@ OBJS= \
    obj/util.o \
    obj/wallet.o \
    obj/walletdb.o \
+    obj/hash.o \
+    obj/bloom.o \
    obj/noui.o \
    obj/leveldb.o \
    obj/txdb.o
--- a/src/net.cpp
+++ b/src/net.cpp
@ -9,6 +9,7 @@
 #include "init.h"
 #include "addrman.h"
 #include "ui_interface.h"
+#include "script.h"

 #ifdef WIN32
 #include <string.h>
@ -1996,3 +1997,48 @@ public:
    }
 }
 instance_of_cnetcleanup;
+
+
+
+
+
+
+
+void RelayTransaction(const CTransaction& tx, const uint256& hash)
+{
+    CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
+    ss.reserve(10000);
+    ss << tx;
+    RelayTransaction(tx, hash, ss);
+}
+
+void RelayTransaction(const CTransaction& tx, const uint256& hash, const CDataStream& ss)
+{
+    CInv inv(MSG_TX, hash);
+    {
+        LOCK(cs_mapRelay);
+        // Expire old relay messages
+        while (!vRelayExpiration.empty() && vRelayExpiration.front().first < GetTime())
+        {
+            mapRelay.erase(vRelayExpiration.front().second);
+            vRelayExpiration.pop_front();
+        }
+
+        // Save original serialized message so newer versions are preserved
+        mapRelay.insert(std::make_pair(inv, ss));
+        vRelayExpiration.push_back(std::make_pair(GetTime() + 15 * 60, inv));
+    }
+    LOCK(cs_vNodes);
+    BOOST_FOREACH(CNode* pnode, vNodes)
+    {
+        if(!pnode->fRelayTxes)
+            continue;
+        LOCK(pnode->cs_filter);
+        if (pnode->pfilter)
+        {
+            if (pnode->pfilter->IsRelevantAndUpdate(tx, hash))
+                pnode->PushInventory(inv);
+        } else
+            pnode->PushInventory(inv);
+    }
+}
--- a/src/net.h
+++ b/src/net.h
@ -19,6 +19,7 @@
 #include "protocol.h"
 #include "addrman.h"
 #include "hash.h"
+#include "bloom.h"

 class CNode;
 class CBlockIndex;
@ -151,7 +152,14 @@ public:
    bool fNetworkNode;
    bool fSuccessfullyConnected;
    bool fDisconnect;
+    // We use fRelayTxes for two purposes -
+    // a) it allows us to not relay tx invs before receiving the peer's version message
+    // b) the peer may tell us in their version message that we should not relay tx invs
+    //    until they have initialized their bloom filter.
+    bool fRelayTxes;
    CSemaphoreGrant grantOutbound;
+    CCriticalSection cs_filter;
+    CBloomFilter* pfilter;
 protected:
    int nRefCount;

@ -208,7 +216,9 @@ public:
        nStartingHeight = -1;
        fGetAddr = false;
        nMisbehavior = 0;
+        fRelayTxes = false;
        setInventoryKnown.max_size(SendBufferSize() / 1000);
+        pfilter = NULL;

        // Be shy and don't send version until we hear
        if (!fInbound)
@ -222,6 +232,8 @@ public:
            closesocket(hSocket);
            hSocket = INVALID_SOCKET;
        }
+        if (pfilter)
+            delete pfilter;
    }

 private:
@ -556,51 +568,8 @@ public:



-
-
-
-
-
-
-
-inline void RelayInventory(const CInv& inv)
-{
-    // Put on lists to offer to the other nodes
-    {
-        LOCK(cs_vNodes);
-        BOOST_FOREACH(CNode* pnode, vNodes)
-            pnode->PushInventory(inv);
-    }
-}
-
-template<typename T>
-void RelayMessage(const CInv& inv, const T& a)
-{
-    CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
-    ss.reserve(10000);
-    ss << a;
-    RelayMessage(inv, ss);
-}
-
-template<>
-inline void RelayMessage<>(const CInv& inv, const CDataStream& ss)
-{
-    {
-        LOCK(cs_mapRelay);
-        // Expire old relay messages
-        while (!vRelayExpiration.empty() && vRelayExpiration.front().first < GetTime())
-        {
-            mapRelay.erase(vRelayExpiration.front().second);
-            vRelayExpiration.pop_front();
-        }
-
-        // Save original serialized message so newer versions are preserved
-        mapRelay.insert(std::make_pair(inv, ss));
-        vRelayExpiration.push_back(std::make_pair(GetTime() + 15 * 60, inv));
-    }
-
-    RelayInventory(inv);
-}
-
+class CTransaction;
+void RelayTransaction(const CTransaction& tx, const uint256& hash);
+void RelayTransaction(const CTransaction& tx, const uint256& hash, const CDataStream& ss);

 #endif
--- a/src/protocol.cpp
+++ b/src/protocol.cpp
@ -17,6 +17,7 @@ static const char* ppszTypeName[] =
    "ERROR",
    "tx",
    "block",
+    "filtered block"
 };

 CMessageHeader::CMessageHeader()
--- a/src/protocol.h
+++ b/src/protocol.h
@ -138,6 +138,9 @@ enum
 {
    MSG_TX = 1,
    MSG_BLOCK,
+    // Nodes may always request a MSG_FILTERED_BLOCK in a getdata, however,
+    // MSG_FILTERED_BLOCK should not appear in any invs except as a part of getdata.
+    MSG_FILTERED_BLOCK,
 };

 #endif // __INCLUDED_PROTOCOL_H__
--- a/src/rpcrawtransaction.cpp
+++ b/src/rpcrawtransaction.cpp
@ -558,7 +558,7 @@ Value sendrawtransaction(const Array& params, bool fHelp)
    } else {
        SyncWithWallets(hashTx, tx, NULL, true);
    }
-    RelayMessage(CInv(MSG_TX, hashTx), tx);
+    RelayTransaction(tx, hashTx);

    return hashTx.GetHex();
 }
--- a/src/test/bloom_tests.cpp
+++ b/src/test/bloom_tests.cpp
--- a/src/test/pmt_tests.cpp
+++ b/src/test/pmt_tests.cpp
@ -0,0 +1,98 @@
+#include <boost/test/unit_test.hpp>
+
+#include "uint256.h"
+#include "main.h"
+
+using namespace std;
+
+class CPartialMerkleTreeTester : public CPartialMerkleTree
+{
+public:
+    // flip one bit in one of the hashes - this should break the authentication
+    void Damage() {
+        unsigned int n = rand() % vHash.size();
+        int bit = rand() % 256;
+        uint256 &hash = vHash[n];
+        hash ^= ((uint256)1 << bit);
+    }
+};
+
+BOOST_AUTO_TEST_SUITE(pmt_tests)
+
+BOOST_AUTO_TEST_CASE(pmt_test1)
+{
+    static const unsigned int nTxCounts[] = {1, 4, 7, 17, 56, 100, 127, 256, 312, 513, 1000, 4095};
+
+    for (int n = 0; n < 12; n++) {
+        unsigned int nTx = nTxCounts[n];
+
+        // build a block with some dummy transactions
+        CBlock block;
+        for (unsigned int j=0; j<nTx; j++) {
+            CTransaction tx;
+            tx.nLockTime = rand(); // actual transaction data doesn't matter; just make the nLockTime's unique
+            block.vtx.push_back(tx);
+        }
+
+        // calculate actual merkle root and height
+        uint256 merkleRoot1 = block.BuildMerkleTree();
+        std::vector<uint256> vTxid(nTx, 0);
+        for (unsigned int j=0; j<nTx; j++)
+            vTxid[j] = block.vtx[j].GetHash();
+        int nHeight = 1, nTx_ = nTx;
+        while (nTx_ > 1) {
+            nTx_ = (nTx_+1)/2;
+            nHeight++;
+        }
+
+        // check with random subsets with inclusion chances 1, 1/2, 1/4, ..., 1/128
+        for (int att = 1; att < 15; att++) {
+            // build random subset of txid's
+            std::vector<bool> vMatch(nTx, false);
+            std::vector<uint256> vMatchTxid1;
+            for (unsigned int j=0; j<nTx; j++) {
+                bool fInclude = (rand() & ((1 << (att/2)) - 1)) == 0;
+                vMatch[j] = fInclude;
+                if (fInclude)
+                    vMatchTxid1.push_back(vTxid[j]);
+            }
+
+            // build the partial merkle tree
+            CPartialMerkleTree pmt1(vTxid, vMatch);
+
+            // serialize
+            CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
+            ss << pmt1;
+
+            // verify CPartialMerkleTree's size guarantees
+            unsigned int n = std::min<unsigned int>(nTx, 1 + vMatchTxid1.size()*nHeight);
+            BOOST_CHECK(ss.size() <= 10 + (258*n+7)/8);
+
+            // deserialize into a tester copy
+            CPartialMerkleTreeTester pmt2;
+            ss >> pmt2;
+
+            // extract merkle root and matched txids from copy
+            std::vector<uint256> vMatchTxid2;
+            uint256 merkleRoot2 = pmt2.ExtractMatches(vMatchTxid2);
+
+            // check that it has the same merkle root as the original, and a valid one
+            BOOST_CHECK(merkleRoot1 == merkleRoot2);
+            BOOST_CHECK(merkleRoot2 != 0);
+
+            // check that it contains the matched transactions (in the same order!)
+            BOOST_CHECK(vMatchTxid1 == vMatchTxid2);
+
+            // check that random bit flips break the authentication
+            for (int j=0; j<4; j++) {
+                CPartialMerkleTreeTester pmt3(pmt2);
+                pmt3.Damage();
+                std::vector<uint256> vMatchTxid3;
+                uint256 merkleRoot3 = pmt3.ExtractMatches(vMatchTxid3);
+                BOOST_CHECK(merkleRoot3 != merkleRoot1);
+            }
+        }
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
--- a/src/uint256.h
+++ b/src/uint256.h
@ -344,7 +344,17 @@ public:
        return (unsigned char*)&pn[WIDTH];
    }

-    unsigned int size()
+    const unsigned char* begin() const
+    {
+        return (unsigned char*)&pn[0];
+    }
+
+    const unsigned char* end() const
+    {
+        return (unsigned char*)&pn[WIDTH];
+    }
+
+    unsigned int size() const
    {
        return sizeof(pn);
    }
--- a/src/version.h
+++ b/src/version.h
@ -25,7 +25,7 @@ extern const std::string CLIENT_DATE;
 // network protocol versioning
 //

-static const int PROTOCOL_VERSION = 60002;
+static const int PROTOCOL_VERSION = 70001;

 // earlier versions not supported as of Feb 2012, and are disconnected
 static const int MIN_PROTO_VERSION = 209;
--- a/src/wallet.cpp
+++ b/src/wallet.cpp
@ -826,17 +826,16 @@ void CWalletTx::RelayWalletTransaction()
 {
    BOOST_FOREACH(const CMerkleTx& tx, vtxPrev)
    {
-        if (!tx.IsCoinBase()) {
+        if (!tx.IsCoinBase())
            if (tx.GetDepthInMainChain() == 0)
-                RelayMessage(CInv(MSG_TX, tx.GetHash()), (CTransaction)tx);
-        }
+                RelayTransaction((CTransaction)tx, tx.GetHash());
    }
    if (!IsCoinBase())
    {
        if (GetDepthInMainChain() == 0) {
            uint256 hash = GetHash();
            printf("Relaying wtx %s\n", hash.ToString().substr(0,10).c_str());
-            RelayMessage(CInv(MSG_TX, hash), (CTransaction)*this);
+            RelayTransaction((CTransaction)*this, hash);
        }
    }
 }