From 85ad31ede7bc338079c8ae643542fde7ad83ce55 Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Fri, 15 Apr 2016 12:23:57 -0700
Subject: [PATCH] Add partial-block block encodings API

---
 src/Makefile.am        |   2 +
 src/blockencodings.cpp | 158 +++++++++++++++++++++++++++++++
 src/blockencodings.h   | 205 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 365 insertions(+)
 create mode 100644 src/blockencodings.cpp
 create mode 100644 src/blockencodings.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 3df8e267b..e3eaacdb4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -74,6 +74,7 @@ BITCOIN_CORE_H = \
   addrman.h \
   base58.h \
   bloom.h \
+  blockencodings.h \
   chain.h \
   chainparams.h \
   chainparamsbase.h \
@@ -163,6 +164,7 @@ libbitcoin_server_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
 libbitcoin_server_a_SOURCES = \
   addrman.cpp \
   bloom.cpp \
+  blockencodings.cpp \
   chain.cpp \
   checkpoints.cpp \
   httprpc.cpp \
diff --git a/src/blockencodings.cpp b/src/blockencodings.cpp
new file mode 100644
index 000000000..c6b79f420
--- /dev/null
+++ b/src/blockencodings.cpp
@@ -0,0 +1,158 @@
+// Copyright (c) 2016 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include "blockencodings.h"
+#include "consensus/consensus.h"
+#include "consensus/validation.h"
+#include "chainparams.h"
+#include "hash.h"
+#include "random.h"
+#include "streams.h"
+#include "txmempool.h"
+#include "main.h"
+
+#include <unordered_map>
+
+#define MIN_TRANSACTION_SIZE (::GetSerializeSize(CTransaction(), SER_NETWORK, PROTOCOL_VERSION)) // network-serialized size of a default-constructed CTransaction
+
+// Builds the compact encoding of block: the first transaction is sent prefilled
+// at index 0 and every later transaction is represented by its short ID.
+CBlockHeaderAndShortTxIDs::CBlockHeaderAndShortTxIDs(const CBlock& block) :
+        nonce(GetRand(std::numeric_limits<uint64_t>::max())),
+        shorttxids(block.vtx.size() - 1), prefilledtxn(1), header(block) {
+    FillShortTxIDSelector(); // the keys must be derived before GetShortID is used
+    //TODO: Use our mempool prior to block acceptance to predictively fill more than just the coinbase
+    prefilledtxn[0] = {0, block.vtx[0]};
+    for (size_t pos = 0; pos < shorttxids.size(); ++pos)
+        shorttxids[pos] = GetShortID(block.vtx[pos + 1].GetHash());
+}
+
+// Derives the two 64-bit short ID keys from a single SHA256 over the
+// network serialization of the header followed by the nonce.
+void CBlockHeaderAndShortTxIDs::FillShortTxIDSelector() const {
+    CDataStream keydata(SER_NETWORK, PROTOCOL_VERSION);
+    keydata << header << nonce;
+    uint256 digest;
+    CSHA256().Write((unsigned char*)&(*keydata.begin()), keydata.end() - keydata.begin()).Finalize(digest.begin());
+    shorttxidk0 = digest.GetUint64(0);
+    shorttxidk1 = digest.GetUint64(1);
+}
+
+uint64_t CBlockHeaderAndShortTxIDs::GetShortID(const uint256& txhash) const {
+    static_assert(SHORTTXIDS_LENGTH == 6, "shorttxids calculation assumes 6-byte shorttxids");
+    return SipHashUint256(shorttxidk0, shorttxidk1, txhash) & ((uint64_t(1) << (8 * SHORTTXIDS_LENGTH)) - 1); // keep the low 6 bytes
+}
+
+
+
+// Initializes this partial block from cmpctblock: places the prefilled txn, maps short IDs to
+// block positions and fills in matching mempool txn. Returns READ_STATUS_INVALID for malformed
+// input and READ_STATUS_FAILED when the short IDs are unusable (collision / uneven buckets).
+ReadStatus PartiallyDownloadedBlock::InitData(const CBlockHeaderAndShortTxIDs& cmpctblock) {
+    if (cmpctblock.header.IsNull() || (cmpctblock.shorttxids.empty() && cmpctblock.prefilledtxn.empty()))
+        return READ_STATUS_INVALID;
+    if (cmpctblock.shorttxids.size() + cmpctblock.prefilledtxn.size() > MAX_BLOCK_SIZE / MIN_TRANSACTION_SIZE)
+        return READ_STATUS_INVALID;
+
+    assert(header.IsNull() && txn_available.empty());
+    header = cmpctblock.header;
+    txn_available.resize(cmpctblock.BlockTxCount());
+
+    int32_t lastprefilledindex = -1;
+    for (size_t i = 0; i < cmpctblock.prefilledtxn.size(); i++) {
+        if (cmpctblock.prefilledtxn[i].tx.IsNull())
+            return READ_STATUS_INVALID;
+
+        lastprefilledindex += cmpctblock.prefilledtxn[i].index + 1; //index is a uint16_t, so cant overflow here
+        if (lastprefilledindex > std::numeric_limits<uint16_t>::max())
+            return READ_STATUS_INVALID;
+        if ((uint32_t)lastprefilledindex > cmpctblock.shorttxids.size() + i) {
+            // An index beyond our full list of shorttxids plus the prefilled txn inserted so far
+            // means there are txn for which we have neither a prefilled txn nor a shorttxid!
+            return READ_STATUS_INVALID;
+        }
+        txn_available[lastprefilledindex] = std::make_shared<CTransaction>(cmpctblock.prefilledtxn[i].tx);
+    }
+
+    // Calculate map of txids -> positions and check mempool to see what we have (or dont).
+    // Because well-formed cmpctblock messages will have a (relatively) uniform distribution of
+    // short IDs, any highly-uneven distribution can be safely treated as a READ_STATUS_FAILED.
+    std::unordered_map<uint64_t, uint16_t> shorttxids(cmpctblock.shorttxids.size());
+    uint16_t index_offset = 0;
+    for (size_t i = 0; i < cmpctblock.shorttxids.size(); i++) {
+        while (txn_available[i + index_offset])
+            index_offset++;
+        shorttxids[cmpctblock.shorttxids[i]] = i + index_offset;
+        // Bucket selection is a simple Binomial distribution. If we assume blocks of 10,000
+        // transactions, allowing up to 12 elements per bucket should only fail once every ~1.3 million
+        // blocks and once every 74,000 blocks in a worst-case 16,000-transaction block.
+        if (shorttxids.bucket_size(shorttxids.bucket(cmpctblock.shorttxids[i])) > 12)
+            return READ_STATUS_FAILED;
+    }
+    // TODO: in the shortid-collision case, we should instead request both transactions
+    // which collided. Falling back to full-block-request here is overkill.
+    if (shorttxids.size() != cmpctblock.shorttxids.size())
+        return READ_STATUS_FAILED; // Short ID collision
+
+    std::vector<bool> have_txn(txn_available.size());
+    size_t mempool_count = 0; // number of short IDs currently resolved to a mempool txn
+    LOCK(pool->cs);
+    for (CTxMemPool::txiter it = pool->mapTx.begin(); it != pool->mapTx.end(); it++) {
+        std::unordered_map<uint64_t, uint16_t>::iterator idit = shorttxids.find(cmpctblock.GetShortID(it->GetTx().GetHash()));
+        if (idit != shorttxids.end()) {
+            if (!have_txn[idit->second]) {
+                txn_available[idit->second] = it->GetSharedTx();
+                have_txn[idit->second]  = true;
+                mempool_count++;
+            } else if (txn_available[idit->second]) {
+                // If we find two mempool txn that match the short id, just request it. This should be rare enough
+                // that the extra bandwidth doesn't matter, but eating a round-trip due to FillBlock failure would be annoying
+                txn_available[idit->second].reset();
+                mempool_count--;
+            }
+        }
+        // Though ideally we'd continue scanning for the two-txn-match-shortid case, the performance
+        // win of an early exit here is too good to pass up and worth the extra risk.
+        if (mempool_count == shorttxids.size())
+            break;
+    }
+    return READ_STATUS_OK;
+}
+
+bool PartiallyDownloadedBlock::IsTxAvailable(size_t index) const {
+    assert(!header.IsNull()); // the header is set by InitData
+    assert(index < txn_available.size());
+    return static_cast<bool>(txn_available[index]);
+}
+
+// Assembles the full block from the txn already available plus vtx_missing, which must hold
+// exactly one txn per unavailable position, in block order. The result is run through CheckBlock.
+ReadStatus PartiallyDownloadedBlock::FillBlock(CBlock& block, const std::vector<CTransaction>& vtx_missing) const {
+    assert(!header.IsNull());
+    block = header;
+    block.vtx.resize(txn_available.size());
+
+    size_t missing_used = 0;
+    for (size_t pos = 0; pos < txn_available.size(); ++pos) {
+        if (txn_available[pos]) {
+            block.vtx[pos] = *txn_available[pos];
+            continue;
+        }
+        if (missing_used >= vtx_missing.size())
+            return READ_STATUS_INVALID;
+        block.vtx[pos] = vtx_missing[missing_used++];
+    }
+    if (missing_used != vtx_missing.size())
+        return READ_STATUS_INVALID;
+
+    CValidationState state;
+    if (CheckBlock(block, state, Params().GetConsensus()))
+        return READ_STATUS_OK;
+    // TODO: We really want to just check merkle tree manually here,
+    // but that is expensive, and CheckBlock caches a block's
+    // "checked-status" (in the CBlock?). CBlock should be able to
+    // check its own merkle root and cache that check.
+    // Possible corruption is reported as FAILED: Possible Short ID collision
+    return state.CorruptionPossible() ? READ_STATUS_FAILED : READ_STATUS_INVALID;
+}
diff --git a/src/blockencodings.h b/src/blockencodings.h
new file mode 100644
index 000000000..adc60c85d
--- /dev/null
+++ b/src/blockencodings.h
@@ -0,0 +1,205 @@
+// Copyright (c) 2016 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_BLOCK_ENCODINGS_H
+#define BITCOIN_BLOCK_ENCODINGS_H
+
+#include "primitives/block.h"
+
+#include <memory>
+
+class CTxMemPool;
+
+// Serialization helper wrapping a CTransaction by reference; for now it (de)serializes the tx unchanged (see TODO)
+struct TransactionCompressor {
+private:
+    CTransaction& tx; // the wrapped transaction; it must outlive this helper
+public:
+    TransactionCompressor(CTransaction& txIn) : tx(txIn) {}
+
+    ADD_SERIALIZE_METHODS;
+
+    template <typename Stream, typename Operation>
+    inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
+        READWRITE(tx); //TODO: Compress tx encoding
+    }
+};
+
+class BlockTransactionsRequest {
+public:
+    // A BlockTransactionsRequest message; on the wire each index is sent as the gap since the previous index
+    uint256 blockhash;
+    std::vector<uint16_t> indexes; // absolute indexes while in memory
+
+    ADD_SERIALIZE_METHODS;
+
+    template <typename Stream, typename Operation>
+    inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
+        READWRITE(blockhash);
+        uint64_t indexes_size = (uint64_t)indexes.size();
+        READWRITE(COMPACTSIZE(indexes_size));
+        if (ser_action.ForRead()) {
+            size_t i = 0;
+            while (indexes.size() < indexes_size) { // grow by at most 1000 entries per pass (presumably to bound allocation before data arrives)
+                indexes.resize(std::min((uint64_t)(1000 + indexes.size()), indexes_size));
+                for (; i < indexes.size(); i++) {
+                    uint64_t index = 0;
+                    READWRITE(COMPACTSIZE(index));
+                    if (index > std::numeric_limits<uint16_t>::max())
+                        throw std::ios_base::failure("index overflowed 16 bits");
+                    indexes[i] = index;
+                }
+            }
+
+            uint32_t offset = 0; // wider than uint16_t: after index 65535 the next offset is 65536 and must not wrap to 0
+            for (size_t j = 0; j < indexes.size(); j++) { // turn the differential indexes read above into absolute ones
+                if (uint64_t(indexes[j]) + uint64_t(offset) > std::numeric_limits<uint16_t>::max())
+                    throw std::ios_base::failure("indexes overflowed 16 bits");
+                indexes[j] = indexes[j] + offset;
+                offset = uint32_t(indexes[j]) + 1;
+            }
+        } else {
+            for (size_t i = 0; i < indexes.size(); i++) {
+                uint64_t index = indexes[i] - (i == 0 ? 0 : (indexes[i - 1] + 1)); // gap since the previous index
+                READWRITE(COMPACTSIZE(index));
+            }
+        }
+    }
+};
+
+class BlockTransactions {
+public:
+    // A BlockTransactions message: a list of transactions tagged with a block hash
+    uint256 blockhash;
+    std::vector<CTransaction> txn;
+
+    BlockTransactions() {}
+    BlockTransactions(const BlockTransactionsRequest& req) : // pre-sizes txn with one default-constructed tx per requested index
+        blockhash(req.blockhash), txn(req.indexes.size()) {}
+
+    ADD_SERIALIZE_METHODS;
+
+    template <typename Stream, typename Operation>
+    inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
+        READWRITE(blockhash);
+        uint64_t txn_size = (uint64_t)txn.size();
+        READWRITE(COMPACTSIZE(txn_size)); // when reading, txn_size is replaced by the count from the stream
+        if (ser_action.ForRead()) {
+            size_t i = 0; // next element to read; carried across the resize passes below
+            while (txn.size() < txn_size) { // grow by at most 1000 entries per pass
+                txn.resize(std::min((uint64_t)(1000 + txn.size()), txn_size));
+                for (; i < txn.size(); i++)
+                    READWRITE(REF(TransactionCompressor(txn[i])));
+            }
+        } else {
+            for (size_t i = 0; i < txn.size(); i++)
+                READWRITE(REF(TransactionCompressor(txn[i])));
+        }
+    }
+};
+
+// Dumb serialization/storage-helper for CBlockHeaderAndShortTxIDs and PartiallyDownloadedBlock
+struct PrefilledTransaction {
+    // Used as an offset since last prefilled tx in CBlockHeaderAndShortTxIDs,
+    // as a proper transaction-in-block-index in PartiallyDownloadedBlock
+    uint16_t index;
+    CTransaction tx;
+
+    ADD_SERIALIZE_METHODS;
+
+    template <typename Stream, typename Operation>
+    inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
+        uint64_t idx = index; // widened so the value can pass through COMPACTSIZE
+        READWRITE(COMPACTSIZE(idx));
+        if (idx > std::numeric_limits<uint16_t>::max()) // can only trigger with a value read from the stream
+            throw std::ios_base::failure("index overflowed 16-bits");
+        index = idx;
+        READWRITE(REF(TransactionCompressor(tx)));
+    }
+};
+
+typedef enum ReadStatus_t
+{
+    READ_STATUS_OK, // Object was processed successfully
+    READ_STATUS_INVALID, // Invalid object, peer is sending bogus data
+    READ_STATUS_FAILED, // Failed to process object (e.g. a short ID collision)
+} ReadStatus;
+
+class CBlockHeaderAndShortTxIDs {
+private:
+    mutable uint64_t shorttxidk0, shorttxidk1; // keys passed to SipHash by GetShortID; set by FillShortTxIDSelector
+    uint64_t nonce; // hashed together with the header to derive the two keys above
+
+    void FillShortTxIDSelector() const;
+
+    friend class PartiallyDownloadedBlock;
+
+    static const int SHORTTXIDS_LENGTH = 6; // bytes per short ID on the wire
+protected:
+    std::vector<uint64_t> shorttxids;
+    std::vector<PrefilledTransaction> prefilledtxn;
+
+public:
+    CBlockHeader header;
+
+    // Dummy for deserialization; nonce and the short ID keys are not initialized here
+    CBlockHeaderAndShortTxIDs() {}
+
+    CBlockHeaderAndShortTxIDs(const CBlock& block);
+
+    uint64_t GetShortID(const uint256& txhash) const;
+
+    size_t BlockTxCount() const { return shorttxids.size() + prefilledtxn.size(); }
+
+    ADD_SERIALIZE_METHODS;
+
+    template <typename Stream, typename Operation>
+    inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
+        READWRITE(header);
+        READWRITE(nonce);
+
+        uint64_t shorttxids_size = (uint64_t)shorttxids.size();
+        READWRITE(COMPACTSIZE(shorttxids_size)); // when reading, shorttxids_size is replaced by the count from the stream
+        if (ser_action.ForRead()) {
+            size_t i = 0; // next element to read; carried across the resize passes below
+            while (shorttxids.size() < shorttxids_size) { // grow by at most 1000 entries per pass
+                shorttxids.resize(std::min((uint64_t)(1000 + shorttxids.size()), shorttxids_size));
+                for (; i < shorttxids.size(); i++) {
+                    uint32_t lsb = 0; uint16_t msb = 0;
+                    READWRITE(lsb);
+                    READWRITE(msb);
+                    shorttxids[i] = (uint64_t(msb) << 32) | uint64_t(lsb); // reassemble the 48-bit short ID
+                    static_assert(SHORTTXIDS_LENGTH == 6, "shorttxids serialization assumes 6-byte shorttxids");
+                }
+            }
+        } else {
+            for (size_t i = 0; i < shorttxids.size(); i++) {
+                uint32_t lsb = shorttxids[i] & 0xffffffff; // low 32 bits are written first
+                uint16_t msb = (shorttxids[i] >> 32) & 0xffff; // then the next 16 bits
+                READWRITE(lsb);
+                READWRITE(msb);
+            }
+        }
+
+        READWRITE(prefilledtxn);
+
+        if (ser_action.ForRead())
+            FillShortTxIDSelector(); // header and nonce are known now, so the short ID keys can be derived
+    }
+};
+
+class PartiallyDownloadedBlock {
+protected:
+    std::vector<std::shared_ptr<const CTransaction> > txn_available; // one slot per block position; null while that txn is missing
+    CTxMemPool* pool; // mempool searched by InitData; never deleted by this class
+public:
+    CBlockHeader header; // set by InitData, which asserts it is still null on entry
+    PartiallyDownloadedBlock(CTxMemPool* poolIn) : pool(poolIn) {}
+
+    ReadStatus InitData(const CBlockHeaderAndShortTxIDs& cmpctblock); // call before the two methods below (they assert a non-null header)
+    bool IsTxAvailable(size_t index) const;
+    ReadStatus FillBlock(CBlock& block, const std::vector<CTransaction>& vtx_missing) const;
+};
+
+#endif