diff --git a/src/init.cpp b/src/init.cpp index 655743488..e57ea0f43 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) g_connman = std::unique_ptr(new CConnman(GetRand(std::numeric_limits::max()), GetRand(std::numeric_limits::max()))); CConnman& connman = *g_connman; - peerLogic.reset(new PeerLogicValidation(&connman)); + peerLogic.reset(new PeerLogicValidation(&connman, scheduler)); RegisterValidationInterface(peerLogic.get()); // sanitize comments per BIP-0014, format user agent and check total size diff --git a/src/net.cpp b/src/net.cpp index 258599747..5eaeaab8f 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -1693,6 +1693,37 @@ void CConnman::ProcessOneShot() } } +bool CConnman::GetTryNewOutboundPeer() +{ + return m_try_another_outbound_peer; +} + +void CConnman::SetTryNewOutboundPeer(bool flag) +{ + m_try_another_outbound_peer = flag; + LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false"); +} + +// Return the number of peers we have over our outbound connection limit +// Exclude peers that are marked for disconnect, or are going to be +// disconnected soon (eg one-shots and feelers) +// Also exclude peers that haven't finished initial connection handshake yet +// (so that we don't decide we're over our desired connection limit, and then +// evict some peer that has finished the handshake) +int CConnman::GetExtraOutboundCount() +{ + int nOutbound = 0; + { + LOCK(cs_vNodes); + for (CNode* pnode : vNodes) { + if (!pnode->fInbound && !pnode->m_manual_connection && !pnode->fFeeler && !pnode->fDisconnect && !pnode->fOneShot && pnode->fSuccessfullyConnected) { + ++nOutbound; + } + } + } + return std::max(nOutbound - nMaxOutbound, 0); +} + void CConnman::ThreadOpenConnections(const std::vector connect) { // Connect to specific addresses @@ -1781,7 +1812,8 @@ void CConnman::ThreadOpenConnections(const std::vector connect) // * Only make a feeler connection once every few minutes. // bool fFeeler = false; - if (nOutbound >= nMaxOutbound) { + + if (nOutbound >= nMaxOutbound && !GetTryNewOutboundPeer()) { int64_t nTime = GetTimeMicros(); // The current time right now (in microseconds). if (nTime > nNextFeeler) { nNextFeeler = PoissonNextSend(nTime, FEELER_INTERVAL); @@ -2204,6 +2236,7 @@ CConnman::CConnman(uint64_t nSeed0In, uint64_t nSeed1In) : nSeed0(nSeed0In), nSe semOutbound = nullptr; semAddnode = nullptr; flagInterruptMsgProc = false; + SetTryNewOutboundPeer(false); Options connOptions; Init(connOptions); diff --git a/src/net.h b/src/net.h index f373ab0cf..edca1171a 100644 --- a/src/net.h +++ b/src/net.h @@ -251,6 +251,19 @@ public: void GetBanned(banmap_t &banmap); void SetBanned(const banmap_t &banmap); + // This allows temporarily exceeding nMaxOutbound, with the goal of finding + // a peer that is better than all our current peers. + void SetTryNewOutboundPeer(bool flag); + bool GetTryNewOutboundPeer(); + + // Return the number of outbound peers we have in excess of our target (eg, + // if we previously called SetTryNewOutboundPeer(true), and have since set + // to false, we may have extra peers that we wish to disconnect). This may + // return a value less than (num_outbound_connections - num_outbound_slots) + // in cases where some outbound connections are not yet fully connected, or + // not yet fully disconnected. + int GetExtraOutboundCount(); + bool AddNode(const std::string& node); bool RemoveAddedNode(const std::string& node); std::vector GetAddedNodeInfo(); @@ -413,6 +426,13 @@ private: std::thread threadOpenAddedConnections; std::thread threadOpenConnections; std::thread threadMessageHandler; + + /** flag for deciding to connect to an extra outbound peer, + * in excess of nMaxOutbound + * This takes the place of a feeler connection */ + std::atomic_bool m_try_another_outbound_peer; + + friend struct CConnmanTest; }; extern std::unique_ptr g_connman; void Discover(boost::thread_group& threadGroup); diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 2e93809ef..35d73a6a2 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -23,6 +23,7 @@ #include "primitives/transaction.h" #include "random.h" #include "reverse_iterator.h" +#include "scheduler.h" #include "tinyformat.h" #include "txmempool.h" #include "ui_interface.h" @@ -127,6 +128,9 @@ namespace { /** Number of outbound peers with m_chain_sync.m_protect. */ int g_outbound_peers_with_protect_from_disconnect = 0; + /** When our tip was last updated. */ + int64_t g_last_tip_update = 0; + /** Relay map, protected by cs_main. */ typedef std::map MapRelay; MapRelay mapRelay; @@ -231,6 +235,9 @@ struct CNodeState { ChainSyncTimeoutState m_chain_sync; + //! Time of last new block announcement + int64_t m_last_block_announcement; + CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) { fCurrentlyConnected = false; nMisbehavior = 0; @@ -254,6 +261,7 @@ struct CNodeState { fWantsCmpctWitness = false; fSupportsDesiredCmpctVersion = false; m_chain_sync = { 0, nullptr, false, false }; + m_last_block_announcement = 0; } }; @@ -427,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) { } } +bool TipMayBeStale(const Consensus::Params &consensusParams) +{ + AssertLockHeld(cs_main); + if (g_last_tip_update == 0) { + g_last_tip_update = GetTime(); + } + return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty(); +} + // Requires cs_main bool CanDirectFetch(const Consensus::Params &consensusParams) { @@ -533,6 +550,15 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vectorm_last_block_announcement = time_in_seconds; +} + // Returns true for outbound peers, excluding manual connections, feelers, and // one-shots bool IsOutboundDisconnectionCandidate(const CNode *node) @@ -764,9 +790,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus: (GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT); } -PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) { +PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) { // Initialize global variables that cannot be constructed at startup. recentRejects.reset(new CRollingBloomFilter(120000, 0.000001)); + + const Consensus::Params& consensusParams = Params().GetConsensus(); + // Stale tip checking and peer eviction are on two different timers, but we + // don't want them to get out of sync due to drift in the scheduler, so we + // combine them in one function and schedule at the quicker (peer-eviction) + // timer. + static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer"); + scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000); } void PeerLogicValidation::BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindex, const std::vector& vtxConflicted) { @@ -797,6 +831,8 @@ void PeerLogicValidation::BlockConnected(const std::shared_ptr& pb } LogPrint(BCLog::MEMPOOL, "Erased %d orphan tx included or conflicted by block\n", nErased); } + + g_last_tip_update = GetTime(); } // All of the following cache a recent block, and are protected by cs_most_recent_block @@ -1215,6 +1251,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve return true; } + bool received_new_header = false; const CBlockIndex *pindexLast = nullptr; { LOCK(cs_main); @@ -1255,6 +1292,12 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve } hashLastBlock = header.GetHash(); } + + // If we don't have the last header, then they'll have given us + // something new (if these headers are valid). + if (mapBlockIndex.find(hashLastBlock) == mapBlockIndex.end()) { + received_new_header = true; + } } CValidationState state; @@ -1319,6 +1362,10 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve // because it is set in UpdateBlockAvailability. Some nullptr checks // are still present, however, as belt-and-suspenders. + if (received_new_header && pindexLast->nChainWork > chainActive.Tip()->nChainWork) { + nodestate->m_last_block_announcement = GetTime(); + } + if (nCount == MAX_HEADERS_RESULTS) { // Headers message had its maximum size; the peer may have more headers. // TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue @@ -1403,6 +1450,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve // If this is an outbound peer, check to see if we should protect // it from the bad/lagging chain logic. if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId()); nodestate->m_chain_sync.m_protect = true; ++g_outbound_peers_with_protect_from_disconnect; } @@ -2219,6 +2267,8 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr CBlockHeaderAndShortTxIDs cmpctblock; vRecv >> cmpctblock; + bool received_new_header = false; + { LOCK(cs_main); @@ -2228,6 +2278,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr connman->PushMessage(pfrom, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), uint256())); return true; } + + if (mapBlockIndex.find(cmpctblock.header.GetHash()) == mapBlockIndex.end()) { + received_new_header = true; + } } const CBlockIndex *pindex = nullptr; @@ -2266,6 +2320,14 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr assert(pindex); UpdateBlockAvailability(pfrom->GetId(), pindex->GetBlockHash()); + CNodeState *nodestate = State(pfrom->GetId()); + + // If this was a new header with more work than our tip, update the + // peer's last block announcement time + if (received_new_header && pindex->nChainWork > chainActive.Tip()->nChainWork) { + nodestate->m_last_block_announcement = GetTime(); + } + std::map::iterator> >::iterator blockInFlightIt = mapBlocksInFlight.find(pindex->GetBlockHash()); bool fAlreadyInFlight = blockInFlightIt != mapBlocksInFlight.end(); @@ -2288,8 +2350,6 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr if (!fAlreadyInFlight && !CanDirectFetch(chainparams.GetConsensus())) return true; - CNodeState *nodestate = State(pfrom->GetId()); - if (IsWitnessEnabled(pindex->pprev, chainparams.GetConsensus()) && !nodestate->fSupportsDesiredCmpctVersion) { // Don't bother trying to process compact blocks from v1 peers // after segwit activates. @@ -2967,6 +3027,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds) } } +void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds) +{ + // Check whether we have too many outbound peers + int extra_peers = connman->GetExtraOutboundCount(); + if (extra_peers > 0) { + // If we have more outbound peers than we target, disconnect one. + // Pick the outbound peer that least recently announced + // us a new block, with ties broken by choosing the more recent + // connection (higher node id) + NodeId worst_peer = -1; + int64_t oldest_block_announcement = std::numeric_limits::max(); + + LOCK(cs_main); + + connman->ForEachNode([&](CNode* pnode) { + // Ignore non-outbound peers, or nodes marked for disconnect already + if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return; + CNodeState *state = State(pnode->GetId()); + if (state == nullptr) return; // shouldn't be possible, but just in case + // Don't evict our protected peers + if (state->m_chain_sync.m_protect) return; + if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) { + worst_peer = pnode->GetId(); + oldest_block_announcement = state->m_last_block_announcement; + } + }); + if (worst_peer != -1) { + bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) { + // Only disconnect a peer that has been connected to us for + // some reasonable fraction of our check-frequency, to give + // it time for new information to have arrived. + // Also don't disconnect any peer we're trying to download a + // block from. + CNodeState &state = *State(pnode->GetId()); + if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) { + LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement); + pnode->fDisconnect = true; + return true; + } else { + LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight); + return false; + } + }); + if (disconnected) { + // If we disconnected an extra peer, that means we successfully + // connected to at least one peer after the last time we + // detected a stale tip. Don't try any more extra peers until + // we next detect a stale tip, to limit the load we put on the + // network from these extra connections. + connman->SetTryNewOutboundPeer(false); + } + } + } +} + +void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams) +{ + if (connman == nullptr) return; + + int64_t time_in_seconds = GetTime(); + + EvictExtraOutboundPeers(time_in_seconds); + + if (time_in_seconds > m_stale_tip_check_time) { + LOCK(cs_main); + // Check whether our tip is stale, and if so, allow using an extra + // outbound peer + if (TipMayBeStale(consensusParams)) { + LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update); + connman->SetTryNewOutboundPeer(true); + } else if (connman->GetTryNewOutboundPeer()) { + connman->SetTryNewOutboundPeer(false); + } + m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL; + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; diff --git a/src/net_processing.h b/src/net_processing.h index 656324bba..0a49972ee 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -8,6 +8,7 @@ #include "net.h" #include "validationinterface.h" +#include "consensus/params.h" /** Default for -maxorphantx, maximum number of orphan transactions kept in memory */ static const unsigned int DEFAULT_MAX_ORPHAN_TRANSACTIONS = 100; @@ -27,13 +28,19 @@ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/head static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4; /** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */ static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes +/** How frequently to check for stale tips, in seconds */ +static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; // 10 minutes +/** How frequently to check for extra outbound peers and disconnect, in seconds */ +static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45; +/** Minimum time an outbound-peer-eviction candidate must be connected for, in order to evict, in seconds */ +static constexpr int64_t MINIMUM_CONNECT_TIME = 30; class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: - CConnman* connman; + CConnman* const connman; public: - explicit PeerLogicValidation(CConnman* connman); + explicit PeerLogicValidation(CConnman* connman, CScheduler &scheduler); void BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindexConnected, const std::vector& vtxConflicted) override; void UpdatedBlockTip(const CBlockIndex *pindexNew, const CBlockIndex *pindexFork, bool fInitialDownload) override; @@ -55,6 +62,11 @@ public: bool SendMessages(CNode* pto, std::atomic& interrupt) override; void ConsiderEviction(CNode *pto, int64_t time_in_seconds); + void CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams); + void EvictExtraOutboundPeers(int64_t time_in_seconds); + +private: + int64_t m_stale_tip_check_time; //! Next time to check for stale tip }; struct CNodeStateStats { diff --git a/src/test/DoS_tests.cpp b/src/test/DoS_tests.cpp index 7bcf30483..d1f9e63ec 100644 --- a/src/test/DoS_tests.cpp +++ b/src/test/DoS_tests.cpp @@ -40,6 +40,8 @@ CService ip(uint32_t i) static NodeId id = 0; +void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds); + BOOST_FIXTURE_TEST_SUITE(DoS_tests, TestingSetup) // Test eviction of an outbound peer whose chain never advances @@ -87,6 +89,89 @@ BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction) peerLogic->FinalizeNode(dummyNode1.GetId(), dummy); } +void AddRandomOutboundPeer(std::vector &vNodes, PeerLogicValidation &peerLogic) +{ + CAddress addr(ip(GetRandInt(0xffffffff)), NODE_NONE); + vNodes.emplace_back(new CNode(id++, ServiceFlags(NODE_NETWORK|NODE_WITNESS), 0, INVALID_SOCKET, addr, 0, 0, CAddress(), "", /*fInboundIn=*/ false)); + CNode &node = *vNodes.back(); + node.SetSendVersion(PROTOCOL_VERSION); + + peerLogic.InitializeNode(&node); + node.nVersion = 1; + node.fSuccessfullyConnected = true; + + CConnmanTest::AddNode(node); +} + +BOOST_AUTO_TEST_CASE(stale_tip_peer_management) +{ + const Consensus::Params& consensusParams = Params().GetConsensus(); + constexpr int nMaxOutbound = 8; + CConnman::Options options; + options.nMaxConnections = 125; + options.nMaxOutbound = nMaxOutbound; + options.nMaxFeeler = 1; + + connman->Init(options); + std::vector vNodes; + + // Mock some outbound peers + for (int i=0; iCheckForStaleTipAndEvictPeers(consensusParams); + + // No nodes should be marked for disconnection while we have no extra peers + for (const CNode *node : vNodes) { + BOOST_CHECK(node->fDisconnect == false); + } + + SetMockTime(GetTime() + 3*consensusParams.nPowTargetSpacing + 1); + + // Now tip should definitely be stale, and we should look for an extra + // outbound peer + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + BOOST_CHECK(connman->GetTryNewOutboundPeer()); + + // Still no peers should be marked for disconnection + for (const CNode *node : vNodes) { + BOOST_CHECK(node->fDisconnect == false); + } + + // If we add one more peer, something should get marked for eviction + // on the next check (since we're mocking the time to be in the future, the + // required time connected check should be satisfied). + AddRandomOutboundPeer(vNodes, *peerLogic); + + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + for (int i=0; ifDisconnect == false); + } + // Last added node should get marked for eviction + BOOST_CHECK(vNodes.back()->fDisconnect == true); + + vNodes.back()->fDisconnect = false; + + // Update the last announced block time for the last + // peer, and check that the next newest node gets evicted. + UpdateLastBlockAnnounceTime(vNodes.back()->GetId(), GetTime()); + + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + for (int i=0; ifDisconnect == false); + } + BOOST_CHECK(vNodes[nMaxOutbound-1]->fDisconnect == true); + BOOST_CHECK(vNodes.back()->fDisconnect == false); + + bool dummy; + for (const CNode *node : vNodes) { + peerLogic->FinalizeNode(node->GetId(), dummy); + } + + CConnmanTest::ClearNodes(); +} + BOOST_AUTO_TEST_CASE(DoS_banning) { std::atomic interruptDummy(false); diff --git a/src/test/test_bitcoin.cpp b/src/test/test_bitcoin.cpp index 79bc48a11..85476b6da 100644 --- a/src/test/test_bitcoin.cpp +++ b/src/test/test_bitcoin.cpp @@ -25,6 +25,18 @@ #include +void CConnmanTest::AddNode(CNode& node) +{ + LOCK(g_connman->cs_vNodes); + g_connman->vNodes.push_back(&node); +} + +void CConnmanTest::ClearNodes() +{ + LOCK(g_connman->cs_vNodes); + g_connman->vNodes.clear(); +} + uint256 insecure_rand_seed = GetRandHash(); FastRandomContext insecure_rand_ctx(insecure_rand_seed); @@ -86,7 +98,7 @@ TestingSetup::TestingSetup(const std::string& chainName) : BasicTestingSetup(cha threadGroup.create_thread(&ThreadScriptCheck); g_connman = std::unique_ptr(new CConnman(0x1337, 0x1337)); // Deterministic randomness for tests. connman = g_connman.get(); - peerLogic.reset(new PeerLogicValidation(connman)); + peerLogic.reset(new PeerLogicValidation(connman, scheduler)); } TestingSetup::~TestingSetup() diff --git a/src/test/test_bitcoin.h b/src/test/test_bitcoin.h index 2390aca34..62ded2aaf 100644 --- a/src/test/test_bitcoin.h +++ b/src/test/test_bitcoin.h @@ -49,6 +49,12 @@ struct BasicTestingSetup { * Included are data directory, coins database, script check threads setup. */ class CConnman; +class CNode; +struct CConnmanTest { + static void AddNode(CNode& node); + static void ClearNodes(); +}; + class PeerLogicValidation; struct TestingSetup: public BasicTestingSetup { CCoinsViewDB *pcoinsdbview;