diff --git a/src/init.cpp b/src/init.cpp index 655743488..e57ea0f43 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) g_connman = std::unique_ptr(new CConnman(GetRand(std::numeric_limits::max()), GetRand(std::numeric_limits::max()))); CConnman& connman = *g_connman; - peerLogic.reset(new PeerLogicValidation(&connman)); + peerLogic.reset(new PeerLogicValidation(&connman, scheduler)); RegisterValidationInterface(peerLogic.get()); // sanitize comments per BIP-0014, format user agent and check total size diff --git a/src/net.cpp b/src/net.cpp index 077a51bac..5eaeaab8f 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -1701,6 +1701,7 @@ bool CConnman::GetTryNewOutboundPeer() void CConnman::SetTryNewOutboundPeer(bool flag) { m_try_another_outbound_peer = flag; + LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false"); } // Return the number of peers we have over our outbound connection limit diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 59df5d650..3aa13fff4 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -23,6 +23,7 @@ #include "primitives/transaction.h" #include "random.h" #include "reverse_iterator.h" +#include "scheduler.h" #include "tinyformat.h" #include "txmempool.h" #include "ui_interface.h" @@ -127,7 +128,6 @@ namespace { /** Number of outbound peers with m_chain_sync.m_protect. */ int g_outbound_peers_with_protect_from_disconnect = 0; - /** When our tip was last updated. */ int64_t g_last_tip_update = 0; @@ -435,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) { } } +bool TipMayBeStale(const Consensus::Params &consensusParams) +{ + AssertLockHeld(cs_main); + if (g_last_tip_update == 0) { + g_last_tip_update = GetTime(); + } + return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty(); +} + // Requires cs_main bool CanDirectFetch(const Consensus::Params &consensusParams) { @@ -772,9 +781,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus: (GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT); } -PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) { +PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) { // Initialize global variables that cannot be constructed at startup. recentRejects.reset(new CRollingBloomFilter(120000, 0.000001)); + + const Consensus::Params& consensusParams = Params().GetConsensus(); + // Stale tip checking and peer eviction are on two different timers, but we + // don't want them to get out of sync due to drift in the scheduler, so we + // combine them in one function and schedule at the quicker (peer-eviction) + // timer. + static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer"); + scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000); } void PeerLogicValidation::BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindex, const std::vector& vtxConflicted) { @@ -1424,6 +1441,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve // If this is an outbound peer, check to see if we should protect // it from the bad/lagging chain logic. if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId()); nodestate->m_chain_sync.m_protect = true; ++g_outbound_peers_with_protect_from_disconnect; } @@ -3004,6 +3022,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds) } } +void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds) +{ + // Check whether we have too many outbound peers + int extra_peers = connman->GetExtraOutboundCount(); + if (extra_peers > 0) { + // If we have more outbound peers than we target, disconnect one. + // Pick the outbound peer that least recently announced + // us a new block, with ties broken by choosing the more recent + // connection (higher node id) + NodeId worst_peer = -1; + int64_t oldest_block_announcement = std::numeric_limits::max(); + + LOCK(cs_main); + + connman->ForEachNode([&](CNode* pnode) { + // Ignore non-outbound peers, or nodes marked for disconnect already + if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return; + CNodeState *state = State(pnode->GetId()); + if (state == nullptr) return; // shouldn't be possible, but just in case + // Don't evict our protected peers + if (state->m_chain_sync.m_protect) return; + if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) { + worst_peer = pnode->GetId(); + oldest_block_announcement = state->m_last_block_announcement; + } + }); + if (worst_peer != -1) { + bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) { + // Only disconnect a peer that has been connected to us for + // some reasonable fraction of our check-frequency, to give + // it time for new information to have arrived. + // Also don't disconnect any peer we're trying to download a + // block from. + CNodeState &state = *State(pnode->GetId()); + if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) { + LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement); + pnode->fDisconnect = true; + return true; + } else { + LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight); + return false; + } + }); + if (disconnected) { + // If we disconnected an extra peer, that means we successfully + // connected to at least one peer after the last time we + // detected a stale tip. Don't try any more extra peers until + // we next detect a stale tip, to limit the load we put on the + // network from these extra connections. + connman->SetTryNewOutboundPeer(false); + } + } + } +} + +void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams) +{ + if (connman == nullptr) return; + + int64_t time_in_seconds = GetTime(); + + EvictExtraOutboundPeers(time_in_seconds); + + if (time_in_seconds > m_stale_tip_check_time) { + LOCK(cs_main); + // Check whether our tip is stale, and if so, allow using an extra + // outbound peer + if (TipMayBeStale(consensusParams)) { + LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update); + connman->SetTryNewOutboundPeer(true); + } else if (connman->GetTryNewOutboundPeer()) { + connman->SetTryNewOutboundPeer(false); + } + m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL; + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; diff --git a/src/net_processing.h b/src/net_processing.h index 656324bba..0a49972ee 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -8,6 +8,7 @@ #include "net.h" #include "validationinterface.h" +#include "consensus/params.h" /** Default for -maxorphantx, maximum number of orphan transactions kept in memory */ static const unsigned int DEFAULT_MAX_ORPHAN_TRANSACTIONS = 100; @@ -27,13 +28,19 @@ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/head static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4; /** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */ static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes +/** How frequently to check for stale tips, in seconds */ +static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; // 10 minutes +/** How frequently to check for extra outbound peers and disconnect, in seconds */ +static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45; +/** Minimum time an outbound-peer-eviction candidate must be connected for, in order to evict, in seconds */ +static constexpr int64_t MINIMUM_CONNECT_TIME = 30; class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: - CConnman* connman; + CConnman* const connman; public: - explicit PeerLogicValidation(CConnman* connman); + explicit PeerLogicValidation(CConnman* connman, CScheduler &scheduler); void BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindexConnected, const std::vector& vtxConflicted) override; void UpdatedBlockTip(const CBlockIndex *pindexNew, const CBlockIndex *pindexFork, bool fInitialDownload) override; @@ -55,6 +62,11 @@ public: bool SendMessages(CNode* pto, std::atomic& interrupt) override; void ConsiderEviction(CNode *pto, int64_t time_in_seconds); + void CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams); + void EvictExtraOutboundPeers(int64_t time_in_seconds); + +private: + int64_t m_stale_tip_check_time; //! Next time to check for stale tip }; struct CNodeStateStats { diff --git a/src/test/test_bitcoin.cpp b/src/test/test_bitcoin.cpp index 79bc48a11..c768446d3 100644 --- a/src/test/test_bitcoin.cpp +++ b/src/test/test_bitcoin.cpp @@ -86,7 +86,7 @@ TestingSetup::TestingSetup(const std::string& chainName) : BasicTestingSetup(cha threadGroup.create_thread(&ThreadScriptCheck); g_connman = std::unique_ptr(new CConnman(0x1337, 0x1337)); // Deterministic randomness for tests. connman = g_connman.get(); - peerLogic.reset(new PeerLogicValidation(connman)); + peerLogic.reset(new PeerLogicValidation(connman, scheduler)); } TestingSetup::~TestingSetup()