diff --git a/qa/rpc-tests/llmq-chainlocks.py b/qa/rpc-tests/llmq-chainlocks.py index 2086b0d15c38..1ce0e4fbd5a5 100755 --- a/qa/rpc-tests/llmq-chainlocks.py +++ b/qa/rpc-tests/llmq-chainlocks.py @@ -68,17 +68,17 @@ def run_test(self): # Keep node connected and let it try to reorg the chain good_tip = self.nodes[0].getbestblockhash() - self.nodes[0].invalidateblock(self.nodes[0].getbestblockhash()) # Restart it so that it forgets all the chainlocks from the past stop_node(self.nodes[0], 0) self.nodes[0] = start_node(0, self.options.tmpdir, self.extra_args) connect_nodes(self.nodes[0], 1) + self.nodes[0].invalidateblock(self.nodes[0].getbestblockhash()) # Now try to reorg the chain self.nodes[0].generate(2) - sleep(2) + sleep(6) assert(self.nodes[1].getbestblockhash() == good_tip) self.nodes[0].generate(2) - sleep(2) + sleep(6) assert(self.nodes[1].getbestblockhash() == good_tip) # Now let the node which is on the wrong chain reorg back to the locked chain diff --git a/src/llmq/quorums_chainlocks.cpp b/src/llmq/quorums_chainlocks.cpp index 3721bfebc683..2ab6372046bd 100644 --- a/src/llmq/quorums_chainlocks.cpp +++ b/src/llmq/quorums_chainlocks.cpp @@ -40,7 +40,8 @@ void CChainLocksHandler::Start() { quorumSigningManager->RegisterRecoveredSigsListener(this); scheduler->scheduleEvery([&]() { - // regularely retry signing the current chaintip as it might have failed before due to missing ixlocks + EnforceBestChainLock(); + // regularly retry signing the current chaintip as it might have failed before due to missing ixlocks TrySignChainTip(); }, 5000); } @@ -151,52 +152,47 @@ void CChainLocksHandler::ProcessNewChainLock(NodeId from, const llmq::CChainLock bestChainLockBlockIndex = pindex; } - EnforceBestChainLock(); + scheduler->scheduleFromNow([&]() { + EnforceBestChainLock(); + }, 0); LogPrintf("CChainLocksHandler::%s -- processed new CLSIG (%s), peer=%d\n", __func__, clsig.ToString(), from); - - if (lastNotifyChainLockBlockIndex != bestChainLockBlockIndex) { - lastNotifyChainLockBlockIndex = bestChainLockBlockIndex; - GetMainSignals().NotifyChainLock(bestChainLockBlockIndex); - } } void CChainLocksHandler::AcceptedBlockHeader(const CBlockIndex* pindexNew) { - bool doEnforce = false; - { - LOCK2(cs_main, cs); + LOCK2(cs_main, cs); - if (pindexNew->GetBlockHash() == bestChainLock.blockHash) { - LogPrintf("CChainLocksHandler::%s -- block header %s came in late, updating and enforcing\n", __func__, pindexNew->GetBlockHash().ToString()); - - if (bestChainLock.nHeight != pindexNew->nHeight) { - // Should not happen, same as the conflict check from ProcessNewChainLock. - LogPrintf("CChainLocksHandler::%s -- height of CLSIG (%s) does not match the specified block's height (%d)\n", - __func__, bestChainLock.ToString(), pindexNew->nHeight); - return; - } + if (pindexNew->GetBlockHash() == bestChainLock.blockHash) { + LogPrintf("CChainLocksHandler::%s -- block header %s came in late, updating and enforcing\n", __func__, pindexNew->GetBlockHash().ToString()); - bestChainLockBlockIndex = pindexNew; - doEnforce = true; + if (bestChainLock.nHeight != pindexNew->nHeight) { + // Should not happen, same as the conflict check from ProcessNewChainLock. + LogPrintf("CChainLocksHandler::%s -- height of CLSIG (%s) does not match the specified block's height (%d)\n", + __func__, bestChainLock.ToString(), pindexNew->nHeight); + return; } - } - if (doEnforce) { - EnforceBestChainLock(); + + // when EnforceBestChainLock is called later, it might end up invalidating other chains but not activating the + // CLSIG locked chain. This happens when only the header is known but the block is still missing yet. The usual + // block processing logic will handle this when the block arrives + bestChainLockBlockIndex = pindexNew; } } void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBlockIndex* pindexFork) { // don't call TrySignChainTip directly but instead let the scheduler call it. This way we ensure that cs_main is - // never locked and TrySignChainTip is not called twice in parallel + // never locked and TrySignChainTip is not called twice in parallel. Also avoids recursive calls due to + // EnforceBestChainLock switching chains. LOCK(cs); if (tryLockChainTipScheduled) { return; } tryLockChainTipScheduled = true; scheduler->scheduleFromNow([&]() { + EnforceBestChainLock(); TrySignChainTip(); LOCK(cs); tryLockChainTipScheduled = false; @@ -231,17 +227,6 @@ void CChainLocksHandler::TrySignChainTip() { LOCK(cs); - if (bestChainLockBlockIndex == pindex) { - // we first got the CLSIG, then the header, and then the block was connected. - // In this case there is no need to continue here. - // However, NotifyChainLock might not have been called yet, so call it now if needed - if (lastNotifyChainLockBlockIndex != bestChainLockBlockIndex) { - lastNotifyChainLockBlockIndex = bestChainLockBlockIndex; - GetMainSignals().NotifyChainLock(bestChainLockBlockIndex); - } - return; - } - if (pindex->nHeight == lastSignedHeight) { // already signed this one return; @@ -253,12 +238,8 @@ void CChainLocksHandler::TrySignChainTip() } if (InternalHasConflictingChainLock(pindex->nHeight, pindex->GetBlockHash())) { - if (!inEnforceBestChainLock) { - // we accepted this block when there was no lock yet, but now a conflicting lock appeared. Invalidate it. - LogPrintf("CChainLocksHandler::%s -- conflicting lock after block was accepted, invalidating now\n", - __func__); - ScheduleInvalidateBlock(pindex); - } + // don't sign if another conflicting CLSIG is already present. EnforceBestChainLock will later enforce + // the correct chain. return; } } @@ -403,23 +384,30 @@ bool CChainLocksHandler::IsTxSafeForMining(const uint256& txid) } // WARNING: cs_main and cs should not be held! +// This should also not be called from validation signals, as this might result in recursive calls void CChainLocksHandler::EnforceBestChainLock() { CChainLockSig clsig; const CBlockIndex* pindex; + const CBlockIndex* currentBestChainLockBlockIndex; { LOCK(cs); clsig = bestChainLockWithKnownBlock; - pindex = bestChainLockBlockIndex; + pindex = currentBestChainLockBlockIndex = this->bestChainLockBlockIndex; + + if (!currentBestChainLockBlockIndex) { + // we don't have the header/block, so we can't do anything right now + return; + } } + bool activateNeeded; { LOCK(cs_main); // Go backwards through the chain referenced by clsig until we find a block that is part of the main chain. // For each of these blocks, check if there are children that are NOT part of the chain referenced by clsig // and invalidate each of them. - inEnforceBestChainLock = true; // avoid unnecessary ScheduleInvalidateBlock calls inside UpdatedBlockTip while (pindex && !chainActive.Contains(pindex)) { // Invalidate all blocks that have the same prevBlockHash but are not equal to blockHash auto itp = mapPrevBlockIndex.equal_range(pindex->pprev->GetBlockHash()); @@ -434,14 +422,34 @@ void CChainLocksHandler::EnforceBestChainLock() pindex = pindex->pprev; } - inEnforceBestChainLock = false; + // In case blocks from the correct chain are invalid at the moment, reconsider them. The only case where this + // can happen right now is when missing superblock triggers caused the main chain to be dismissed first. When + // the trigger later appears, this should bring us to the correct chain eventually. Please note that this does + // NOT enforce invalid blocks in any way, it just causes re-validation. + if (!currentBestChainLockBlockIndex->IsValid()) { + ResetBlockFailureFlags(mapBlockIndex.at(currentBestChainLockBlockIndex->GetBlockHash())); + } + + activateNeeded = chainActive.Tip()->GetAncestor(currentBestChainLockBlockIndex->nHeight) != currentBestChainLockBlockIndex; } CValidationState state; - if (!ActivateBestChain(state, Params())) { + if (activateNeeded && !ActivateBestChain(state, Params())) { LogPrintf("CChainLocksHandler::%s -- ActivateBestChain failed: %s\n", __func__, FormatStateMessage(state)); - // This should not have happened and we are in a state were it's not safe to continue anymore - assert(false); + } + + const CBlockIndex* pindexNotify = nullptr; + { + LOCK(cs_main); + if (lastNotifyChainLockBlockIndex != currentBestChainLockBlockIndex && + chainActive.Tip()->GetAncestor(currentBestChainLockBlockIndex->nHeight) == currentBestChainLockBlockIndex) { + lastNotifyChainLockBlockIndex = currentBestChainLockBlockIndex; + pindexNotify = currentBestChainLockBlockIndex; + } + } + + if (pindexNotify) { + GetMainSignals().NotifyChainLock(pindexNotify); } } @@ -471,16 +479,6 @@ void CChainLocksHandler::HandleNewRecoveredSig(const llmq::CRecoveredSig& recove ProcessNewChainLock(-1, clsig, ::SerializeHash(clsig)); } -void CChainLocksHandler::ScheduleInvalidateBlock(const CBlockIndex* pindex) -{ - // Calls to InvalidateBlock and ActivateBestChain might result in re-invocation of the UpdatedBlockTip and other - // signals, so we can't directly call it from signal handlers. We solve this by doing the call from the scheduler - - scheduler->scheduleFromNow([this, pindex]() { - DoInvalidateBlock(pindex, true); - }, 0); -} - // WARNING, do not hold cs while calling this method as we'll otherwise run into a deadlock void CChainLocksHandler::DoInvalidateBlock(const CBlockIndex* pindex, bool activateBestChain) { diff --git a/src/llmq/quorums_chainlocks.h b/src/llmq/quorums_chainlocks.h index 722621fc654f..660a983a7118 100644 --- a/src/llmq/quorums_chainlocks.h +++ b/src/llmq/quorums_chainlocks.h @@ -53,7 +53,6 @@ class CChainLocksHandler : public CRecoveredSigsListener CScheduler* scheduler; CCriticalSection cs; bool tryLockChainTipScheduled{false}; - std::atomic inEnforceBestChainLock{false}; uint256 bestChainLockHash; CChainLockSig bestChainLock; @@ -104,7 +103,6 @@ class CChainLocksHandler : public CRecoveredSigsListener bool InternalHasChainLock(int nHeight, const uint256& blockHash); bool InternalHasConflictingChainLock(int nHeight, const uint256& blockHash); - void ScheduleInvalidateBlock(const CBlockIndex* pindex); void DoInvalidateBlock(const CBlockIndex* pindex, bool activateBestChain); void Cleanup(); diff --git a/src/llmq/quorums_dkgsessionhandler.cpp b/src/llmq/quorums_dkgsessionhandler.cpp index d4770e6f6c52..311a931435b6 100644 --- a/src/llmq/quorums_dkgsessionhandler.cpp +++ b/src/llmq/quorums_dkgsessionhandler.cpp @@ -553,7 +553,7 @@ void CDKGSessionHandler::PhaseHandlerThread() status.aborted = true; return true; }); - LogPrintf("CDKGSessionHandler::%s -- aborted current DKG session\n", __func__); + LogPrintf("CDKGSessionHandler::%s -- aborted current DKG session for llmq=%s\n", __func__, params.name); } } } diff --git a/src/validation.cpp b/src/validation.cpp index ae8a13c7877a..a198aee3bd34 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2888,6 +2888,11 @@ bool ActivateBestChain(CValidationState &state, const CChainParams& chainparams, // sanely for performance or correctness! AssertLockNotHeld(cs_main); + // make sure that no matter what, only one thread is executing ActivateBestChain. This avoids a race condition when + // validation signals are invoked, which might result in out-of-order execution. + static CCriticalSection cs_activateBestChain; + LOCK(cs_activateBestChain); + CBlockIndex *pindexMostWork = NULL; CBlockIndex *pindexNewTip = NULL; do {