From 9fed4564f4aa979560a9446a40545bdb3ed85227 Mon Sep 17 00:00:00 2001 From: pasta Date: Wed, 20 Nov 2024 22:08:56 -0600 Subject: [PATCH] Merge #6407: fix: dataraces 5078baea2b1ca3684ea582eda6ac4cea0924ce43 fix(test): wait for chainlock before mining a block we expect to include said chainlock (pasta) f39c1e6f4c2f7595a0d8b369f9edf6bb0132c6ce fix: guard m_can_tx_relay behind m_tx_relay_mutex; make it private; add additional annotations (pasta) Pull request description: ## Issue being fixed or feature implemented See each commit; fixes two bugs, both discovered while running feature_llmq_chainlocks.py with tsan / debug. one a datarace in net_processing.cpp and the other in the test I was using to ensure this fix was correct, feature_llmq_chainlocks ## What was done? ### net_processing.cpp You can see the datarace here: https://gist.github.com/PastaPastaPasta/c966a9f805758b34524085e3d52ea7f8 We simply guard it with an existing mutex that is always locked in close proximity. ### feature_llmq_chainlocks.py Most of the time, while generating the cycle quorum, there is sufficient time to generate a chainlock; however, this is racey, and I've observed locally where the block gets generated before a chainlock is present and as such `test_coinbase_best_cl` fails. We should instead wait for the chainlock first, and then mine the block. This was we can ensure the mined block will include that chainlock. This was observed locally maybe 1/10 times or so ## How Has This Been Tested? ran feature_llmq_chainlocks.py ~40 times locally with tsan / debug ## Breaking Changes None ## Checklist: _Go over all the following points, and put an `x` in all the boxes that apply._ - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have added or updated relevant unit/integration/functional/e2e tests - [ ] I have made corresponding changes to the documentation - [x] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_ ACKs for top commit: kwvg: utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43 knst: utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43 UdjinM6: utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43 Tree-SHA512: b346fc60809df72d0161f625073dce7062bd2641d35e4f80160fac9afeec63707de552e2856940ac2604875908ae3b98a225d352de36bfbfc6ee3fbe1e1538ff --- src/net_processing.cpp | 16 +++++++++------- test/functional/feature_llmq_chainlocks.py | 7 +++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 8676ad337116d..b6f75dfa00155 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -309,21 +309,23 @@ struct Peer { /** * (Bitcoin) Initializes a TxRelay struct for this peer. Can be called at most once for a peer. * (Dash) Enables the flag that allows GetTxRelay() to return m_tx_relay */ - TxRelay* SetTxRelay() + TxRelay* SetTxRelay() LOCKS_EXCLUDED(m_tx_relay_mutex) { + LOCK(m_tx_relay_mutex); Assume(!m_can_tx_relay); m_can_tx_relay = true; - return WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get()); + return m_tx_relay.get(); }; - TxRelay* GetInvRelay() + TxRelay* GetInvRelay() LOCKS_EXCLUDED(m_tx_relay_mutex) { return WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get()); } - TxRelay* GetTxRelay() + TxRelay* GetTxRelay() LOCKS_EXCLUDED(m_tx_relay_mutex) { - return m_can_tx_relay ? WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get()) : nullptr; + LOCK(m_tx_relay_mutex); + return m_can_tx_relay ? m_tx_relay.get() : nullptr; }; /** A vector of addresses to send to the peer, limited to MAX_ADDR_TO_SEND. */ @@ -353,8 +355,6 @@ struct Peer { * This field must correlate with whether m_addr_known has been * initialized.*/ std::atomic_bool m_addr_relay_enabled{false}; - /** Whether a peer can relay transactions */ - bool m_can_tx_relay{false}; /** Whether a getaddr request to this peer is outstanding. */ bool m_getaddr_sent GUARDED_BY(NetEventsInterface::g_msgproc_mutex){false}; /** Guards address sending timers. */ @@ -406,6 +406,8 @@ struct Peer { * (non-transaction relay should use GetInvRelay(), which will provide * unconditional access) */ std::unique_ptr m_tx_relay GUARDED_BY(m_tx_relay_mutex){std::make_unique()}; + /** Whether a peer can relay transactions */ + bool m_can_tx_relay GUARDED_BY(m_tx_relay_mutex) {false}; }; using PeerRef = std::shared_ptr; diff --git a/test/functional/feature_llmq_chainlocks.py b/test/functional/feature_llmq_chainlocks.py index 1714393939b00..599d350c7508b 100755 --- a/test/functional/feature_llmq_chainlocks.py +++ b/test/functional/feature_llmq_chainlocks.py @@ -56,11 +56,10 @@ def run_test(self): self.move_to_next_cycle() self.log.info("Cycle H+2C height:" + str(self.nodes[0].getblockcount())) self.mine_cycle_quorum(llmq_type_name="llmq_test_dip0024", llmq_type=103) - - - self.log.info("Mine single block, wait for chainlock") - self.generate(self.nodes[0], 1, sync_fun=self.no_op) self.wait_for_chainlocked_block_all_nodes(self.nodes[0].getbestblockhash()) + + self.log.info("Mine single block, ensure it includes latest chainlock") + self.generate(self.nodes[0], 1, sync_fun=self.sync_blocks) self.test_coinbase_best_cl(self.nodes[0]) # ChainLock locks all the blocks below it so nocl_block_hash should be locked too