From 9fed4564f4aa979560a9446a40545bdb3ed85227 Mon Sep 17 00:00:00 2001
From: pasta <pasta@dashboost.org>
Date: Wed, 20 Nov 2024 22:08:56 -0600
Subject: [PATCH] Merge #6407: fix: dataraces

5078baea2b1ca3684ea582eda6ac4cea0924ce43 fix(test): wait for chainlock before mining a block we expect to include said chainlock (pasta)
f39c1e6f4c2f7595a0d8b369f9edf6bb0132c6ce fix: guard m_can_tx_relay behind m_tx_relay_mutex; make it private; add additional annotations (pasta)

Pull request description:

  ## Issue being fixed or feature implemented
  See each commit; fixes two bugs, both discovered while running feature_llmq_chainlocks.py with tsan / debug.

  one a datarace in net_processing.cpp and the other in the test I was using to ensure this fix was correct, feature_llmq_chainlocks

  ## What was done?
  ### net_processing.cpp
  You can see the datarace here: https://gist.github.com/PastaPastaPasta/c966a9f805758b34524085e3d52ea7f8

  We simply guard it with an existing mutex that is always locked in close proximity.

  ### feature_llmq_chainlocks.py
  Most of the time, while generating the cycle quorum, there is sufficient time to generate a chainlock; however, this is racey, and I've observed locally where the block gets generated before a chainlock is present and as such `test_coinbase_best_cl` fails. We should instead wait for the chainlock first, and then mine the block. This was we can ensure the mined block will include that chainlock.

  This was observed locally maybe 1/10 times or so

  ## How Has This Been Tested?
  ran feature_llmq_chainlocks.py ~40 times locally with tsan / debug

  ## Breaking Changes
  None

  ## Checklist:
    _Go over all the following points, and put an `x` in all the boxes that apply._
  - [x] I have performed a self-review of my own code
  - [x] I have commented my code, particularly in hard-to-understand areas
  - [ ] I have added or updated relevant unit/integration/functional/e2e tests
  - [ ] I have made corresponding changes to the documentation
  - [x] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_

ACKs for top commit:
  kwvg:
    utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43
  knst:
    utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43
  UdjinM6:
    utACK 5078baea2b1ca3684ea582eda6ac4cea0924ce43

Tree-SHA512: b346fc60809df72d0161f625073dce7062bd2641d35e4f80160fac9afeec63707de552e2856940ac2604875908ae3b98a225d352de36bfbfc6ee3fbe1e1538ff
---
 src/net_processing.cpp                     | 16 +++++++++-------
 test/functional/feature_llmq_chainlocks.py |  7 +++----
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/net_processing.cpp b/src/net_processing.cpp
index 8676ad337116d..b6f75dfa00155 100644
--- a/src/net_processing.cpp
+++ b/src/net_processing.cpp
@@ -309,21 +309,23 @@ struct Peer {
     /**
      * (Bitcoin) Initializes a TxRelay struct for this peer. Can be called at most once for a peer.
      * (Dash)    Enables the flag that allows GetTxRelay() to return m_tx_relay */
-    TxRelay* SetTxRelay()
+    TxRelay* SetTxRelay() LOCKS_EXCLUDED(m_tx_relay_mutex)
     {
+        LOCK(m_tx_relay_mutex);
         Assume(!m_can_tx_relay);
         m_can_tx_relay = true;
-        return WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get());
+        return m_tx_relay.get();
     };
 
-    TxRelay* GetInvRelay()
+    TxRelay* GetInvRelay() LOCKS_EXCLUDED(m_tx_relay_mutex)
     {
         return WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get());
     }
 
-    TxRelay* GetTxRelay()
+    TxRelay* GetTxRelay() LOCKS_EXCLUDED(m_tx_relay_mutex)
     {
-        return m_can_tx_relay ? WITH_LOCK(m_tx_relay_mutex, return m_tx_relay.get()) : nullptr;
+        LOCK(m_tx_relay_mutex);
+        return m_can_tx_relay ? m_tx_relay.get() : nullptr;
     };
 
     /** A vector of addresses to send to the peer, limited to MAX_ADDR_TO_SEND. */
@@ -353,8 +355,6 @@ struct Peer {
      *  This field must correlate with whether m_addr_known has been
      *  initialized.*/
     std::atomic_bool m_addr_relay_enabled{false};
-    /** Whether a peer can relay transactions */
-    bool m_can_tx_relay{false};
     /** Whether a getaddr request to this peer is outstanding. */
     bool m_getaddr_sent GUARDED_BY(NetEventsInterface::g_msgproc_mutex){false};
     /** Guards address sending timers. */
@@ -406,6 +406,8 @@ struct Peer {
      *           (non-transaction relay should use GetInvRelay(), which will provide
      *           unconditional access) */
     std::unique_ptr<TxRelay> m_tx_relay GUARDED_BY(m_tx_relay_mutex){std::make_unique<TxRelay>()};
+    /** Whether a peer can relay transactions */
+    bool m_can_tx_relay GUARDED_BY(m_tx_relay_mutex) {false};
 };
 
 using PeerRef = std::shared_ptr<Peer>;
diff --git a/test/functional/feature_llmq_chainlocks.py b/test/functional/feature_llmq_chainlocks.py
index 1714393939b00..599d350c7508b 100755
--- a/test/functional/feature_llmq_chainlocks.py
+++ b/test/functional/feature_llmq_chainlocks.py
@@ -56,11 +56,10 @@ def run_test(self):
         self.move_to_next_cycle()
         self.log.info("Cycle H+2C height:" + str(self.nodes[0].getblockcount()))
         self.mine_cycle_quorum(llmq_type_name="llmq_test_dip0024", llmq_type=103)
-
-
-        self.log.info("Mine single block, wait for chainlock")
-        self.generate(self.nodes[0], 1, sync_fun=self.no_op)
         self.wait_for_chainlocked_block_all_nodes(self.nodes[0].getbestblockhash())
+
+        self.log.info("Mine single block, ensure it includes latest chainlock")
+        self.generate(self.nodes[0], 1, sync_fun=self.sync_blocks)
         self.test_coinbase_best_cl(self.nodes[0])
 
         # ChainLock locks all the blocks below it so nocl_block_hash should be locked too