[v24.1.x] Fix ghost node handling in raft heartbeats #23934

Merged · 3 commits · Oct 29, 2024
4 changes: 2 additions & 2 deletions src/v/raft/consensus.cc
@@ -3930,7 +3930,7 @@ reply_result consensus::lightweight_heartbeat(
       target_node,
       _self,
       source_node);
-    return reply_result::failure;
+    return reply_result::group_unavailable;
 }

 /**
@@ -3985,7 +3985,7 @@ ss::future<full_heartbeat_reply> consensus::full_heartbeat(
       target_vnode,
       _self,
       source_vnode);
-    reply.result = reply_result::failure;
+    reply.result = reply_result::group_unavailable;
     co_return reply;
 }
 /**
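The switch from failure to group_unavailable changes what the sender learns from a heartbeat that reached a node id which does not match the receiver: the peer is reachable, it just does not host that group. Below is a minimal, self-contained sketch of why that distinction matters on the caller side; everything other than the failure and group_unavailable values is an illustrative assumption, not Redpanda's actual heartbeat code.

// Hypothetical sender-side handling; only `failure` and
// `group_unavailable` come from this PR, the rest is assumed.
#include <cstdio>

enum class reply_result { success, failure, group_unavailable };

void on_heartbeat_reply(reply_result r) {
    switch (r) {
    case reply_result::success:
        break;
    case reply_result::group_unavailable:
        // The peer is up but does not host this group (e.g. the
        // heartbeat was addressed to a ghost node id): retrying the
        // same request is pointless; reconcile membership instead.
        std::puts("group unavailable on peer; reconcile membership");
        break;
    case reply_result::failure:
        // A genuine error: treat the peer as failing and retry
        // with backoff.
        std::puts("heartbeat failed; retry with backoff");
        break;
    }
}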
11 changes: 11 additions & 0 deletions src/v/raft/heartbeat_manager.cc
@@ -455,6 +455,17 @@ void heartbeat_manager::process_reply(
         return;
     }
     auto& reply = r.value();
+
+    if (reply.source() != n) {
+        vlog(
+          raftlog.warn,
+          "got heartbeat reply from a different node id {} (expected {}), "
+          "ignoring",
+          reply.source(),
+          n);
+        return;
+    }
+
     reply.for_each_lw_reply([this, n, target = reply.target(), &groups](
                               group_id group, reply_result result) {
         auto it = _consensus_groups.find(group);
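The new guard drops any reply whose source node id differs from the peer the heartbeat was sent to, so a reply carrying a ghost node id can no longer be processed as if it came from the expected peer. A self-contained sketch of the same check, using stand-in types (the real code uses Redpanda's heartbeat_reply and vlog):

// Stand-in types for illustration; only the source()-vs-expected
// comparison mirrors the PR.
#include <cstdio>

using node_id = int;

struct heartbeat_reply {
    node_id source_node;
    node_id source() const { return source_node; }
};

// Returns true only when the reply really came from the peer we polled.
bool should_process(const heartbeat_reply& reply, node_id expected) {
    if (reply.source() != expected) {
        std::printf(
          "got heartbeat reply from a different node id %d (expected %d), "
          "ignoring\n",
          reply.source(),
          expected);
        return false; // mirrors the early `return` in process_reply()
    }
    return true;
}

int main() {
    heartbeat_reply ghost{4};
    if (should_process(ghost, 2)) { /* dispatch per-group results */ }
}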
17 changes: 6 additions & 11 deletions tests/rptest/tests/admin_uuid_operations_test.py
@@ -276,14 +276,12 @@ def test_force_uuid_override(self, mode):
             backoff_sec=2,
             err_msg=f"{to_stop.name} did not take the UUID override")

-        self.logger.debug(f"Wait for the cluster to become healthy...")
+        self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...")
+        self._decommission(ghost_node_id)
+
+        self.logger.debug(f"...and wait for the cluster to become healthy.")
         self.wait_until_cluster_healthy(timeout_sec=30)

-        self.logger.debug(
-            f".. and decommission ghost node [{ghost_node_id}]...")
-        self._decommission(ghost_node_id)
-
         self.logger.debug(
             "Check that all this state sticks across a rolling restart")

@@ -373,14 +371,11 @@ def test_force_uuid_override_multinode(self, mode):
             auto_assign_node_id=True,
         )

-        self.logger.debug("Wait for the cluster to become healthy...")
+        self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...")
+        self._decommission(ghost_node_id)
+
+        self.logger.debug("...and wait for the cluster to become healthy.")
         controller_leader = self.wait_until_cluster_healthy(timeout_sec=30)

         assert controller_leader is not None, "Didn't elect a controller leader"
         assert controller_leader not in to_stop, f"Unexpected controller leader {controller_leader.account.hostname}"

-        self.logger.debug(
-            f"...and decommission ghost node [{ghost_node_id}]...")
-
-        self._decommission(ghost_node_id)
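In both tests the order of operations is reversed: the ghost node is decommissioned first, and only then does the test wait for the cluster to report healthy. Presumably, with the stricter ghost-node handling above, the cluster does not report healthy while the ghost node id is still registered, so the old wait-then-decommission ordering could no longer pass.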