From 6013f9c2fd370f386a4a0a9f47e421e7ef8121ef Mon Sep 17 00:00:00 2001 From: reubenninan Date: Mon, 29 Jul 2024 20:30:16 +0000 Subject: [PATCH] Fix candidate stepdown logic Candidates should not be stepping down based on their pterm but rather their actual term. Signed-off-by: reubenninan --- server/raft.go | 7 +++--- server/raft_test.go | 52 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/server/raft.go b/server/raft.go index 5bf95ae4f0f..801708a321d 100644 --- a/server/raft.go +++ b/server/raft.go @@ -3187,10 +3187,9 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { // another node has taken on the leader role already, so we should convert // to a follower of that node instead. if n.State() == Candidate { - // Ignore old terms, otherwise we might end up stepping down incorrectly. - // Needs to be ahead of our pterm (last log index), as an isolated node - // could have bumped its vote term up considerably past this point. - if ae.term >= n.pterm { + // If we have a leader in the current term or higher, we should stepdown, + // write the term and vote if the term of the request is higher. + if ae.term >= n.term { // If the append entry term is newer than the current term, erase our // vote. if ae.term > n.term { diff --git a/server/raft_test.go b/server/raft_test.go index d3077185147..aa230a7c0d4 100644 --- a/server/raft_test.go +++ b/server/raft_test.go @@ -844,3 +844,55 @@ func TestNRGNoResetOnAppendEntryResponse(t *testing.T) { require_NotEqual(t, leader.pterm, 0) require_NotEqual(t, leader.pindex, 0) } + +func TestNRGCandidateDontStepdownDueToLeaderOfPreviousTerm(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R3S", 3) + defer c.shutdown() + c.waitOnLeader() + + nc, _ := jsClientConnect(t, c.leader(), nats.UserInfo("admin", "s3cr3t!")) + defer nc.Close() + + rg := c.createRaftGroup("TEST", 3, newStateAdder) + rg.waitOnLeader() + + // create a candidate that has received entries while they were a follower in a previous term + candidate := rg.nonLeader().node().(*raft) + candidate.Lock() + candidate.switchState(Candidate) + candidate.pterm = 50 + candidate.pindex = 70 + candidate.term = 100 + candidate.Unlock() + + // leader term is behind candidate + leader := rg.leader().node().(*raft) + leader.Lock() + leader.term = candidate.pterm + leader.pterm = candidate.pterm + leader.pindex = candidate.pindex + leader.Unlock() + + // Subscribe to the append entry subject. + sub, err := nc.SubscribeSync(leader.asubj) + require_NoError(t, err) + + // Get the first append entry that we receive, should be heartbeat from leader of prev term + msg, err := sub.NextMsg(5 * time.Second) + require_NoError(t, err) + + // Decode the append entry + ae, err := leader.decodeAppendEntry(msg.Data, nil, msg.Reply) + require_NoError(t, err) + + // check that the append entry is from the leader of the previous term + leader.RLock() + require_Equal(t, ae.term, leader.term) + require_Equal(t, ae.leader, leader.id) + leader.RUnlock() + + // check that we haven't stepped down + require_Equal(t, candidate.State(), Candidate) + // check that our term is still ahead of the leader's term + require_True(t, candidate.term > ae.term) +}