Skip to content

Commit

Permalink
raft: don't step down to newer term while fortifying
Browse files Browse the repository at this point in the history
Informs cockroachdb#132762.

This commit fixes a bug where a peer would step down to a newer term while
fortifying a leader. This would break the leader fortification promise, which
is critical for the leader lease disjointness property.

Instead, we now ignore the message as a follower and let the leader handle the
process of stepping down, safely defortifying, and eventually stepping back up
leader in the new term.

A subsequent commit will combine this logic with the other leader lease check
for votes and pre-votes.

Release note: None
  • Loading branch information
nvanbenschoten committed Nov 12, 2024
1 parent 4680ec6 commit 61226e5
Show file tree
Hide file tree
Showing 23 changed files with 372 additions and 131 deletions.
1 change: 1 addition & 0 deletions pkg/raft/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ go_test(
"//pkg/raft/tracker",
"//pkg/settings/cluster",
"//pkg/testutils",
"//pkg/util/hlc",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
Expand Down
61 changes: 41 additions & 20 deletions pkg/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1029,8 +1029,8 @@ func (r *raft) reset(term uint64) {
//
// [*] this case, and other cases where a state transition implies
// de-fortification.
assertTrue(!r.supportingFortifiedLeader() || r.lead == r.id,
"should not be changing terms when supporting a fortified leader; leader exempted")
assertTrue(!r.supportingFortifiedLeader(),
"should not be changing terms when supporting a fortified leader")
r.setTerm(term)
}

Expand Down Expand Up @@ -1542,33 +1542,49 @@ func (r *raft) Step(m pb.Message) error {

switch {
case m.Type == pb.MsgPreVote:
// Never change our term in response to a PreVote
// Never change our term in response to a PreVote.
case m.Type == pb.MsgPreVoteResp && !m.Reject:
// We send pre-vote requests with a term in our future. If the
// pre-vote is granted, we will increment our term when we get a
// quorum. If it is not, the term comes from the node that
// rejected our vote so we should become a follower at the new
// term.
case IsMsgIndicatingLeader(m.Type):
// We've just received a message that indicates that a new leader
// was elected at a higher term, but the message may not be from the
// leader itself. Either way, the old leader is no longer fortified,
// so it's safe to de-fortify at this point.
r.deFortify(m.From, m.Term)
var lead pb.PeerID
if IsMsgFromLeader(m.Type) {
lead = m.From
}
r.logMsgHigherTerm(m, "new leader indicated, advancing term")
r.becomeFollower(m.Term, lead)
default:
r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d], advancing term",
r.id, r.Term, m.Type, m.From, m.Term)
if IsMsgIndicatingLeader(m.Type) {
// We've just received a message that indicates that a new leader
// was elected at a higher term, but the message may not be from the
// leader itself. Either way, the old leader is no longer fortified,
// so it's safe to de-fortify at this point.
r.deFortify(m.From, m.Term)
var lead pb.PeerID
if IsMsgFromLeader(m.Type) {
lead = m.From
// We've just received a message that does not indicate that a new
// leader was elected at a higher term. All it means is that some
// other peer has this term.
if r.supportingFortifiedLeader() {
// If we are supporting a fortified leader, we can't just jump to the
// larger term. Instead, we need to wait for the leader to learn about
// the stranded peer, step down, and defortify. The only thing to do
// here is check whether we are that leader, in which case we should
// step down to kick off this process of catching up to the term of the
// stranded peer.
if r.state == pb.StateLeader {
r.logMsgHigherTerm(m, "stepping down as leader to recover stranded peer")
r.becomeFollower(r.Term, r.id)
} else {
r.logMsgHigherTerm(m, "ignoring and still supporting fortified leader")
}
r.becomeFollower(m.Term, lead)
} else {
// We've just received a message that does not indicate that a new
// leader was elected at a higher term. All it means is that some
// other peer has this term.
r.becomeFollower(m.Term, None)
return nil
}

// If we're not supporting a fortified leader, we can just jump to the
// term communicated by the peer.
r.logMsgHigherTerm(m, "advancing term")
r.becomeFollower(m.Term, None)
}

case m.Term < r.Term:
Expand Down Expand Up @@ -1726,6 +1742,11 @@ func (r *raft) Step(m pb.Message) error {
return nil
}

func (r *raft) logMsgHigherTerm(m pb.Message, suffix redact.SafeString) {
r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d], %s",
r.id, r.Term, m.Type, m.From, m.Term, suffix)
}

type stepFunc func(r *raft, m pb.Message) error

func stepLeader(r *raft, m pb.Message) error {
Expand Down
120 changes: 104 additions & 16 deletions pkg/raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/raft/tracker"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -2137,7 +2138,7 @@ func testLeaderElectionWithCheckQuorum(t *testing.T, storeLivenessEnabled bool)

// TestFreeStuckCandidateWithCheckQuorum ensures that a candidate with a higher term
// can disrupt the leader even if the leader still "officially" holds the lease, The
// leader is expected to step down and adopt the candidate's term
// leader is expected to step down and adopt the candidate's term.
func TestFreeStuckCandidateWithCheckQuorum(t *testing.T) {
testutils.RunTrueAndFalse(t, "store-liveness-enabled",
func(t *testing.T, storeLivenessEnabled bool) {
Expand Down Expand Up @@ -2172,10 +2173,12 @@ func testFreeStuckCandidateWithCheckQuorum(t *testing.T, storeLivenessEnabled bo

nt := newNetworkWithConfigAndLivenessFabric(nil, fabric, a, b, c)

for i := int64(0); i < b.electionTimeout; i++ {
b.tick()
}
// Elect node 1 as leader.
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
assert.Equal(t, pb.StateLeader, a.state)
if storeLivenessEnabled {
assert.Equal(t, hlc.MaxTimestamp, getLeadSupportStatus(a).LeadSupportUntil)
}

nt.isolate(1)
if storeLivenessEnabled {
Expand All @@ -2193,7 +2196,7 @@ func testFreeStuckCandidateWithCheckQuorum(t *testing.T, storeLivenessEnabled bo
assert.Equal(t, pb.StateCandidate, c.state)
assert.Equal(t, b.Term+1, c.Term)

// Vote again for safety
// Vote again for safety.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

assert.Equal(t, pb.StateFollower, b.state)
Expand All @@ -2206,19 +2209,52 @@ func testFreeStuckCandidateWithCheckQuorum(t *testing.T, storeLivenessEnabled bo
nt.livenessFabric.GrantSupportForPeerFromAllPeers(1)
}

nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: a.Term})
// If the stuck candidate were to talk to the follower, it may be ignored,
// depending on whether the follower is fortified by the leader.
nt.send(pb.Message{From: 3, To: 2, Type: pb.MsgAppResp, Term: c.Term})
if storeLivenessEnabled {
assert.Equal(t, c.Term-2, b.Term)
} else {
assert.Equal(t, c.Term, b.Term)
}

// Disrupt the leader so that the stuck peer is freed
// Disrupt the leader so that the stuck peer is freed. The leader steps down
// immediately, but only changes its term if it was not fortified. If it was,
// it waits for defortification.
hbType := pb.MsgHeartbeat
if storeLivenessEnabled {
hbType = pb.MsgFortifyLeader
}
nt.send(pb.Message{From: 1, To: 3, Type: hbType, Term: a.Term})
assert.Equal(t, pb.StateFollower, a.state)
assert.Equal(t, a.Term, c.Term)

// Vote again, should become leader this time
if storeLivenessEnabled {
// We need to withdraw from 1 to allow 3 to campaign and get elected.
nt.livenessFabric.WithdrawSupportForPeerFromAllPeers(1)
// Node 1 still remembers that it was the leader.
assert.Equal(t, c.Term-2, a.Term)
assert.Equal(t, a.id, a.lead)

// The ex-leader still hasn't defortified, so the stranded peer still can't
// win an election.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
assert.Equal(t, pb.StateCandidate, c.state)
assert.Equal(t, pb.StateFollower, a.state)
assert.Equal(t, c.Term-3, a.Term)
assert.Equal(t, a.id, a.lead)

// The ex-leader defortifies itself and its followers once its remaining
// support has expired.
require.False(t, a.shouldBcastDeFortify())
fabric.SetSupportExpired(1, true)
require.True(t, a.shouldBcastDeFortify())
for range a.heartbeatTimeout {
nt.tick(a)
}
} else {
assert.Equal(t, c.Term, a.Term)
assert.Equal(t, None, a.lead)
}
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

// Vote again, should become leader this time.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
assert.Equal(t, pb.StateLeader, c.state)
}

Expand Down Expand Up @@ -4436,6 +4472,11 @@ func testPreVoteMigrationWithFreeStuckPreCandidate(t *testing.T, storeLivenessEn
n2 := nt.peers[2].(*raft)
n3 := nt.peers[3].(*raft)

assert.Equal(t, pb.StateLeader, n1.state)
if storeLivenessEnabled {
assert.Equal(t, hlc.MaxTimestamp, getLeadSupportStatus(n1).LeadSupportUntil)
}

if storeLivenessEnabled {
// 1 needs to withdraw support for 3 before it can become a preCandidate.
nt.livenessFabric.WithdrawSupport(1, 3)
Expand All @@ -4447,19 +4488,66 @@ func testPreVoteMigrationWithFreeStuckPreCandidate(t *testing.T, storeLivenessEn
assert.Equal(t, pb.StateFollower, n2.state)
assert.Equal(t, pb.StatePreCandidate, n3.state)

// Pre-Vote again for safety
// Pre-Vote again for safety.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})

assert.Equal(t, pb.StateLeader, n1.state)
assert.Equal(t, pb.StateFollower, n2.state)
assert.Equal(t, pb.StatePreCandidate, n3.state)

nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: n1.Term})
// If the stuck candidate were to talk to the follower, it may be ignored,
// depending on whether the follower is fortified by the leader.
nt.send(pb.Message{From: 3, To: 2, Type: pb.MsgAppResp, Term: n3.Term})
if storeLivenessEnabled {
assert.Equal(t, n3.Term-2, n2.Term)
} else {
assert.Equal(t, n3.Term, n2.Term)
}

// Disrupt the leader so that the stuck peer is freed
// Disrupt the leader so that the stuck peer is freed. The leader steps down
// immediately, but only changes its term if it was not fortified. If it was,
// it waits for defortification.
hbType := pb.MsgHeartbeat
if storeLivenessEnabled {
hbType = pb.MsgFortifyLeader
}
nt.send(pb.Message{From: 1, To: 3, Type: hbType, Term: n1.Term})
assert.Equal(t, pb.StateFollower, n1.state)
if storeLivenessEnabled {
// Node 1 still remembers that it was the leader.
assert.Equal(t, n3.Term-2, n1.Term)
assert.Equal(t, n1.id, n1.lead)

// The ex-leader still hasn't defortified, so the stranded peer still can't
// win an election.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
assert.Equal(t, pb.StatePreCandidate, n3.state)
assert.Equal(t, pb.StateFollower, n1.state)
assert.Equal(t, n3.Term-2, n1.Term)
assert.Equal(t, n1.id, n1.lead)

// The ex-leader defortifies itself and its followers once its remaining
// support has expired.
require.False(t, n1.shouldBcastDeFortify())
fabric.SetSupportExpired(1, true)
require.True(t, n1.shouldBcastDeFortify())
for range n1.heartbeatTimeout {
nt.tick(n1)
}

// The ex-leader calls an election, which it loses and through which it
// learns about the larger term.
fabric.SetSupportExpired(1, false)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
assert.Equal(t, pb.StateFollower, n1.state)
}

assert.Equal(t, n1.Term, n3.Term)
assert.Equal(t, None, n1.lead)

// The ex-leader calls an election, which it wins.
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
assert.Equal(t, pb.StateLeader, n1.state)
}

func testConfChangeCheckBeforeCampaign(t *testing.T, v2 bool, storeLivenessEnabled bool) {
Expand Down
13 changes: 10 additions & 3 deletions pkg/raft/rawnode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,18 +351,25 @@ func testRawNodeJointAutoLeave(t *testing.T, storeLivenessEnabled bool) {
cc = &ccc
}
if cc != nil {
// Force it step down.
// Force it to step down.
rawNode.Step(pb.Message{Type: pb.MsgHeartbeatResp, From: 1, Term: rawNode.raft.Term + 1})
require.Equal(t, pb.StateFollower, rawNode.raft.state)
if storeLivenessEnabled {
// And also wait for defortification.
for range rawNode.raft.heartbeatTimeout {
rawNode.Tick()
}
}
cs = rawNode.ApplyConfChange(cc)
}
}
rawNode.Advance(rd)
// Once we are the leader, propose a command and a ConfChange.
if !proposed && rd.HardState.Lead == rawNode.raft.id {
if !proposed && rawNode.raft.state == pb.StateLeader {
require.NoError(t, rawNode.Propose([]byte("somedata")))
ccdata, err = testCc.Marshal()
require.NoError(t, err)
rawNode.ProposeConfChange(testCc)
require.NoError(t, rawNode.ProposeConfChange(testCc))
proposed = true
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/async_storage_writes_append_aba_race.txt
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ Messages:
deliver-msgs 1
----
4->1 MsgFortifyLeader Term:3 Log:0/0
INFO 1 [term: 2] received a MsgFortifyLeader message with higher term from 4 [term: 3], advancing term
INFO 1 [term: 2] received a MsgFortifyLeader message with higher term from 4 [term: 3], new leader indicated, advancing term
INFO 1 became follower at term 3
4->1 MsgApp Term:3 Log:1/11 Commit:11 Entries:[3/12 EntryNormal ""]
INFO found conflict at index 12 [existing term: 2, conflicting term: 3]
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_fortification_safety.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ stabilize 1 4
1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""]
> 4 receiving messages
1->4 MsgFortifyLeader Term:1 Log:0/0
INFO 4 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 4 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 4 became follower at term 1
1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4]
DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v1_add_single.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ stabilize
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2]
> 2 receiving messages
1->2 MsgFortifyLeader Term:1 Log:0/0
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 2 became follower at term 1
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2]
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
4 changes: 2 additions & 2 deletions pkg/raft/testdata/confchange_v2_add_double_auto.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ stabilize 1 2
----
> 2 receiving messages
1->2 MsgFortifyLeader Term:1 Log:0/0
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 2 became follower at term 1
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down Expand Up @@ -178,7 +178,7 @@ stabilize 1 3
----
> 3 receiving messages
1->3 MsgFortifyLeader Term:1 Log:0/0
INFO 3 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 3 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 3 became follower at term 1
1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3]
DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v2_add_single_auto.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ stabilize
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2]
> 2 receiving messages
1->2 MsgFortifyLeader Term:1 Log:0/0
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 2 became follower at term 1
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2]
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v2_add_single_explicit.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ stabilize 1 2
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2]
> 2 receiving messages
1->2 MsgFortifyLeader Term:1 Log:0/0
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 2 became follower at term 1
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2]
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/raft/testdata/confchange_v2_add_single_implicit.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ stabilize
1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2]
> 3 receiving messages
1->3 MsgFortifyLeader Term:1 Log:0/0
INFO 3 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], advancing term
INFO 3 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1], new leader indicated, advancing term
INFO 3 became follower at term 1
1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v3]
DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
Expand Down
Loading

0 comments on commit 61226e5

Please sign in to comment.