Skip to content

Commit

Permalink
close tikv#5753 fix unsafe recovery auto detect mode
Browse files Browse the repository at this point in the history
Signed-off-by: Connor1996 <zbk602423539@gmail.com>
  • Loading branch information
Connor1996 committed Dec 5, 2022
1 parent 8158054 commit 82fa664
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
15 changes: 15 additions & 0 deletions server/cluster/unsafe_recovery_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,21 @@ func (u *unsafeRecoveryController) generateForceLeaderPlan(newestRegionTree *reg
storeRecoveryPlan.ForceLeader.FailedStores = append(storeRecoveryPlan.ForceLeader.FailedStores, store)
}
}
if u.autoDetect {
// In auto-detect mode failedStores is empty, so the detected failed stores need to be added to the list.
for _, peer := range u.getFailedPeers(leader.Region()) {
found := false
for _, store := range storeRecoveryPlan.ForceLeader.FailedStores {
if store == peer.StoreId {
found = true
break
}
}
if !found {
storeRecoveryPlan.ForceLeader.FailedStores = append(storeRecoveryPlan.ForceLeader.FailedStores, peer.StoreId)
}
}
}
storeRecoveryPlan.ForceLeader.EnterForceLeaders = append(storeRecoveryPlan.ForceLeader.EnterForceLeaders, region.GetId())
u.recordAffectedRegion(leader.Region())
hasPlan = true
Expand Down
41 changes: 41 additions & 0 deletions server/cluster/unsafe_recovery_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,44 @@ func newStoreHeartbeat(storeID uint64, report *pdpb.StoreReport) *pdpb.StoreHear
}
}

// hasQuorum reports whether region still has a live majority of voters when
// every store listed in failedStores is treated as down.
//
// To account for joint consensus, voters are split into an incoming and an
// outgoing configuration: a peer with role Voter belongs to both, an
// IncomingVoter only to the incoming one and a DemotingVoter only to the
// outgoing one. Peers with any other role are ignored. The region has quorum
// only if both configurations have strictly more live voters than failed
// ones; a configuration without any voter therefore has no quorum.
func hasQuorum(region *metapb.Region, failedStores []uint64) bool {
	// Index the failed stores once so each voter lookup is a map probe.
	failed := make(map[uint64]struct{}, len(failedStores))
	for _, id := range failedStores {
		failed[id] = struct{}{}
	}

	// majorityAlive is true when live voters strictly outnumber failed ones.
	majorityAlive := func(voters []*metapb.Peer) bool {
		alive := 0
		for _, v := range voters {
			if _, down := failed[v.GetStoreId()]; !down {
				alive++
			}
		}
		return len(voters)-alive < alive
	}

	var incoming, outgoing []*metapb.Peer
	for _, p := range region.Peers {
		switch p.Role {
		case metapb.PeerRole_Voter:
			incoming = append(incoming, p)
			outgoing = append(outgoing, p)
		case metapb.PeerRole_IncomingVoter:
			incoming = append(incoming, p)
		case metapb.PeerRole_DemotingVoter:
			outgoing = append(outgoing, p)
		}
	}

	return majorityAlive(incoming) && majorityAlive(outgoing)
}

func applyRecoveryPlan(re *require.Assertions, storeID uint64, storeReports map[uint64]*pdpb.StoreReport, resp *pdpb.StoreHeartbeatResponse) {
plan := resp.GetRecoveryPlan()
if plan == nil {
Expand All @@ -55,6 +93,9 @@ func applyRecoveryPlan(re *require.Assertions, storeID uint64, storeReports map[
for _, report := range reports.PeerReports {
region := report.GetRegionState().GetRegion()
if region.GetId() == forceLeader {
if hasQuorum(region, forceLeaders.GetFailedStores()) {
re.FailNow("should not enter force leader when quorum is still alive")
}
report.IsForceLeader = true
break
}
Expand Down

0 comments on commit 82fa664

Please sign in to comment.