diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go
index fd6165b5880..b569ea546fc 100644
--- a/server/schedule/checker/rule_checker.go
+++ b/server/schedule/checker/rule_checker.go
@@ -232,12 +232,27 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
 	if len(fit.OrphanPeers) == 0 {
 		return nil, nil
 	}
-	// remove orphan peers only when all rules are satisfied (count+role)
+	// remove orphan peers only when all rules are satisfied (count+role) and all peers selected
+	// by RuleFits are not pending or down.
 	for _, rf := range fit.RuleFits {
 		if !rf.IsSatisfied() {
 			checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
 			return nil, nil
 		}
+		for _, p := range rf.Peers {
+			for _, pendingPeer := range region.GetPendingPeers() {
+				if pendingPeer.Id == p.Id {
+					checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
+					return nil, nil
+				}
+			}
+			for _, downPeer := range region.GetDownPeers() {
+				if downPeer.Peer.Id == p.Id {
+					checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc()
+					return nil, nil
+				}
+			}
+		}
 	}
 	checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
 	peer := fit.OrphanPeers[0]
diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go
index 1afbc714882..b75270c706a 100644
--- a/server/schedule/checker/rule_checker_test.go
+++ b/server/schedule/checker/rule_checker_test.go
@@ -571,3 +571,45 @@ func (s *testRuleCheckerSuite) TestFixOfflinePeer(c *C) {
 	s.ruleManager.SetRule(rule)
 	c.Assert(s.rc.Check(region), IsNil)
 }
+
+// Ref https://github.com/tikv/pd/issues/4045
+func (s *testRuleCheckerSuite) TestSkipFixOrphanPeerIfSelectedPeerisPendingOrDown(c *C) {
+	s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
+	s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"})
+	s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"})
+	s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"})
+	s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3, 4)
+
+	// set peer3 and peer4 to pending
+	r1 := s.cluster.GetRegion(1)
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(3), r1.GetStorePeer(4)}))
+	s.cluster.PutRegion(r1)
+
+	// should not remove extra peer
+	op := s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op, IsNil)
+
+	// set peer3 to down-peer
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(4)}))
+	r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{
+		{
+			Peer:        r1.GetStorePeer(3),
+			DownSeconds: 42,
+		},
+	}))
+	s.cluster.PutRegion(r1)
+
+	// should not remove extra peer
+	op = s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op, IsNil)
+
+	// set peer3 to normal
+	r1 = r1.Clone(core.WithDownPeers(nil))
+	s.cluster.PutRegion(r1)
+
+	// should remove extra peer now
+	var remove operator.RemovePeer
+	op = s.rc.Check(s.cluster.GetRegion(1))
+	c.Assert(op.Step(0), FitsTypeOf, remove)
+	c.Assert(op.Desc(), Equals, "remove-orphan-peer")
+}
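
Note (not part of the patch): the new guard boils down to a membership test. Before removing an orphan peer, look at every peer selected by the RuleFits and skip the removal if any of them is currently pending or down. A minimal, self-contained sketch of that idea follows; it uses plain uint64 peer IDs and a hypothetical shouldSkipOrphanRemoval helper rather than PD's *core.RegionInfo and *placement.RegionFit types.

package main

import "fmt"

// shouldSkipOrphanRemoval is a stand-in for the new guard in fixOrphanPeers:
// it reports true when any peer selected by the rule fits is currently
// pending or down, in which case orphan-peer removal should be skipped.
func shouldSkipOrphanRemoval(ruleFitPeers, pendingPeers, downPeers []uint64) bool {
	// Build a set of "unhealthy" peer IDs (pending or down).
	unhealthy := make(map[uint64]struct{}, len(pendingPeers)+len(downPeers))
	for _, id := range pendingPeers {
		unhealthy[id] = struct{}{}
	}
	for _, id := range downPeers {
		unhealthy[id] = struct{}{}
	}
	// If any selected peer is unhealthy, keep the orphan peer for now.
	for _, id := range ruleFitPeers {
		if _, ok := unhealthy[id]; ok {
			return true
		}
	}
	return false
}

func main() {
	// Mirrors the test scenario: peers 3 and 4 are pending and peer 3 is
	// selected by a rule fit, so the orphan peer must not be removed yet.
	fmt.Println(shouldSkipOrphanRemoval([]uint64{1, 2, 3}, []uint64{3, 4}, nil)) // true
	// Once no selected peer is pending or down, removal may proceed.
	fmt.Println(shouldSkipOrphanRemoval([]uint64{1, 2, 3}, nil, nil)) // false
}

The patch itself uses straightforward nested loops over region.GetPendingPeers() and region.GetDownPeers(), which is equivalent and perfectly reasonable given the small number of peers a region carries.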