diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index f7caff72760..ff5c487c19c 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -59,7 +59,7 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { op, err := c.fixRulePeer(region, fit, rf) if err != nil { log.Debug("fail to fix rule peer", zap.Error(err), zap.String("rule-group", rf.Rule.GroupID), zap.String("rule-id", rf.Rule.ID)) - return nil + break } if op != nil { return op @@ -202,6 +202,13 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg if len(fit.OrphanPeers) == 0 { return nil, nil } + // remove orphan peers only when all rules are satisfied (count+role) + for _, rf := range fit.RuleFits { + if !rf.IsSatisfied() { + checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc() + return nil, nil + } + } checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc() peer := fit.OrphanPeers[0] return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, peer.StoreId) diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index d463dc88ce7..31a320c373a 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -110,6 +110,28 @@ func (s *testRuleCheckerSuite) TestFixOrphanPeers(c *C) { c.Assert(op.Step(0).(operator.RemovePeer).FromStore, Equals, uint64(4)) } +func (s *testRuleCheckerSuite) TestFixOrphanPeers2(c *C) { + // check orphan peers can only be handled when all rules are satisfied. + s.cluster.AddLabelsStore(1, 1, map[string]string{"foo": "bar"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"foo": "bar"}) + s.cluster.AddLabelsStore(3, 1, map[string]string{"foo": "baz"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3) + s.ruleManager.SetRule(&placement.Rule{ + GroupID: "pd", + ID: "r1", + Index: 100, + Override: true, + Role: placement.Leader, + Count: 2, + LabelConstraints: []placement.LabelConstraint{ + {Key: "foo", Op: "in", Values: []string{"baz"}}, + }, + }) + s.cluster.SetStoreDown(2) + op := s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, IsNil) +} + func (s *testRuleCheckerSuite) TestFixRole(c *C) { s.cluster.AddLeaderStore(1, 1) s.cluster.AddLeaderStore(2, 1) @@ -198,3 +220,26 @@ func (s *testRuleCheckerSuite) TestNoBetterReplacement(c *C) { op := s.rc.Check(s.cluster.GetRegion(1)) c.Assert(op, IsNil) } + +func (s *testRuleCheckerSuite) TestIssue2419(c *C) { + s.cluster.AddLeaderStore(1, 1) + s.cluster.AddLeaderStore(2, 1) + s.cluster.AddLeaderStore(3, 1) + s.cluster.AddLeaderStore(4, 1) + s.cluster.SetStoreOffline(3) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) + r := s.cluster.GetRegion(1) + r = r.Clone(core.WithAddPeer(&metapb.Peer{Id: 5, StoreId: 4, IsLearner: true})) + op := s.rc.Check(r) + c.Assert(op, NotNil) + c.Assert(op.Desc(), Equals, "remove-orphan-peer") + c.Assert(op.Step(0).(operator.RemovePeer).FromStore, Equals, uint64(4)) + + r = r.Clone(core.WithRemoveStorePeer(4)) + op = s.rc.Check(r) + c.Assert(op, NotNil) + c.Assert(op.Desc(), Equals, "replace-rule-offline-peer") + c.Assert(op.Step(0).(operator.AddLearner).ToStore, Equals, uint64(4)) + c.Assert(op.Step(1).(operator.PromoteLearner).ToStore, Equals, uint64(4)) + c.Assert(op.Step(2).(operator.RemovePeer).FromStore, Equals, uint64(3)) +}