From 8359cbbfe807ed493c766f98680241319826e6cd Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 25 Mar 2021 17:33:24 +0800 Subject: [PATCH 1/2] cherry pick #3522 to release-4.0 Signed-off-by: ti-srebot --- server/schedule/checker/rule_checker.go | 12 +- server/schedule/checker/rule_checker_test.go | 164 +++++++++++++++++++ 2 files changed, 170 insertions(+), 6 deletions(-) diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 4ed4820bdf9..755d1acd009 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -61,6 +61,11 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { // multiple rules. return c.fixRange(region) } + op, err := c.fixOrphanPeers(region, fit) + if err == nil && op != nil { + return op + } + log.Debug("fail to fix orphan peer", errs.ZapError(err)) for _, rf := range fit.RuleFits { op, err := c.fixRulePeer(region, fit, rf) if err != nil { @@ -71,12 +76,7 @@ func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { return op } } - op, err := c.fixOrphanPeers(region, fit) - if err != nil { - log.Debug("fail to fix orphan peer", errs.ZapError(err)) - return nil - } - return op + return nil } func (c *RuleChecker) fixRange(region *core.RegionInfo) *operator.Operator { diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 466a8a7c2ff..ea2825420f8 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -299,3 +299,167 @@ func (s *testRuleCheckerSuite) TestIssue2419(c *C) { c.Assert(op.Step(1).(operator.PromoteLearner).ToStore, Equals, uint64(4)) c.Assert(op.Step(2).(operator.RemovePeer).FromStore, Equals, uint64(3)) } +<<<<<<< HEAD +======= + +// Ref https://github.com/tikv/pd/issues/3521 +// The problem is when offline a store, we may add learner multiple times if +// the operator is timeout. 
+func (s *testRuleCheckerSuite) TestIssue3521_PriorityFixOrphanPeer(c *C) { + s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) + s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + s.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) + op := s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, IsNil) + var add operator.AddLearner + var remove operator.RemovePeer + s.cluster.SetStoreOffline(2) + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, NotNil) + c.Assert(op.Step(0), FitsTypeOf, add) + c.Assert(op.Desc(), Equals, "replace-rule-offline-peer") + r := s.cluster.GetRegion(1).Clone(core.WithAddPeer( + &metapb.Peer{ + Id: 5, + StoreId: 4, + Role: metapb.PeerRole_Learner, + })) + s.cluster.PutRegion(r) + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op.Step(0), FitsTypeOf, remove) + c.Assert(op.Desc(), Equals, "remove-orphan-peer") +} + +func (s *testRuleCheckerSuite) TestIssue3293(c *C) { + s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) + s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + s.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) + err := s.ruleManager.SetRule(&placement.Rule{ + GroupID: "TiDB_DDL_51", + ID: "0", + Role: placement.Follower, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "host", + Values: []string{ + "host5", + }, + Op: placement.In, + }, + }, + }) + c.Assert(err, IsNil) + s.cluster.DeleteStore(s.cluster.TakeStore(5)) + err = s.ruleManager.SetRule(&placement.Rule{ + GroupID: "TiDB_DDL_51", + ID: "default", + Role: placement.Voter, + Count: 3, + }) + c.Assert(err, IsNil) + err = s.ruleManager.DeleteRule("pd", "default") + c.Assert(err, IsNil) + op := s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, NotNil) + c.Assert(op.Desc(), Equals, "add-rule-peer") +} + +func (s *testRuleCheckerSuite) TestIssue3299(c *C) { + s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"dc": "sh"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) + + testCases := []struct { + constraints []placement.LabelConstraint + err string + }{ + { + constraints: []placement.LabelConstraint{ + { + Key: "host", + Values: []string{"host5"}, + Op: placement.In, + }, + }, + err: ".*can not match any store", + }, + { + constraints: []placement.LabelConstraint{ + { + Key: "ho", + Values: []string{"sh"}, + Op: placement.In, + }, + }, + err: ".*can not match any store", + }, + { + constraints: []placement.LabelConstraint{ + { + Key: "host", + Values: []string{"host1"}, + Op: placement.In, + }, + { + Key: "host", + Values: []string{"host1"}, + Op: placement.NotIn, + }, + }, + err: ".*can not match any store", + }, + { + constraints: []placement.LabelConstraint{ + { + Key: "host", + Values: []string{"host1"}, + Op: placement.In, + }, + { + Key: "host", + Values: []string{"host3"}, + Op: placement.In, + }, + }, + err: ".*can not match any store", + }, + { + constraints: []placement.LabelConstraint{ + { + Key: "host", + Values: []string{"host1"}, + Op: placement.In, + }, + { + Key: "host", + Values: []string{"host1"}, + Op: 
placement.In, + }, + }, + err: "", + }, + } + + for _, t := range testCases { + err := s.ruleManager.SetRule(&placement.Rule{ + GroupID: "p", + ID: "0", + Role: placement.Follower, + Count: 1, + LabelConstraints: t.constraints, + }) + if t.err != "" { + c.Assert(err, ErrorMatches, t.err) + } else { + c.Assert(err, IsNil) + } + } +} +>>>>>>> 60508bde... checker: priority to fix orphan peers (#3522) From fd2587822e20a04aeb61a2da3f158ec4b2e372ca Mon Sep 17 00:00:00 2001 From: nolouch Date: Fri, 23 Apr 2021 12:56:58 +0800 Subject: [PATCH 2/2] fix Signed-off-by: nolouch --- server/schedule/checker/rule_checker_test.go | 139 +------------------ 1 file changed, 3 insertions(+), 136 deletions(-) diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index ea2825420f8..41d99006982 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -299,8 +299,6 @@ func (s *testRuleCheckerSuite) TestIssue2419(c *C) { c.Assert(op.Step(1).(operator.PromoteLearner).ToStore, Equals, uint64(4)) c.Assert(op.Step(2).(operator.RemovePeer).FromStore, Equals, uint64(3)) } -<<<<<<< HEAD -======= // Ref https://github.com/tikv/pd/issues/3521 // The problem is when offline a store, we may add learner multiple times if @@ -323,143 +321,12 @@ func (s *testRuleCheckerSuite) TestIssue3521_PriorityFixOrphanPeer(c *C) { c.Assert(op.Desc(), Equals, "replace-rule-offline-peer") r := s.cluster.GetRegion(1).Clone(core.WithAddPeer( &metapb.Peer{ - Id: 5, - StoreId: 4, - Role: metapb.PeerRole_Learner, + Id: 5, + StoreId: 4, + IsLearner: true, })) s.cluster.PutRegion(r) op = s.rc.Check(s.cluster.GetRegion(1)) c.Assert(op.Step(0), FitsTypeOf, remove) c.Assert(op.Desc(), Equals, "remove-orphan-peer") } - -func (s *testRuleCheckerSuite) TestIssue3293(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) - s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) - s.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) - s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) - err := s.ruleManager.SetRule(&placement.Rule{ - GroupID: "TiDB_DDL_51", - ID: "0", - Role: placement.Follower, - Count: 1, - LabelConstraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{ - "host5", - }, - Op: placement.In, - }, - }, - }) - c.Assert(err, IsNil) - s.cluster.DeleteStore(s.cluster.TakeStore(5)) - err = s.ruleManager.SetRule(&placement.Rule{ - GroupID: "TiDB_DDL_51", - ID: "default", - Role: placement.Voter, - Count: 3, - }) - c.Assert(err, IsNil) - err = s.ruleManager.DeleteRule("pd", "default") - c.Assert(err, IsNil) - op := s.rc.Check(s.cluster.GetRegion(1)) - c.Assert(op, NotNil) - c.Assert(op.Desc(), Equals, "add-rule-peer") -} - -func (s *testRuleCheckerSuite) TestIssue3299(c *C) { - s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) - s.cluster.AddLabelsStore(2, 1, map[string]string{"dc": "sh"}) - s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) - - testCases := []struct { - constraints []placement.LabelConstraint - err string - }{ - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host5"}, - Op: placement.In, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "ho", - Values: []string{"sh"}, - Op: placement.In, - }, - }, - err: ".*can 
not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host1"}, - Op: placement.NotIn, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host3"}, - Op: placement.In, - }, - }, - err: ".*can not match any store", - }, - { - constraints: []placement.LabelConstraint{ - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - { - Key: "host", - Values: []string{"host1"}, - Op: placement.In, - }, - }, - err: "", - }, - } - - for _, t := range testCases { - err := s.ruleManager.SetRule(&placement.Rule{ - GroupID: "p", - ID: "0", - Role: placement.Follower, - Count: 1, - LabelConstraints: t.constraints, - }) - if t.err != "" { - c.Assert(err, ErrorMatches, t.err) - } else { - c.Assert(err, IsNil) - } - } -} ->>>>>>> 60508bde... checker: priority to fix orphan peers (#3522)