From d07d4695e8a9c2b81b7f5c1c811304503a355688 Mon Sep 17 00:00:00 2001 From: Andy Lok Date: Fri, 3 Sep 2021 01:28:09 +0800 Subject: [PATCH 1/2] address comment Signed-off-by: Andy Lok --- server/schedule/checker/rule_checker.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index bb4be874156..97843e74f67 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -276,12 +276,27 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg if len(fit.OrphanPeers) == 0 { return nil, nil } - // remove orphan peers only when all rules are satisfied (count+role) + // remove orphan peers only when all rules are satisfied (count+role) and all peers selected + // by RuleFits is not pending or down. for _, rf := range fit.RuleFits { if !rf.IsSatisfied() { checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc() return nil, nil } + for _, p := range rf.Peers { + for _, pendingPeer := range region.GetPendingPeers() { + if pendingPeer.Id == p.Id { + checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc() + return nil, nil + } + } + for _, downPeer := range region.GetDownPeers() { + if downPeer.Peer.Id == p.Id { + checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc() + return nil, nil + } + } + } } checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc() peer := fit.OrphanPeers[0] From 9b830723acad91069f08a0090ba29d95275687e3 Mon Sep 17 00:00:00 2001 From: Andy Lok Date: Fri, 3 Sep 2021 18:25:19 +0800 Subject: [PATCH 2/2] add test Signed-off-by: Andy Lok --- server/schedule/checker/rule_checker_test.go | 42 ++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 3c84859a990..11554fa4c13 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -631,3 +631,45 @@ func (s *testRuleCheckerSerialSuite) TestRuleCache(c *C) { c.Assert(s.rc.Check(region), IsNil) c.Assert(failpoint.Disable("github.com/tikv/pd/server/schedule/checker/assertCache"), IsNil) } + +// Ref https://github.com/tikv/pd/issues/4045 +func (s *testRuleCheckerSuite) TestSkipFixOrphanPeerIfSelectedPeerisPendingOrDown(c *C) { + s.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) + s.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) + s.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + s.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3, 4) + + // set peer3 and peer4 to pending + r1 := s.cluster.GetRegion(1) + r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(3), r1.GetStorePeer(4)})) + s.cluster.PutRegion(r1) + + // should not remove extra peer + op := s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, IsNil) + + // set peer3 to down-peer + r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetStorePeer(4)})) + r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{ + { + Peer: r1.GetStorePeer(3), + DownSeconds: 42, + }, + })) + s.cluster.PutRegion(r1) + + // should not remove extra peer + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op, IsNil) + + // set peer3 to normal + r1 = r1.Clone(core.WithDownPeers(nil)) + s.cluster.PutRegion(r1) + + // should remove extra peer now + var remove operator.RemovePeer + op = s.rc.Check(s.cluster.GetRegion(1)) + c.Assert(op.Step(0), FitsTypeOf, remove) + c.Assert(op.Desc(), Equals, "remove-orphan-peer") +}