diff --git a/server/schedulers/evict_leader.go b/server/schedulers/evict_leader.go
index 0f4c7d5183a..83dbf306772 100644
--- a/server/schedulers/evict_leader.go
+++ b/server/schedulers/evict_leader.go
@@ -278,15 +278,29 @@ func scheduleEvictLeaderBatch(name string, cluster opt.Cluster, storeRanges map[
 
 func scheduleEvictLeaderOnce(name string, cluster opt.Cluster, storeRanges map[uint64][]core.KeyRange) []*operator.Operator {
 	ops := make([]*operator.Operator, 0, len(storeRanges))
 	for id, ranges := range storeRanges {
+		var filters []filter.Filter
 		region := cluster.RandLeaderRegion(id, ranges, opt.HealthRegion(cluster))
 		if region == nil {
-			schedulerCounter.WithLabelValues(name, "no-leader").Inc()
-			continue
+			// try to pick unhealthy region
+			region = cluster.RandLeaderRegion(id, ranges)
+			if region == nil {
+				schedulerCounter.WithLabelValues(name, "no-leader").Inc()
+				continue
+			}
+			schedulerCounter.WithLabelValues(name, "pick-unhealthy-region").Inc()
+			unhealthyPeerStores := make(map[uint64]struct{})
+			for _, peer := range region.GetDownPeers() {
+				unhealthyPeerStores[peer.GetPeer().GetStoreId()] = struct{}{}
+			}
+			for _, peer := range region.GetPendingPeers() {
+				unhealthyPeerStores[peer.GetStoreId()] = struct{}{}
+			}
+			filters = append(filters, filter.NewExcludedFilter(EvictLeaderName, nil, unhealthyPeerStores))
 		}
+		filters = append(filters, &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true})
 		target := filter.NewCandidates(cluster.GetFollowerStores(region)).
-			FilterTarget(cluster.GetOpts(), &filter.StoreStateFilter{ActionScope: EvictLeaderName, TransferLeader: true}).
-			RandomPick()
+			FilterTarget(cluster.GetOpts(), filters...).RandomPick()
 		if target == nil {
 			schedulerCounter.WithLabelValues(name, "no-target-store").Inc()
 			continue
diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go
index fd77265dd98..5841df4e773 100644
--- a/server/schedulers/scheduler_test.go
+++ b/server/schedulers/scheduler_test.go
@@ -19,6 +19,7 @@ import (
 
 	. "github.com/pingcap/check"
 	"github.com/pingcap/kvproto/pkg/metapb"
+	"github.com/pingcap/kvproto/pkg/pdpb"
 	"github.com/tikv/pd/pkg/mock/mockcluster"
 	"github.com/tikv/pd/pkg/testutil"
 	"github.com/tikv/pd/server/config"
@@ -266,6 +267,40 @@ func (s *testEvictLeaderSuite) TestEvictLeader(c *C) {
 	testutil.CheckTransferLeader(c, op[0], operator.OpLeader, 1, 2)
 }
 
+func (s *testEvictLeaderSuite) TestEvictLeaderWithUnhealthyPeer(c *C) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	opt := config.NewTestOptions()
+	tc := mockcluster.NewCluster(ctx, opt)
+	sl, err := schedule.CreateScheduler(EvictLeaderType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), schedule.ConfigSliceDecoder(EvictLeaderType, []string{"1"}))
+	c.Assert(err, IsNil)
+
+	// Add stores 1, 2, 3
+	tc.AddLeaderStore(1, 0)
+	tc.AddLeaderStore(2, 0)
+	tc.AddLeaderStore(3, 0)
+	// Add region 1, which has 3 peers. 1 is leader. 2 is healthy or pending, 3 is healthy or down.
+	tc.AddLeaderRegion(1, 1, 2, 3)
+	region := tc.MockRegionInfo(1, 1, []uint64{2, 3}, nil, nil)
+	withDownPeer := core.WithDownPeers([]*pdpb.PeerStats{{
+		Peer:        region.GetPeers()[2],
+		DownSeconds: 1000,
+	}})
+	withPendingPeer := core.WithPendingPeers([]*metapb.Peer{region.GetPeers()[1]})
+
+	// only pending
+	tc.PutRegion(region.Clone(withPendingPeer))
+	op := sl.Schedule(tc)
+	testutil.CheckTransferLeader(c, op[0], operator.OpLeader, 1, 3)
+	// only down
+	tc.PutRegion(region.Clone(withDownPeer))
+	op = sl.Schedule(tc)
+	testutil.CheckTransferLeader(c, op[0], operator.OpLeader, 1, 2)
+	// pending + down
+	tc.PutRegion(region.Clone(withPendingPeer, withDownPeer))
+	c.Assert(sl.Schedule(tc), HasLen, 0)
+}
+
 var _ = Suite(&testShuffleRegionSuite{})
 
 type testShuffleRegionSuite struct{}