diff --git a/pkg/core/store.go b/pkg/core/store.go index 5baedafdb05..4030084f870 100644 --- a/pkg/core/store.go +++ b/pkg/core/store.go @@ -779,6 +779,7 @@ func (s *StoresInfo) ResetStores() { func (s *StoresInfo) PauseLeaderTransfer(storeID uint64) error { s.Lock() defer s.Unlock() + log.Info("pause store leader transfer", zap.Uint64("store-id", storeID)) store, ok := s.stores[storeID] if !ok { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -795,6 +796,7 @@ func (s *StoresInfo) PauseLeaderTransfer(storeID uint64) error { func (s *StoresInfo) ResumeLeaderTransfer(storeID uint64) { s.Lock() defer s.Unlock() + log.Info("resume store leader transfer", zap.Uint64("store-id", storeID)) store, ok := s.stores[storeID] if !ok { log.Warn("try to clean a store's pause state, but it is not found. It may be cleanup", diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index 3fb8225be39..9dbd627b325 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -45,7 +45,7 @@ type grantLeaderSchedulerConfig struct { } func (conf *grantLeaderSchedulerConfig) buildWithArgs(args []string) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -271,6 +271,7 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R err := handler.config.buildWithArgs(args) if err != nil { + log.Error("fail to build config", errs.ZapError(err)) handler.config.Lock() handler.config.cluster.ResumeLeaderTransfer(id) handler.config.Unlock() @@ -279,6 +280,7 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R } err = handler.config.persist() if err != nil { + log.Error("fail to persist config", errs.ZapError(err)) _, _ = handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 
48b4f1c4239..51d857ae445 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -134,7 +134,7 @@ func schedulersRegister() { // evict leader RegisterSliceDecoderBuilder(types.EvictLeaderScheduler, func(args []string) ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } conf, ok := v.(*evictLeaderSchedulerConfig) @@ -268,7 +268,7 @@ func schedulersRegister() { // grant leader RegisterSliceDecoderBuilder(types.GrantLeaderScheduler, func(args []string) ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 2f55b2d8ecb..71ce89de851 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -50,7 +50,7 @@ const ( func init() { schedulers.RegisterSliceDecoderBuilder(userEvictLeaderScheduler, func(args []string) schedulers.ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errors.New("should specify the store-id") } conf, ok := v.(*evictLeaderSchedulerConfig) @@ -101,7 +101,7 @@ type evictLeaderSchedulerConfig struct { // BuildWithArgs builds the config with the args. 
func (conf *evictLeaderSchedulerConfig) BuildWithArgs(args []string) error { - if len(args) != 1 { + if len(args) < 1 { return errors.New("should specify the store-id") } diff --git a/tests/integrations/realcluster/real_cluster.go b/tests/integrations/realcluster/real_cluster.go index 441a13f4a73..22a50069728 100644 --- a/tests/integrations/realcluster/real_cluster.go +++ b/tests/integrations/realcluster/real_cluster.go @@ -37,7 +37,7 @@ type realClusterSuite struct { } var ( - playgroundLogDir = filepath.Join("tmp", "real_cluster", "playground") + playgroundLogDir = "/tmp/real_cluster/playground" tiupBin = os.Getenv("HOME") + "/.tiup/bin/tiup" ) diff --git a/tests/integrations/realcluster/scheduler_test.go b/tests/integrations/realcluster/scheduler_test.go index c0aff2669e9..dd544490580 100644 --- a/tests/integrations/realcluster/scheduler_test.go +++ b/tests/integrations/realcluster/scheduler_test.go @@ -21,12 +21,14 @@ import ( "testing" "time" + "github.com/pingcap/log" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/client/http" "github.com/tikv/pd/client/testutil" "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/types" + "go.uber.org/zap" ) type schedulerSuite struct { @@ -201,3 +203,75 @@ func (s *schedulerSuite) TestRegionLabelDenyScheduler() { return true }, testutil.WithWaitFor(time.Minute)) } + +// TestGrantOrEvictLeaderTwice creates the evict-leader scheduler twice, then the +// grant-leader scheduler twice, for the leader store of the first region, and +// after each creation waits until no region (evict) or every region (grant) is led by it. +func (s *schedulerSuite) TestGrantOrEvictLeaderTwice() { + re := require.New(s.T()) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pdHTTPCli := http.NewClient("pd-real-cluster-test", getPDEndpoints(s.T())) + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + re.NotEmpty(regions.Regions) + region1 := regions.Regions[0] + + var i int + evictLeader := func() { + re.NoError(pdHTTPCli.CreateScheduler(ctx, types.EvictLeaderScheduler.String(), uint64(region1.Leader.StoreID))) + // If the second evict-leader scheduler causes the pause-leader filter to + //
be disabled, the balance-leader scheduler needs some time to transfer + // leaders. See details in https://github.com/tikv/pd/issues/8756. + if i == 1 { + time.Sleep(3 * time.Second) + } + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + if err != nil { + log.Error("get regions failed", zap.Error(err)) + return false + } + for _, region := range regions.Regions { + if region.Leader.StoreID == region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) + + i++ + } + + evictLeader() + evictLeader() + re.NoError(pdHTTPCli.DeleteScheduler(ctx, types.EvictLeaderScheduler.String())) + + i = 0 + grantLeader := func() { + re.NoError(pdHTTPCli.CreateScheduler(ctx, types.GrantLeaderScheduler.String(), uint64(region1.Leader.StoreID))) + if i == 1 { + time.Sleep(3 * time.Second) + } + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + if err != nil { + log.Error("get regions failed", zap.Error(err)) + return false + } + for _, region := range regions.Regions { + if region.Leader.StoreID != region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(2*time.Minute)) + + i++ + } + + grantLeader() + grantLeader() + re.NoError(pdHTTPCli.DeleteScheduler(ctx, types.GrantLeaderScheduler.String())) +}