Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: add conflict detect for grant hot leader scheduler #4903

Merged
merged 6 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions server/api/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
"github.com/pingcap/errors"
"github.com/pingcap/log"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/pkg/mcs/utils/constant"
"github.com/tikv/pd/pkg/schedule/types"
"github.com/tikv/pd/pkg/slice"
"github.com/tikv/pd/pkg/utils/apiutil"
"github.com/tikv/pd/server"
"github.com/unrolled/render"
Expand Down Expand Up @@ -143,6 +145,15 @@
}
collector(strconv.FormatUint(limit, 10))
case types.GrantHotRegionScheduler:
isExist, err := h.isSchedulerExist(types.BalanceHotRegionScheduler)
if err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return

Check warning on line 151 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L150-L151

Added lines #L150 - L151 were not covered by tests
}
if isExist {
h.r.JSON(w, http.StatusBadRequest, "balance-hot-region-scheduler is running, please remove it first")
return
}
leaderID, ok := input["store-leader-id"].(string)
if !ok {
h.r.JSON(w, http.StatusBadRequest, "missing leader id")
Expand All @@ -155,6 +166,16 @@
}
collector(leaderID)
collector(peerIDs)
case types.BalanceHotRegionScheduler:
isExist, err := h.isSchedulerExist(types.GrantHotRegionScheduler)
if err != nil {
h.r.JSON(w, http.StatusInternalServerError, err.Error())
return

Check warning on line 173 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L172-L173

Added lines #L172 - L173 were not covered by tests
}
if isExist {
h.r.JSON(w, http.StatusBadRequest, "grant-hot-region-scheduler is running, please remove it first")
return
}
}

if err := h.AddScheduler(tp, args...); err != nil {
Expand Down Expand Up @@ -246,6 +267,23 @@
h.r.JSON(w, http.StatusOK, "Pause or resume the scheduler successfully.")
}

func (h *schedulerHandler) isSchedulerExist(scheduler types.CheckerSchedulerType) (bool, error) {
rc, err := h.GetRaftCluster()
if err != nil {
return false, err

Check warning on line 273 in server/api/scheduler.go

View check run for this annotation

Codecov / codecov/patch

server/api/scheduler.go#L273

Added line #L273 was not covered by tests
}
if rc.IsServiceIndependent(constant.SchedulingServiceName) {
handlers := rc.GetSchedulerHandlers()
_, ok := handlers[scheduler.String()]
return ok, nil
}
schedulers := rc.GetSchedulers()
if slice.Contains(schedulers, scheduler.String()) {
return !rc.GetSchedulerConfig().IsSchedulerDisabled(scheduler), nil
}
return false, nil
}

type schedulerConfigHandler struct {
svr *server.Server
rd *render.Render
Expand Down
7 changes: 4 additions & 3 deletions tools/pd-ctl/pdctl/command/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,10 @@ func NewSplitBucketSchedulerCommand() *cobra.Command {
// NewGrantHotRegionSchedulerCommand returns a command to add a grant-hot-region-scheduler.
func NewGrantHotRegionSchedulerCommand() *cobra.Command {
c := &cobra.Command{
Use: "grant-hot-region-scheduler <store_leader_id> <store_leader_id,store_peer_id_1,store_peer_id_2>",
Short: "add a scheduler to grant hot region to fixed stores",
Run: addSchedulerForGrantHotRegionCommandFunc,
Use: "grant-hot-region-scheduler <store_leader_id> <store_leader_id,store_peer_id_1,store_peer_id_2>",
Copy link
Member

@rleungx rleungx Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, is it necessary to set store_leader_id again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I also think it is not necessary. I will modify it in another pr.

Short: `add a scheduler to grant hot region to fixed stores.
Note: If there is balance-hot-region-scheduler running, please remove it first, otherwise grant-hot-region-scheduler will not work.`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it ok if the hot scheduler is not work but exist, for example being pause status?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this pr, even though the hot-scheduler is paused, adding grant-hot-leader-scheduler is still not allowed.

If this is allowed, let's look at an example where I suspend the hot-scheduler for ten seconds and then add grant-hot-leader-scheduler, which is fine for ten seconds, but after ten seconds the hot-scheduler resumes scheduling and they will conflict again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, understand. BTW, the hot scheduler config still exist if I enable it again.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the configuration still exists. If you think this pr is ok, I will cancel /hold.

Run: addSchedulerForGrantHotRegionCommandFunc,
}
return c
}
Expand Down
133 changes: 97 additions & 36 deletions tools/pd-ctl/tests/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,34 +490,6 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust
"balance-hot-region-scheduler": true,
})

// test grant hot region scheduler config
checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"balance-hot-region-scheduler": true,
"grant-hot-region-scheduler": true,
})
var conf3 map[string]any
expected3 := map[string]any{
"store-id": []any{float64(1), float64(2), float64(3)},
"store-leader-id": float64(1),
}
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3)
re.Equal(expected3, conf3)

echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil)
re.Contains(echo, "Success!")
expected3["store-leader-id"] = float64(2)
testutil.Eventually(re, func() bool {
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3)
return reflect.DeepEqual(expected3, conf3)
})
checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"balance-hot-region-scheduler": true,
})

// test shuffle hot region scheduler
echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil)
re.Contains(echo, "Success!")
Expand Down Expand Up @@ -587,6 +559,103 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust
re.Contains(echo, "Success!")
}

func (suite *schedulerTestSuite) TestGrantLeaderScheduler() {
suite.env.RunTestBasedOnMode(suite.checkGrantLeaderScheduler)
}

func (suite *schedulerTestSuite) checkGrantLeaderScheduler(cluster *pdTests.TestCluster) {
re := suite.Require()
pdAddr := cluster.GetConfig().GetClientURL()
cmd := ctl.GetRootCmd()

stores := []*metapb.Store{
{
Id: 1,
State: metapb.StoreState_Up,
LastHeartbeat: time.Now().UnixNano(),
},
{
Id: 2,
State: metapb.StoreState_Up,
LastHeartbeat: time.Now().UnixNano(),
},
{
Id: 3,
State: metapb.StoreState_Up,
LastHeartbeat: time.Now().UnixNano(),
},
{
Id: 4,
State: metapb.StoreState_Up,
LastHeartbeat: time.Now().UnixNano(),
},
}
for _, store := range stores {
pdTests.MustPutStore(re, cluster, store)
}

// note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region.
pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10))

suite.checkDefaultSchedulers(re, cmd, pdAddr)

// case 1: add grant-hot-region-scheduler when balance-hot-region-scheduler is running
echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, nil)
re.Contains(echo, "balance-hot-region-scheduler is running, please remove it first")

// case 2: add grant-hot-region-scheduler when balance-hot-region-scheduler is paused
echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-hot-region-scheduler", "60"}, nil)
re.Contains(echo, "Success!")
suite.checkDefaultSchedulers(re, cmd, pdAddr)
echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, nil)
re.Contains(echo, "balance-hot-region-scheduler is running, please remove it first")

// case 3: add grant-hot-region-scheduler when balance-hot-region-scheduler is disabled
checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"evict-slow-store-scheduler": true,
})

checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"grant-hot-region-scheduler": true,
"evict-slow-store-scheduler": true,
})

// case 4: test grant-hot-region-scheduler config
var conf3 map[string]any
expected3 := map[string]any{
"store-id": []any{float64(1), float64(2), float64(3)},
"store-leader-id": float64(1),
}
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3)
re.Equal(expected3, conf3)

echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil)
re.Contains(echo, "Success!")
expected3["store-leader-id"] = float64(2)
testutil.Eventually(re, func() bool {
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3)
return reflect.DeepEqual(expected3, conf3)
})

// case 5: remove grant-hot-region-scheduler
echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil)
re.Contains(echo, "grant-hot-region-scheduler is running, please remove it first")

checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"evict-slow-store-scheduler": true,
})

echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil)
re.Contains(echo, "Success!")
suite.checkDefaultSchedulers(re, cmd, pdAddr)
}

func (suite *schedulerTestSuite) TestHotRegionSchedulerConfig() {
suite.env.RunTestBasedOnMode(suite.checkHotRegionSchedulerConfig)
}
Expand Down Expand Up @@ -650,14 +719,6 @@ func (suite *schedulerTestSuite) checkHotRegionSchedulerConfig(cluster *pdTests.
return reflect.DeepEqual(expect, conf1)
})
}
// scheduler show command
expected := map[string]bool{
"balance-region-scheduler": true,
"balance-leader-scheduler": true,
"balance-hot-region-scheduler": true,
"evict-slow-store-scheduler": true,
}
checkSchedulerCommand(re, cmd, pdAddr, nil, expected)
var conf map[string]any
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf)
re.Equal(expected1, conf)
Expand Down
Loading