From 181fdc95be65fd8c83155c76f0a69ddb2cf143bf Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Wed, 15 Nov 2023 14:45:46 +0800 Subject: [PATCH 01/10] makefile: support build with `boringcrypto` to support Fips (#7275) close tikv/pd#7274 Signed-off-by: Cabinfever_B Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 28 +++++++++++++++++++++++----- pkg/versioninfo/fips.go | 26 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 pkg/versioninfo/fips.go diff --git a/Makefile b/Makefile index 54ad331aea4..906dd9414f9 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,8 @@ dev-basic: build check basic-test BUILD_FLAGS ?= BUILD_TAGS ?= BUILD_CGO_ENABLED := 0 +BUILD_TOOL_CGO_ENABLED := 0 +BUILD_GOEXPERIMENT ?= PD_EDITION ?= Community # Ensure PD_EDITION is set to Community or Enterprise before running build process. ifneq "$(PD_EDITION)" "Community" @@ -46,6 +48,13 @@ ifeq ($(PLUGIN), 1) BUILD_TAGS += with_plugin endif +ifeq ($(ENABLE_FIPS), 1) + BUILD_TAGS+=boringcrypto + BUILD_GOEXPERIMENT=boringcrypto + BUILD_CGO_ENABLED := 1 + BUILD_TOOL_CGO_ENABLED := 1 +endif + LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDReleaseVersion=$(shell git describe --tags --dirty --always)" LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDBuildTS=$(shell date -u '+%Y-%m-%d %I:%M:%S')" LDFLAGS += -X "$(PD_PKG)/pkg/versioninfo.PDGitHash=$(shell git rev-parse HEAD)" @@ -66,6 +75,8 @@ BUILD_BIN_PATH := $(ROOT_PATH)/bin build: pd-server pd-ctl pd-recover +build-fips: pd-server-fips pd-ctl-fips pd-recover-fips + tools: pd-tso-bench pd-heartbeat-bench regions-dump stores-dump pd-api-bench PD_SERVER_DEP := @@ -79,7 +90,7 @@ endif PD_SERVER_DEP += dashboard-ui pd-server: ${PD_SERVER_DEP} - CGO_ENABLED=$(BUILD_CGO_ENABLED) go build $(BUILD_FLAGS) -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -tags "$(BUILD_TAGS)" -o $(BUILD_BIN_PATH)/pd-server cmd/pd-server/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_CGO_ENABLED) go build $(BUILD_FLAGS) -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -tags "$(BUILD_TAGS)" -o $(BUILD_BIN_PATH)/pd-server cmd/pd-server/main.go pd-server-failpoint: @$(FAILPOINT_ENABLE) @@ -89,18 +100,25 @@ pd-server-failpoint: pd-server-basic: SWAGGER=0 DASHBOARD=0 $(MAKE) pd-server -.PHONY: build tools pd-server pd-server-basic +pd-server-fips: + ENABLE_FIPS=1 $(MAKE) pd-server + +.PHONY: build tools pd-server pd-server-basic pd-server-fips # Tools pd-ctl: - CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl tools/pd-ctl/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl tools/pd-ctl/main.go +pd-ctl-fips: + ENABLE_FIPS=1 $(MAKE) pd-ctl pd-tso-bench: cd tools/pd-tso-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-tso-bench main.go pd-api-bench: cd tools/pd-api-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-api-bench main.go pd-recover: - CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-recover tools/pd-recover/main.go + GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-recover tools/pd-recover/main.go +pd-recover-fips: + ENABLE_FIPS=1 $(MAKE) pd-recover pd-analysis: CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-analysis tools/pd-analysis/main.go 
pd-heartbeat-bench: @@ -112,7 +130,7 @@ regions-dump: stores-dump: CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/stores-dump tools/stores-dump/main.go -.PHONY: pd-ctl pd-tso-bench pd-recover pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench +.PHONY: pd-ctl pd-ctl-fips pd-tso-bench pd-recover pd-recover-fips pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench #### Docker image #### diff --git a/pkg/versioninfo/fips.go b/pkg/versioninfo/fips.go new file mode 100644 index 00000000000..02478b103fa --- /dev/null +++ b/pkg/versioninfo/fips.go @@ -0,0 +1,26 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build boringcrypto +// +build boringcrypto + +package versioninfo + +import ( + _ "crypto/tls/fipsonly" +) + +func init() { + PDReleaseVersion += "-fips" +} From 0ebf4b26421f8347e02ca7a8f73f168f34b1ad0d Mon Sep 17 00:00:00 2001 From: Hu# Date: Thu, 16 Nov 2023 10:09:46 +0800 Subject: [PATCH 02/10] member: avoid frequent campaign times (#7301) close tikv/pd#7251, ref tikv/pd#7377 when pd leader frequently campaign leader, but etcd leader did not change. We need to prevent this pd leader campaign and resign to another member. Signed-off-by: husharp Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- client/client.go | 2 +- pkg/election/leadership.go | 32 ++++++++++++++++++++---- pkg/mcs/resourcemanager/server/server.go | 2 +- pkg/mcs/scheduling/server/server.go | 2 +- pkg/member/member.go | 12 ++++++++- pkg/member/participant.go | 2 +- pkg/tso/allocator_manager.go | 8 +++--- pkg/tso/global_allocator.go | 2 +- server/server.go | 2 +- tests/cluster.go | 7 ++++++ tests/server/member/member_test.go | 24 ++++++++++++++++++ 11 files changed, 79 insertions(+), 16 deletions(-) diff --git a/client/client.go b/client/client.go index 56923b697e2..2d30d9fb6c4 100644 --- a/client/client.go +++ b/client/client.go @@ -136,7 +136,7 @@ type Client interface { LoadGlobalConfig(ctx context.Context, names []string, configPath string) ([]GlobalConfigItem, int64, error) // StoreGlobalConfig set the config from etcd StoreGlobalConfig(ctx context.Context, configPath string, items []GlobalConfigItem) error - // WatchGlobalConfig returns an stream with all global config and updates + // WatchGlobalConfig returns a stream with all global config and updates WatchGlobalConfig(ctx context.Context, configPath string, revision int64) (chan []GlobalConfigItem, error) // UpdateOption updates the client option. 
UpdateOption(option DynamicOption, value interface{}) error diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 8cfdcf423ac..572dae132b6 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -32,7 +32,10 @@ import ( "go.uber.org/zap" ) -const watchLoopUnhealthyTimeout = 60 * time.Second +const ( + watchLoopUnhealthyTimeout = 60 * time.Second + campaignTimesRecordTimeout = 5 * time.Minute +) // GetLeader gets the corresponding leader from etcd by given leaderPath (as the key). func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, error) { @@ -62,20 +65,24 @@ type Leadership struct { keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc keepAliveCancelFuncLock syncutil.Mutex + // CampaignTimes is used to record the campaign times of the leader within `campaignTimesRecordTimeout`. + // It is ordered by time to prevent the leader from campaigning too frequently. + CampaignTimes []time.Time } // NewLeadership creates a new Leadership. func NewLeadership(client *clientv3.Client, leaderKey, purpose string) *Leadership { leadership := &Leadership{ - purpose: purpose, - client: client, - leaderKey: leaderKey, + purpose: purpose, + client: client, + leaderKey: leaderKey, + CampaignTimes: make([]time.Time, 0, 10), } return leadership } // getLease gets the lease of leadership, only if leadership is valid, -// i.e the owner is a true leader, the lease is not nil. +// i.e. the owner is a true leader, the lease is not nil. func (ls *Leadership) getLease() *lease { l := ls.lease.Load() if l == nil { @@ -104,8 +111,23 @@ func (ls *Leadership) GetLeaderKey() string { return ls.leaderKey } +// addCampaignTimes is used to add the campaign times of the leader. +func (ls *Leadership) addCampaignTimes() { + for i := len(ls.CampaignTimes) - 1; i >= 0; i-- { + if time.Since(ls.CampaignTimes[i]) > campaignTimesRecordTimeout { + // remove the time which is more than `campaignTimesRecordTimeout` + // array is sorted by time + ls.CampaignTimes = ls.CampaignTimes[i:] + break + } + } + + ls.CampaignTimes = append(ls.CampaignTimes, time.Now()) +} + // Campaign is used to campaign the leader with given lease and returns a leadership func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...clientv3.Cmp) error { + ls.addCampaignTimes() ls.leaderValue = leaderData // Create a new lease to campaign newLease := &lease{ diff --git a/pkg/mcs/resourcemanager/server/server.go b/pkg/mcs/resourcemanager/server/server.go index 7b660c07605..2a1be3e0ca5 100644 --- a/pkg/mcs/resourcemanager/server/server.go +++ b/pkg/mcs/resourcemanager/server/server.go @@ -152,7 +152,7 @@ func (s *Server) primaryElectionLoop() { func (s *Server) campaignLeader() { log.Info("start to campaign the primary/leader", zap.String("campaign-resource-manager-primary-name", s.participant.Name())) - if err := s.participant.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.participant.CampaignLeader(s.Context(), s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info("campaign resource manager primary meets error due to txn conflict, another server may campaign successfully", zap.String("campaign-resource-manager-primary-name", s.participant.Name())) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 4304ffb218a..32b241fee91 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -241,7 +241,7 @@ func (s *Server) primaryElectionLoop() 
{ func (s *Server) campaignLeader() { log.Info("start to campaign the primary/leader", zap.String("campaign-scheduling-primary-name", s.participant.Name())) - if err := s.participant.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.participant.CampaignLeader(s.Context(), s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info("campaign scheduling primary meets error due to txn conflict, another server may campaign successfully", zap.String("campaign-scheduling-primary-name", s.participant.Name())) diff --git a/pkg/member/member.go b/pkg/member/member.go index 80332a65f94..6eddf9a7c77 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -42,6 +42,8 @@ const ( // The timeout to wait transfer etcd leader to complete. moveLeaderTimeout = 5 * time.Second dcLocationConfigEtcdPrefix = "dc-location" + // If the campaign times is more than this value in `campaignTimesRecordTimeout`, the PD will resign and campaign again. + campaignLeaderFrequencyTimes = 3 ) // EmbeddedEtcdMember is used for the election related logic. It implements Member interface. @@ -177,7 +179,15 @@ func (m *EmbeddedEtcdMember) GetLastLeaderUpdatedTime() time.Time { // CampaignLeader is used to campaign a PD member's leadership // and make it become a PD leader. -func (m *EmbeddedEtcdMember) CampaignLeader(leaseTimeout int64) error { +// leader should be changed when campaign leader frequently. +func (m *EmbeddedEtcdMember) CampaignLeader(ctx context.Context, leaseTimeout int64) error { + if len(m.leadership.CampaignTimes) >= campaignLeaderFrequencyTimes { + log.Warn("campaign times is too frequent, resign and campaign again", + zap.String("leader-name", m.Name()), zap.String("leader-key", m.GetLeaderPath())) + // remove all campaign times + m.leadership.CampaignTimes = nil + return m.ResignEtcdLeader(ctx, m.Name(), "") + } return m.leadership.Campaign(leaseTimeout, m.MemberValue()) } diff --git a/pkg/member/participant.go b/pkg/member/participant.go index b3034a86807..82cd7e05f5e 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -196,7 +196,7 @@ func (m *Participant) GetLeadership() *election.Leadership { } // CampaignLeader is used to campaign the leadership and make it become a leader. -func (m *Participant) CampaignLeader(leaseTimeout int64) error { +func (m *Participant) CampaignLeader(_ context.Context, leaseTimeout int64) error { if !m.campaignCheck() { return errs.ErrCheckCampaign } diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index df0ca0affc9..251a3aaf2e6 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -101,13 +101,13 @@ func (info *DCLocationInfo) clone() DCLocationInfo { type ElectionMember interface { // ID returns the unique ID in the election group. For example, it can be unique // server id of a cluster or the unique keyspace group replica id of the election - // group comprised of the replicas of a keyspace group. + // group composed of the replicas of a keyspace group. ID() uint64 - // ID returns the unique name in the election group. + // Name returns the unique name in the election group. Name() string // MemberValue returns the member value. MemberValue() string - // GetMember() returns the current member + // GetMember returns the current member GetMember() interface{} // Client returns the etcd client. Client() *clientv3.Client @@ -124,7 +124,7 @@ type ElectionMember interface { // KeepLeader is used to keep the leader's leadership. 
KeepLeader(ctx context.Context) // CampaignLeader is used to campaign the leadership and make it become a leader in an election group. - CampaignLeader(leaseTimeout int64) error + CampaignLeader(ctx context.Context, leaseTimeout int64) error // ResetLeader is used to reset the member's current leadership. // Basically it will reset the leader lease and unset leader info. ResetLeader() diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 613ceb3eafc..a37bcc73881 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -568,7 +568,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { log.Info("start to campaign the primary", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("campaign-tso-primary-name", gta.member.Name())) - if err := gta.am.member.CampaignLeader(gta.am.leaderLease); err != nil { + if err := gta.am.member.CampaignLeader(gta.ctx, gta.am.leaderLease); err != nil { if errors.Is(err, errs.ErrEtcdTxnConflict) { log.Info("campaign tso primary meets error due to txn conflict, another tso server may campaign successfully", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), diff --git a/server/server.go b/server/server.go index a2c99d0cbec..38064a3b92f 100644 --- a/server/server.go +++ b/server/server.go @@ -1636,7 +1636,7 @@ func (s *Server) leaderLoop() { func (s *Server) campaignLeader() { log.Info(fmt.Sprintf("start to campaign %s leader", s.mode), zap.String("campaign-leader-name", s.Name())) - if err := s.member.CampaignLeader(s.cfg.LeaderLease); err != nil { + if err := s.member.CampaignLeader(s.ctx, s.cfg.LeaderLease); err != nil { if err.Error() == errs.ErrEtcdTxnConflict.Error() { log.Info(fmt.Sprintf("campaign %s leader meets error due to txn conflict, another PD/API server may campaign successfully", s.mode), zap.String("campaign-leader-name", s.Name())) diff --git a/tests/cluster.go b/tests/cluster.go index ae1ae331856..41efc2b045d 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -155,6 +155,13 @@ func (s *TestServer) Destroy() error { return nil } +// ResetPDLeader resigns the leader of the server. +func (s *TestServer) ResetPDLeader() { + s.Lock() + defer s.Unlock() + s.server.GetMember().ResetLeader() +} + // ResignLeader resigns the leader of the server. 
func (s *TestServer) ResignLeader() error { s.Lock() diff --git a/tests/server/member/member_test.go b/tests/server/member/member_test.go index 26d4fa2a904..5965f9e22a6 100644 --- a/tests/server/member/member_test.go +++ b/tests/server/member/member_test.go @@ -323,6 +323,30 @@ func TestMoveLeader(t *testing.T) { } } +func TestCampaignLeaderFrequently(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 5) + defer cluster.Destroy() + re.NoError(err) + + err = cluster.RunInitialServers() + re.NoError(err) + cluster.WaitLeader() + leader := cluster.GetLeader() + re.NotEmpty(cluster.GetLeader()) + + for i := 0; i < 3; i++ { + cluster.GetServers()[cluster.GetLeader()].ResetPDLeader() + cluster.WaitLeader() + } + // leader should be changed when campaign leader frequently + cluster.WaitLeader() + re.NotEmpty(cluster.GetLeader()) + re.NotEqual(leader, cluster.GetLeader()) +} + func TestGetLeader(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) From 112d6dc1cb7b57eba1639e6b465297de4490d8dd Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 16 Nov 2023 11:09:46 +0800 Subject: [PATCH 03/10] mcs: make scheduling server test stable (#7367) close tikv/pd#7362 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/utils/testutil/api_check.go | 15 ++ server/api/diagnostic_test.go | 3 + .../mcs/scheduling/config_test.go | 3 + tests/pdctl/config/config_test.go | 183 ++++++++++-------- tests/server/api/rule_test.go | 69 ++++--- tests/server/api/scheduler_test.go | 17 +- tests/server/config/config_test.go | 10 +- 7 files changed, 177 insertions(+), 123 deletions(-) diff --git a/pkg/utils/testutil/api_check.go b/pkg/utils/testutil/api_check.go index 4ce5e859f3f..ea91654b149 100644 --- a/pkg/utils/testutil/api_check.go +++ b/pkg/utils/testutil/api_check.go @@ -114,6 +114,21 @@ func CheckGetJSON(client *http.Client, url string, data []byte, checkOpts ...fun return checkResp(resp, checkOpts...) } +// CheckGetUntilStatusCode is used to do get request and do check options. +func CheckGetUntilStatusCode(re *require.Assertions, client *http.Client, url string, code int) error { + var err error + Eventually(re, func() bool { + resp, err2 := apiutil.GetJSON(client, url, nil) + if err2 != nil { + err = err2 + return true + } + defer resp.Body.Close() + return resp.StatusCode == code + }) + return err +} + // CheckPatchJSON is used to do patch request and do check options. 
func CheckPatchJSON(client *http.Client, url string, data []byte, checkOpts ...func([]byte, int, http.Header)) error { resp, err := apiutil.PatchJSON(client, url, data) diff --git a/server/api/diagnostic_test.go b/server/api/diagnostic_test.go index 1774c221539..4e08426ea43 100644 --- a/server/api/diagnostic_test.go +++ b/server/api/diagnostic_test.go @@ -17,6 +17,7 @@ package api import ( "encoding/json" "fmt" + "net/http" "testing" "time" @@ -63,6 +64,8 @@ func (suite *diagnosticTestSuite) TearDownSuite() { func (suite *diagnosticTestSuite) checkStatus(status string, url string) { re := suite.Require() + err := tu.CheckGetUntilStatusCode(re, testDialClient, url, http.StatusOK) + suite.NoError(err) suite.Eventually(func() bool { result := &schedulers.DiagnosticResult{} err := tu.ReadGetJSON(re, testDialClient, url, result) diff --git a/tests/integrations/mcs/scheduling/config_test.go b/tests/integrations/mcs/scheduling/config_test.go index 42ba051eb84..06d73caf130 100644 --- a/tests/integrations/mcs/scheduling/config_test.go +++ b/tests/integrations/mcs/scheduling/config_test.go @@ -93,6 +93,9 @@ func (suite *configTestSuite) TestConfigWatch() { re.Equal(sc.DefaultSplitMergeInterval, watcher.GetScheduleConfig().SplitMergeInterval.Duration) re.Equal("0.0.0", watcher.GetClusterVersion().String()) // Update the config and check if the scheduling config watcher can get the latest value. + testutil.Eventually(re, func() bool { + return watcher.GetReplicationConfig().MaxReplicas == 3 + }) persistOpts := suite.pdLeaderServer.GetPersistOptions() persistOpts.SetMaxReplicas(5) persistConfig(re, suite.pdLeaderServer) diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index 2cc8427911a..315ec3cf7c7 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -315,12 +315,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { re.Contains(string(output), "Success!") // test show - var rules []placement.Rule - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show") - re.NoError(err) - re.NoError(json.Unmarshal(output, &rules)) - re.Len(rules, 1) - re.Equal([2]string{"pd", "default"}, rules[0].Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}}) f, _ := os.CreateTemp("/tmp", "pd_tests") fname := f.Name() @@ -328,12 +323,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { defer os.RemoveAll(fname) // test load - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "load", "--out="+fname) - re.NoError(err) - b, _ := os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &rules)) - re.Len(rules, 1) - re.Equal([2]string{"pd", "default"}, rules[0].Key()) + rules := suite.checkLoadRule(pdAddr, fname, [][2]string{{"pd", "default"}}) // test save rules = append(rules, placement.Rule{ @@ -347,42 +337,26 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { Role: "voter", Count: 2, }) - b, _ = json.Marshal(rules) + b, _ := json.Marshal(rules) os.WriteFile(fname, b, 0600) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) re.NoError(err) // test show group - var rules2 []placement.Rule - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show", "--group=pd") - re.NoError(err) - re.NoError(json.Unmarshal(output, &rules2)) - re.Len(rules2, 2) - re.Equal([2]string{"pd", "default"}, rules2[0].Key()) - 
re.Equal([2]string{"pd", "test1"}, rules2[1].Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}, {"pd", "test1"}}, "--group=pd") // test rule region detail tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) - fit := &placement.RegionFit{} - // need clear up args, so create new a cobra.Command. Otherwise gourp still exists. - cmd2 := pdctlCmd.GetRootCmd() - output, err = pdctl.ExecuteCommand(cmd2, "-u", pdAddr, "config", "placement-rules", "show", "--region=1", "--detail") - re.NoError(err) - re.NoError(json.Unmarshal(output, fit)) - re.Len(fit.RuleFits, 3) - re.Equal([2]string{"pd", "default"}, fit.RuleFits[0].Rule.Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}}, "--region=1", "--detail") // test delete + // need clear up args, so create new a cobra.Command. Otherwise gourp still exists. rules[0].Count = 0 b, _ = json.Marshal(rules) os.WriteFile(fname, b, 0600) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) re.NoError(err) - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "show", "--group=pd") - re.NoError(err) - re.NoError(json.Unmarshal(output, &rules)) - re.Len(rules, 1) - re.Equal([2]string{"pd", "test1"}, rules[0].Key()) + suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "test1"}}, "--group=pd") } func (suite *configTestSuite) TestPlacementRuleGroups() { @@ -431,14 +405,16 @@ func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluste // show all var groups []placement.RuleGroup - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show") - re.NoError(err) - re.NoError(json.Unmarshal(output, &groups)) - re.Equal([]placement.RuleGroup{ - {ID: "pd", Index: 42, Override: true}, - {ID: "group2", Index: 100, Override: false}, - {ID: "group3", Index: 200, Override: false}, - }, groups) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show") + re.NoError(err) + re.NoError(json.Unmarshal(output, &groups)) + return reflect.DeepEqual([]placement.RuleGroup{ + {ID: "pd", Index: 42, Override: true}, + {ID: "group2", Index: 100, Override: false}, + {ID: "group3", Index: 200, Override: false}, + }, groups) + }) // delete output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "group2") @@ -446,17 +422,21 @@ func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluste re.Contains(string(output), "Delete group and rules successfully.") // show again - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group2") - re.NoError(err) - re.Contains(string(output), "404") + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group2") + re.NoError(err) + return strings.Contains(string(output), "404") + }) // delete using regex _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "--regexp", ".*3") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group3") - re.NoError(err) - re.Contains(string(output), "404") + testutil.Eventually(re, func() bool { // wait for 
the config to be synced to the scheduling server + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group3") + re.NoError(err) + return strings.Contains(string(output), "404") + }) } func (suite *configTestSuite) TestPlacementRuleBundle() { @@ -496,28 +476,19 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste defer os.RemoveAll(fname) // test load - var bundles []placement.GroupBundle - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ := os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - re.Len(bundles, 1) - re.Equal(placement.GroupBundle{ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, bundles[0]) + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, + }) // test set bundle.ID = "pe" bundle.Rules[0].GroupID = "pe" - b, err = json.Marshal(bundle) + b, err := json.Marshal(bundle) re.NoError(err) re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) - - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, }) @@ -526,11 +497,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "pd") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, }) @@ -542,17 +509,18 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + }) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, _ = os.ReadFile(fname) - 
re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ + bundles := []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, - }) + } + suite.checkLoadRuleBundle(pdAddr, fname, bundles) // test save bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}} @@ -562,13 +530,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) re.NoError(err) - - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, err = os.ReadFile(fname) - re.NoError(err) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, }) @@ -581,16 +543,67 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname, "--partial") re.NoError(err) - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) - re.NoError(err) - b, err = os.ReadFile(fname) - re.NoError(err) - re.NoError(json.Unmarshal(b, &bundles)) - assertBundles(re, bundles, []placement.GroupBundle{ + suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, }) } +func (suite *configTestSuite) checkLoadRuleBundle(pdAddr string, fname string, expectValues []placement.GroupBundle) { + var bundles []placement.GroupBundle + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be synced to the scheduling server + _, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) + suite.NoError(err) + b, _ := os.ReadFile(fname) + suite.NoError(json.Unmarshal(b, &bundles)) + return len(bundles) == len(expectValues) + }) + assertBundles(suite.Require(), bundles, expectValues) +} + +func (suite *configTestSuite) checkLoadRule(pdAddr string, fname string, expectValues [][2]string) []placement.Rule { + var rules []placement.Rule + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be synced to the scheduling server + _, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "load", "--out="+fname) + suite.NoError(err) + b, _ := os.ReadFile(fname) + suite.NoError(json.Unmarshal(b, &rules)) + return len(rules) == len(expectValues) + }) + for i, v := range expectValues { + suite.Equal(v, rules[i].Key()) + } + return rules +} + +func (suite *configTestSuite) checkShowRuleKey(pdAddr string, expectValues [][2]string, opts ...string) { + var rules []placement.Rule + var fit placement.RegionFit + cmd := pdctlCmd.GetRootCmd() + testutil.Eventually(suite.Require(), func() bool { // wait for the config to be 
synced to the scheduling server + args := []string{"-u", pdAddr, "config", "placement-rules", "show"} + output, err := pdctl.ExecuteCommand(cmd, append(args, opts...)...) + suite.NoError(err) + err = json.Unmarshal(output, &rules) + if err == nil { + return len(rules) == len(expectValues) + } + suite.NoError(json.Unmarshal(output, &fit)) + return len(fit.RuleFits) != 0 + }) + if len(rules) != 0 { + for i, v := range expectValues { + suite.Equal(v, rules[i].Key()) + } + } + if len(fit.RuleFits) != 0 { + for i, v := range expectValues { + suite.Equal(v, fit.RuleFits[i].Rule.Key()) + } + } +} + func TestReplicationMode(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) diff --git a/tests/server/api/rule_test.go b/tests/server/api/rule_test.go index 3ee3357e031..6d292021767 100644 --- a/tests/server/api/rule_test.go +++ b/tests/server/api/rule_test.go @@ -210,8 +210,10 @@ func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { var resp placement.Rule url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.rule.GroupID, testCase.rule.ID) if testCase.found { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - suite.compareRule(&resp, &testCase.rule) + tu.Eventually(suite.Require(), func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + return suite.compareRule(&resp, &testCase.rule) + }) } else { err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) } @@ -421,13 +423,17 @@ func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { suite.T().Log(testCase.name) var resp []*placement.Rule url := fmt.Sprintf("%s/rules/group/%s", urlPrefix, testCase.groupID) - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - suite.NoError(err) - suite.Len(resp, testCase.count) - if testCase.count == 2 { - suite.compareRule(resp[0], &rule) - suite.compareRule(resp[1], &rule1) - } + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + suite.NoError(err) + if len(resp) != testCase.count { + return false + } + if testCase.count == 2 { + return suite.compareRule(resp[0], &rule) && suite.compareRule(resp[1], &rule1) + } + return true + }) } } @@ -487,12 +493,15 @@ func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { url := fmt.Sprintf("%s/rules/region/%s", urlPrefix, testCase.regionID) if testCase.success { - err = tu.ReadGetJSON(re, testDialClient, url, &resp) - for _, r := range resp { - if r.GroupID == "e" { - suite.compareRule(r, &rule) + tu.Eventually(suite.Require(), func() bool { + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + for _, r := range resp { + if r.GroupID == "e" { + return suite.compareRule(r, &rule) + } } - } + return true + }) } else { err = tu.CheckGetJSON(testDialClient, url, nil, tu.Status(re, testCase.code)) } @@ -956,22 +965,26 @@ func (suite *ruleTestSuite) checkBundleBadRequest(cluster *tests.TestCluster) { } func (suite *ruleTestSuite) compareBundle(b1, b2 placement.GroupBundle) { - suite.Equal(b2.ID, b1.ID) - suite.Equal(b2.Index, b1.Index) - suite.Equal(b2.Override, b1.Override) - suite.Len(b2.Rules, len(b1.Rules)) - for i := range b1.Rules { - suite.compareRule(b1.Rules[i], b2.Rules[i]) - } + tu.Eventually(suite.Require(), func() bool { + if b2.ID != b1.ID || b2.Index != b1.Index || b2.Override != b1.Override || len(b2.Rules) != len(b1.Rules) { + return false + } + for i := range b1.Rules { + if !suite.compareRule(b1.Rules[i], b2.Rules[i]) { + return false + } + } + return true + }) } -func (suite 
*ruleTestSuite) compareRule(r1 *placement.Rule, r2 *placement.Rule) { - suite.Equal(r2.GroupID, r1.GroupID) - suite.Equal(r2.ID, r1.ID) - suite.Equal(r2.StartKeyHex, r1.StartKeyHex) - suite.Equal(r2.EndKeyHex, r1.EndKeyHex) - suite.Equal(r2.Role, r1.Role) - suite.Equal(r2.Count, r1.Count) +func (suite *ruleTestSuite) compareRule(r1 *placement.Rule, r2 *placement.Rule) bool { + return r2.GroupID == r1.GroupID && + r2.ID == r1.ID && + r2.StartKeyHex == r1.StartKeyHex && + r2.EndKeyHex == r1.EndKeyHex && + r2.Role == r1.Role && + r2.Count == r1.Count } type regionRuleTestSuite struct { diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index 38f691a4eda..4d6dde6f2b9 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "net/http" + "reflect" "testing" "time" @@ -447,18 +448,22 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { suite.NoError(err) suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - exceptMap["4"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} - suite.Equal(exceptMap, resp["store-id-ranges"]) + tu.Eventually(re, func() bool { + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + exceptMap["4"] = []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}} + return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + }) // using /pd/v1/schedule-config/evict-leader-scheduler/config to delete exist store from evict-leader-scheduler deleteURL := fmt.Sprintf("%s%s%s/%s/delete/%s", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, name, "4") err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - delete(exceptMap, "4") - suite.Equal(exceptMap, resp["store-id-ranges"]) + tu.Eventually(re, func() bool { + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + delete(exceptMap, "4") + return reflect.DeepEqual(exceptMap, resp["store-id-ranges"]) + }) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) suite.NoError(err) }, diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index 8d8cf40e692..4a4a91f2661 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -20,6 +20,7 @@ import ( "encoding/json" "fmt" "net/http" + "reflect" "testing" "time" @@ -272,10 +273,11 @@ func (suite *configTestSuite) checkConfigReplication(cluster *tests.TestCluster) suite.NoError(err) rc4 := &sc.ReplicationConfig{} - err = tu.ReadGetJSON(re, testDialClient, addr, rc4) - suite.NoError(err) - - suite.Equal(*rc4, *rc) + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, testDialClient, addr, rc4) + suite.NoError(err) + return reflect.DeepEqual(*rc4, *rc) + }) } func (suite *configTestSuite) TestConfigLabelProperty() { From 4e600c227e83b0b9f92693ae21ff04e2028e1e7d Mon Sep 17 00:00:00 2001 From: glorv Date: Thu, 16 Nov 2023 11:44:17 +0800 Subject: [PATCH 04/10] resourcemanager: return resource-group priority in OnRequestWait (#7378) close tikv/pd#7379, ref tikv/tikv#15994 Signed-off-by: glorv Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../resource_group/controller/controller.go | 12 
+++---- .../controller/controller_test.go | 8 +++-- .../resourcemanager/resource_manager_test.go | 34 +++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go index b528351bedf..01011c2c30a 100755 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -57,7 +57,7 @@ const ( // ResourceGroupKVInterceptor is used as quota limit controller for resource group using kv store. type ResourceGroupKVInterceptor interface { // OnRequestWait is used to check whether resource group has enough tokens. It maybe needs to wait some time. - OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) (*rmpb.Consumption, *rmpb.Consumption, error) + OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) (*rmpb.Consumption, *rmpb.Consumption, uint32, error) // OnResponse is used to consume tokens after receiving response. OnResponse(resourceGroupName string, req RequestInfo, resp ResponseInfo) (*rmpb.Consumption, error) // IsBackgroundRequest If the resource group has background jobs, we should not record consumption and wait for it. @@ -526,10 +526,10 @@ func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context, // OnRequestWait is used to check whether resource group has enough tokens. It maybe needs to wait some time. func (c *ResourceGroupsController) OnRequestWait( ctx context.Context, resourceGroupName string, info RequestInfo, -) (*rmpb.Consumption, *rmpb.Consumption, error) { +) (*rmpb.Consumption, *rmpb.Consumption, uint32, error) { gc, err := c.tryGetResourceGroup(ctx, resourceGroupName) if err != nil { - return nil, nil, err + return nil, nil, 0, err } return gc.onRequestWait(ctx, info) } @@ -1176,7 +1176,7 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { func (gc *groupCostController) onRequestWait( ctx context.Context, info RequestInfo, -) (*rmpb.Consumption, *rmpb.Consumption, error) { +) (*rmpb.Consumption, *rmpb.Consumption, uint32, error) { delta := &rmpb.Consumption{} for _, calc := range gc.calculators { calc.BeforeKVRequest(delta, info) @@ -1226,7 +1226,7 @@ func (gc *groupCostController) onRequestWait( failpoint.Inject("triggerUpdate", func() { gc.lowRUNotifyChan <- struct{}{} }) - return nil, nil, err + return nil, nil, 0, err } gc.successfulRequestDuration.Observe(d.Seconds()) } @@ -1245,7 +1245,7 @@ func (gc *groupCostController) onRequestWait( *gc.mu.storeCounter[info.StoreID()] = *gc.mu.globalCounter gc.mu.Unlock() - return delta, penalty, nil + return delta, penalty, gc.meta.Priority, nil } func (gc *groupCostController) onResponse( diff --git a/client/resource_group/controller/controller_test.go b/client/resource_group/controller/controller_test.go index 6877f8206f3..1db19787a81 100644 --- a/client/resource_group/controller/controller_test.go +++ b/client/resource_group/controller/controller_test.go @@ -30,8 +30,9 @@ import ( func createTestGroupCostController(re *require.Assertions) *groupCostController { group := &rmpb.ResourceGroup{ - Name: "test", - Mode: rmpb.GroupMode_RUMode, + Name: "test", + Mode: rmpb.GroupMode_RUMode, + Priority: 1, RUSettings: &rmpb.GroupRequestUnitSettings{ RU: &rmpb.TokenBucket{ Settings: &rmpb.TokenLimitSettings{ @@ -100,8 +101,9 @@ func TestRequestAndResponseConsumption(t *testing.T) { kvCalculator := gc.getKVCalculator() for idx, testCase := range testCases { caseNum := 
fmt.Sprintf("case %d", idx) - consumption, _, err := gc.onRequestWait(context.TODO(), testCase.req) + consumption, _, priority, err := gc.onRequestWait(context.TODO(), testCase.req) re.NoError(err, caseNum) + re.Equal(priority, gc.meta.Priority) expectedConsumption := &rmpb.Consumption{} if testCase.req.IsWrite() { kvCalculator.calculateWriteCost(expectedConsumption, testCase.req) diff --git a/tests/integrations/mcs/resourcemanager/resource_manager_test.go b/tests/integrations/mcs/resourcemanager/resource_manager_test.go index ed6a3ee501c..91a21caf91b 100644 --- a/tests/integrations/mcs/resourcemanager/resource_manager_test.go +++ b/tests/integrations/mcs/resourcemanager/resource_manager_test.go @@ -438,9 +438,9 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupController() { rres := cas.tcs[i].makeReadResponse() wres := cas.tcs[i].makeWriteResponse() startTime := time.Now() - _, _, err := controller.OnRequestWait(suite.ctx, cas.resourceGroupName, rreq) + _, _, _, err := controller.OnRequestWait(suite.ctx, cas.resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, cas.resourceGroupName, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, cas.resourceGroupName, wreq) re.NoError(err) sum += time.Since(startTime) controller.OnResponse(cas.resourceGroupName, rreq, rres) @@ -457,7 +457,7 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupController() { re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerUpdate", "return(true)")) tcs := tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 900000000, times: 1, waitDuration: 0} wreq := tcs.makeWriteRequest() - _, _, err = controller.OnRequestWait(suite.ctx, rg.Name, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, rg.Name, wreq) re.Error(err) time.Sleep(time.Millisecond * 200) re.NoError(failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerUpdate")) @@ -512,9 +512,9 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { wreq := tcs.makeWriteRequest() rres := tcs.makeReadResponse() wres := tcs.makeWriteResponse() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) + _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) re.NoError(err) controller.OnResponse(resourceGroupName, rreq, rres) controller.OnResponse(resourceGroupName, wreq, wres) @@ -551,9 +551,9 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { rres := cas.tcs[i].makeReadResponse() wres := cas.tcs[i].makeWriteResponse() startTime := time.Now() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) + _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName, rreq) re.NoError(err) - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName, wreq) re.NoError(err) sum += time.Since(startTime) controller.OnResponse(resourceGroupName, rreq, rres) @@ -571,14 +571,14 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { resourceGroupName2 := suite.initGroups[2].Name tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 100000, times: 1, waitDuration: 0} wreq := tcs.makeWriteRequest() - _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName2, 
wreq) + _, _, _, err := controller.OnRequestWait(suite.ctx, resourceGroupName2, wreq) re.NoError(err) re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/acceleratedSpeedTrend", "return(true)")) resourceGroupName3 := suite.initGroups[3].Name tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 1000, times: 1, waitDuration: 0} wreq = tcs.makeWriteRequest() - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) re.NoError(err) time.Sleep(110 * time.Millisecond) tcs = tokenConsumptionPerSecond{rruTokensAtATime: 1, wruTokensAtATime: 10, times: 1010, waitDuration: 0} @@ -586,7 +586,7 @@ func (suite *resourceManagerClientTestSuite) TestSwitchBurst() { for i := 0; i < tcs.times; i++ { wreq = tcs.makeWriteRequest() startTime := time.Now() - _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) + _, _, _, err = controller.OnRequestWait(suite.ctx, resourceGroupName3, wreq) duration += time.Since(startTime) re.NoError(err) } @@ -635,7 +635,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // init req := controller.NewTestRequestInfo(false, 0, 2 /* store2 */) resp := controller.NewTestResponseInfo(0, time.Duration(30), true) - _, penalty, err := c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, err := c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -644,7 +644,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { req = controller.NewTestRequestInfo(true, 60, 1 /* store1 */) resp = controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -654,7 +654,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // failed request, shouldn't be counted in penalty req = controller.NewTestRequestInfo(true, 20, 1 /* store1 */) resp = controller.NewTestResponseInfo(0, time.Duration(0), false) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) re.Equal(penalty.TotalCpuTimeMs, 0.0) @@ -664,7 +664,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from same store, should be zero req1 := controller.NewTestRequestInfo(false, 0, 1 /* store1 */) resp1 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req1) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req1) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req1, resp1) @@ -673,7 +673,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from different store, should be non-zero req2 := controller.NewTestRequestInfo(true, 50, 2 /* store2 */) resp2 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req2) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req2) re.NoError(err) re.Equal(penalty.WriteBytes, 60.0) re.InEpsilon(penalty.TotalCpuTimeMs, 10.0/1000.0/1000.0, 1e-6) @@ -683,7 
+683,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { // from new store, should be zero req3 := controller.NewTestRequestInfo(true, 0, 3 /* store3 */) resp3 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req3) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req3) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req3, resp3) @@ -693,7 +693,7 @@ func (suite *resourceManagerClientTestSuite) TestResourcePenalty() { resourceGroupName = groupNames[1] req4 := controller.NewTestRequestInfo(true, 50, 1 /* store2 */) resp4 := controller.NewTestResponseInfo(0, time.Duration(10), true) - _, penalty, err = c.OnRequestWait(suite.ctx, resourceGroupName, req4) + _, penalty, _, err = c.OnRequestWait(suite.ctx, resourceGroupName, req4) re.NoError(err) re.Equal(penalty.WriteBytes, 0.0) _, err = c.OnResponse(resourceGroupName, req4, resp4) From 95557847375274910fe624ddae9bbae5f7dca003 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Thu, 16 Nov 2023 17:11:16 +0800 Subject: [PATCH 05/10] makefile: delete redundant build commands (#7376) ref tikv/pd#7274 Signed-off-by: Cabinfever_B Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- Makefile | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 906dd9414f9..bf76fd57f2f 100644 --- a/Makefile +++ b/Makefile @@ -75,8 +75,6 @@ BUILD_BIN_PATH := $(ROOT_PATH)/bin build: pd-server pd-ctl pd-recover -build-fips: pd-server-fips pd-ctl-fips pd-recover-fips - tools: pd-tso-bench pd-heartbeat-bench regions-dump stores-dump pd-api-bench PD_SERVER_DEP := @@ -100,25 +98,18 @@ pd-server-failpoint: pd-server-basic: SWAGGER=0 DASHBOARD=0 $(MAKE) pd-server -pd-server-fips: - ENABLE_FIPS=1 $(MAKE) pd-server - -.PHONY: build tools pd-server pd-server-basic pd-server-fips +.PHONY: build tools pd-server pd-server-basic # Tools pd-ctl: GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-ctl tools/pd-ctl/main.go -pd-ctl-fips: - ENABLE_FIPS=1 $(MAKE) pd-ctl pd-tso-bench: cd tools/pd-tso-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-tso-bench main.go pd-api-bench: cd tools/pd-api-bench && CGO_ENABLED=0 go build -o $(BUILD_BIN_PATH)/pd-api-bench main.go pd-recover: GOEXPERIMENT=$(BUILD_GOEXPERIMENT) CGO_ENABLED=$(BUILD_TOOL_CGO_ENABLED) go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-recover tools/pd-recover/main.go -pd-recover-fips: - ENABLE_FIPS=1 $(MAKE) pd-recover pd-analysis: CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/pd-analysis tools/pd-analysis/main.go pd-heartbeat-bench: @@ -130,7 +121,7 @@ regions-dump: stores-dump: CGO_ENABLED=0 go build -gcflags '$(GCFLAGS)' -ldflags '$(LDFLAGS)' -o $(BUILD_BIN_PATH)/stores-dump tools/stores-dump/main.go -.PHONY: pd-ctl pd-ctl-fips pd-tso-bench pd-recover pd-recover-fips pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench +.PHONY: pd-ctl pd-tso-bench pd-recover pd-analysis pd-heartbeat-bench simulator regions-dump stores-dump pd-api-bench #### Docker image #### From a6800a9dfad52f228f242fec2d335ea9bbaa170b Mon Sep 17 00:00:00 2001 From: iosmanthus Date: Thu, 16 Nov 2023 17:42:46 +0800 Subject: [PATCH 06/10] etcdutil: remove stale client endpoints for `healthyChecker` (#7227) close 
tikv/pd#7226 remove stale client endpoints for `healthyChecker` Signed-off-by: iosmanthus Co-authored-by: lhy1024 --- pkg/utils/etcdutil/etcdutil.go | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pkg/utils/etcdutil/etcdutil.go b/pkg/utils/etcdutil/etcdutil.go index e004247c6d0..03c2374efc6 100644 --- a/pkg/utils/etcdutil/etcdutil.go +++ b/pkg/utils/etcdutil/etcdutil.go @@ -382,13 +382,18 @@ func (checker *healthyChecker) patrol(ctx context.Context) []string { } func (checker *healthyChecker) update(eps []string) { + epMap := make(map[string]struct{}) for _, ep := range eps { + epMap[ep] = struct{}{} + } + + for ep := range epMap { // check if client exists, if not, create one, if exists, check if it's offline or disconnected. if client, ok := checker.Load(ep); ok { lastHealthy := client.(*healthyClient).lastHealth if time.Since(lastHealthy) > etcdServerOfflineTimeout { log.Info("some etcd server maybe offline", zap.String("endpoint", ep)) - checker.Delete(ep) + checker.removeClient(ep) } if time.Since(lastHealthy) > etcdServerDisconnectedTimeout { // try to reset client endpoint to trigger reconnect @@ -399,6 +404,16 @@ func (checker *healthyChecker) update(eps []string) { } checker.addClient(ep, time.Now()) } + + // check if there are some stale clients, if exists, remove them. + checker.Range(func(key, value interface{}) bool { + ep := key.(string) + if _, ok := epMap[ep]; !ok { + log.Info("remove stale etcd client", zap.String("endpoint", ep)) + checker.removeClient(ep) + } + return true + }) } func (checker *healthyChecker) addClient(ep string, lastHealth time.Time) { @@ -413,6 +428,15 @@ func (checker *healthyChecker) addClient(ep string, lastHealth time.Time) { }) } +func (checker *healthyChecker) removeClient(ep string) { + if client, ok := checker.LoadAndDelete(ep); ok { + err := client.(*healthyClient).Close() + if err != nil { + log.Error("failed to close etcd healthy client", zap.Error(err)) + } + } +} + func syncUrls(client *clientv3.Client) []string { // See https://github.com/etcd-io/etcd/blob/85b640cee793e25f3837c47200089d14a8392dc7/clientv3/client.go#L170-L183 ctx, cancel := context.WithTimeout(clientv3.WithRequireLeader(client.Ctx()), DefaultRequestTimeout) From f2eaf23e94d2ea267fb423a1058eab17ed5ab754 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 16 Nov 2023 18:45:46 +0800 Subject: [PATCH 07/10] tests: make TestUpdateAfterResetTSO stable (#7385) close tikv/pd#7381 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/member/member.go | 4 ++++ tests/integrations/tso/client_test.go | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/member/member.go b/pkg/member/member.go index 6eddf9a7c77..dd36214a595 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -27,6 +27,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/election" @@ -181,6 +182,9 @@ func (m *EmbeddedEtcdMember) GetLastLeaderUpdatedTime() time.Time { // and make it become a PD leader. // leader should be changed when campaign leader frequently. 
func (m *EmbeddedEtcdMember) CampaignLeader(ctx context.Context, leaseTimeout int64) error { + failpoint.Inject("skipCampaignLeaderCheck", func() { + failpoint.Return(m.leadership.Campaign(leaseTimeout, m.MemberValue())) + }) if len(m.leadership.CampaignTimes) >= campaignLeaderFrequencyTimes { log.Warn("campaign times is too frequent, resign and campaign again", zap.String("leader-name", m.Name()), zap.String("leader-key", m.GetLeaderPath())) diff --git a/tests/integrations/tso/client_test.go b/tests/integrations/tso/client_test.go index 63243214e81..73198690966 100644 --- a/tests/integrations/tso/client_test.go +++ b/tests/integrations/tso/client_test.go @@ -300,7 +300,10 @@ func (suite *tsoClientTestSuite) TestUpdateAfterResetTSO() { re := suite.Require() ctx, cancel := context.WithCancel(suite.ctx) defer cancel() - + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck", "return(true)")) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/member/skipCampaignLeaderCheck")) + }() for i := 0; i < len(suite.clients); i++ { client := suite.clients[i] testutil.Eventually(re, func() bool { From dda748abe55dffbb9b0b67fa582eb5e7231918f2 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Fri, 17 Nov 2023 12:17:18 +0800 Subject: [PATCH 08/10] client/http: implement more HTTP APIs (#7371) ref tikv/pd#7300 - Implement more HTTP APIs. - Use consts more in `Rule` structure. Signed-off-by: JmPotato --- client/http/api.go | 112 +++++++++++++-- client/http/client.go | 126 ++++++++++++++--- client/http/types.go | 70 ++++++++++ pkg/mock/mockcluster/config.go | 4 +- pkg/schedule/checker/merge_checker_test.go | 4 +- pkg/schedule/checker/rule_checker_test.go | 130 +++++++++--------- pkg/schedule/filter/filters_test.go | 2 +- pkg/schedule/operator/create_operator_test.go | 4 +- pkg/schedule/placement/fit_region_test.go | 46 +++---- .../placement/region_rule_cache_test.go | 12 +- pkg/schedule/placement/rule_manager.go | 21 ++- pkg/schedule/placement/rule_manager_test.go | 118 ++++++++-------- pkg/schedule/placement/rule_test.go | 16 +-- pkg/schedule/scatter/region_scatterer_test.go | 8 +- .../schedulers/balance_witness_test.go | 4 +- pkg/schedule/schedulers/hot_region_test.go | 8 +- pkg/schedule/schedulers/scheduler_test.go | 18 +-- server/api/cluster_test.go | 5 +- server/api/region_test.go | 6 +- server/api/rule.go | 2 +- server/cluster/cluster_test.go | 16 +-- server/server.go | 2 +- tests/integrations/client/http_client_test.go | 43 ++++++ tests/integrations/mcs/scheduling/api_test.go | 6 +- .../integrations/mcs/scheduling/rule_test.go | 10 +- tests/pdctl/config/config_test.go | 62 ++++----- tests/server/api/operator_test.go | 2 +- tests/server/api/rule_test.go | 67 ++++----- .../simulator/cases/diagnose_rule.go | 4 +- 29 files changed, 616 insertions(+), 312 deletions(-) diff --git a/client/http/api.go b/client/http/api.go index 5326919561d..2fae562dd20 100644 --- a/client/http/api.go +++ b/client/http/api.go @@ -17,24 +17,58 @@ package http import ( "fmt" "net/url" + "time" ) // The following constants are the paths of PD HTTP APIs. 
const ( - HotRead = "/pd/api/v1/hotspot/regions/read" - HotWrite = "/pd/api/v1/hotspot/regions/write" - Regions = "/pd/api/v1/regions" - regionByID = "/pd/api/v1/region/id" - regionByKey = "/pd/api/v1/region/key" - regionsByKey = "/pd/api/v1/regions/key" - regionsByStoreID = "/pd/api/v1/regions/store" - Stores = "/pd/api/v1/stores" + // Metadata + HotRead = "/pd/api/v1/hotspot/regions/read" + HotWrite = "/pd/api/v1/hotspot/regions/write" + HotHistory = "/pd/api/v1/hotspot/regions/history" + RegionByIDPrefix = "/pd/api/v1/region/id" + regionByKey = "/pd/api/v1/region/key" + Regions = "/pd/api/v1/regions" + regionsByKey = "/pd/api/v1/regions/key" + RegionsByStoreIDPrefix = "/pd/api/v1/regions/store" + EmptyRegions = "/pd/api/v1/regions/check/empty-region" + accelerateSchedule = "/pd/api/v1/regions/accelerate-schedule" + store = "/pd/api/v1/store" + Stores = "/pd/api/v1/stores" + StatsRegion = "/pd/api/v1/stats/region" + // Config + Config = "/pd/api/v1/config" + ClusterVersion = "/pd/api/v1/config/cluster-version" + ScheduleConfig = "/pd/api/v1/config/schedule" + ReplicateConfig = "/pd/api/v1/config/replicate" + // Rule + PlacementRule = "/pd/api/v1/config/rule" + PlacementRules = "/pd/api/v1/config/rules" + placementRulesByGroup = "/pd/api/v1/config/rules/group" + RegionLabelRule = "/pd/api/v1/config/region-label/rule" + // Scheduler + Schedulers = "/pd/api/v1/schedulers" + scatterRangeScheduler = "/pd/api/v1/schedulers/scatter-range-" + // Admin + ResetTS = "/pd/api/v1/admin/reset-ts" + BaseAllocID = "/pd/api/v1/admin/base-alloc-id" + SnapshotRecoveringMark = "/pd/api/v1/admin/cluster/markers/snapshot-recovering" + // Debug + PProfProfile = "/pd/api/v1/debug/pprof/profile" + PProfHeap = "/pd/api/v1/debug/pprof/heap" + PProfMutex = "/pd/api/v1/debug/pprof/mutex" + PProfAllocs = "/pd/api/v1/debug/pprof/allocs" + PProfBlock = "/pd/api/v1/debug/pprof/block" + PProfGoroutine = "/pd/api/v1/debug/pprof/goroutine" + // Others MinResolvedTSPrefix = "/pd/api/v1/min-resolved-ts" + Status = "/pd/api/v1/status" + Version = "/pd/api/v1/version" ) // RegionByID returns the path of PD HTTP API to get region by ID. func RegionByID(regionID uint64) string { - return fmt.Sprintf("%s/%d", regionByID, regionID) + return fmt.Sprintf("%s/%d", RegionByIDPrefix, regionID) } // RegionByKey returns the path of PD HTTP API to get region by key. @@ -45,10 +79,66 @@ func RegionByKey(key []byte) string { // RegionsByKey returns the path of PD HTTP API to scan regions with given start key, end key and limit parameters. func RegionsByKey(startKey, endKey []byte, limit int) string { return fmt.Sprintf("%s?start_key=%s&end_key=%s&limit=%d", - regionsByKey, url.QueryEscape(string(startKey)), url.QueryEscape(string(endKey)), limit) + regionsByKey, + url.QueryEscape(string(startKey)), + url.QueryEscape(string(endKey)), + limit) } // RegionsByStoreID returns the path of PD HTTP API to get regions by store ID. func RegionsByStoreID(storeID uint64) string { - return fmt.Sprintf("%s/%d", regionsByStoreID, storeID) + return fmt.Sprintf("%s/%d", RegionsByStoreIDPrefix, storeID) +} + +// RegionStatsByKeyRange returns the path of PD HTTP API to get region stats by start key and end key. +func RegionStatsByKeyRange(startKey, endKey []byte) string { + return fmt.Sprintf("%s?start_key=%s&end_key=%s", + StatsRegion, + url.QueryEscape(string(startKey)), + url.QueryEscape(string(endKey))) +} + +// StoreByID returns the store API with store ID parameter. 
+func StoreByID(id uint64) string { + return fmt.Sprintf("%s/%d", store, id) +} + +// StoreLabelByID returns the store label API with store ID parameter. +func StoreLabelByID(id uint64) string { + return fmt.Sprintf("%s/%d/label", store, id) +} + +// ConfigWithTTLSeconds returns the config API with the TTL seconds parameter. +func ConfigWithTTLSeconds(ttlSeconds float64) string { + return fmt.Sprintf("%s?ttlSecond=%.0f", Config, ttlSeconds) +} + +// PlacementRulesByGroup returns the path of PD HTTP API to get placement rules by group. +func PlacementRulesByGroup(group string) string { + return fmt.Sprintf("%s/%s", placementRulesByGroup, group) +} + +// PlacementRuleByGroupAndID returns the path of PD HTTP API to get placement rule by group and ID. +func PlacementRuleByGroupAndID(group, id string) string { + return fmt.Sprintf("%s/%s/%s", PlacementRule, group, id) +} + +// SchedulerByName returns the scheduler API with the given scheduler name. +func SchedulerByName(name string) string { + return fmt.Sprintf("%s/%s", Schedulers, name) +} + +// ScatterRangeSchedulerWithName returns the scatter range scheduler API with name parameter. +func ScatterRangeSchedulerWithName(name string) string { + return fmt.Sprintf("%s%s", scatterRangeScheduler, name) +} + +// PProfProfileAPIWithInterval returns the pprof profile API with interval parameter. +func PProfProfileAPIWithInterval(interval time.Duration) string { + return fmt.Sprintf("%s?seconds=%d", PProfProfile, interval/time.Second) +} + +// PProfGoroutineWithDebugLevel returns the pprof goroutine API with debug level parameter. +func PProfGoroutineWithDebugLevel(level int) string { + return fmt.Sprintf("%s?debug=%d", PProfGoroutine, level) } diff --git a/client/http/client.go b/client/http/client.go index 6cb1277dfcb..6fa2dd8cdfd 100644 --- a/client/http/client.go +++ b/client/http/client.go @@ -15,12 +15,14 @@ package http import ( + "bytes" "context" "crypto/tls" "encoding/json" "fmt" "io" "net/http" + "net/url" "strings" "time" @@ -43,12 +45,17 @@ type Client interface { GetRegionByID(context.Context, uint64) (*RegionInfo, error) GetRegionByKey(context.Context, []byte) (*RegionInfo, error) GetRegions(context.Context) (*RegionsInfo, error) - GetRegionsByKey(context.Context, []byte, []byte, int) (*RegionsInfo, error) + GetRegionsByKeyRange(context.Context, []byte, []byte, int) (*RegionsInfo, error) GetRegionsByStoreID(context.Context, uint64) (*RegionsInfo, error) GetHotReadRegions(context.Context) (*StoreHotPeersInfos, error) GetHotWriteRegions(context.Context) (*StoreHotPeersInfos, error) + GetRegionStatusByKeyRange(context.Context, []byte, []byte) (*RegionStats, error) GetStores(context.Context) (*StoresInfo, error) + GetPlacementRulesByGroup(context.Context, string) ([]*Rule, error) + SetPlacementRule(context.Context, *Rule) error + DeletePlacementRule(context.Context, string, string) error GetMinResolvedTSByStoresIDs(context.Context, []uint64) (uint64, map[uint64]uint64, error) + AccelerateSchedule(context.Context, []byte, []byte) error Close() } @@ -154,8 +161,8 @@ func (c *client) execDuration(name string, duration time.Duration) { // it consistent with the current implementation of some clients (e.g. TiDB). 
func (c *client) requestWithRetry( ctx context.Context, - name, uri string, - res interface{}, + name, uri, method string, + body io.Reader, res interface{}, ) error { var ( err error @@ -163,7 +170,7 @@ func (c *client) requestWithRetry( ) for idx := 0; idx < len(c.pdAddrs); idx++ { addr = c.pdAddrs[idx] - err = c.request(ctx, name, addr, uri, res) + err = c.request(ctx, name, fmt.Sprintf("%s%s", addr, uri), method, body, res) if err == nil { break } @@ -175,16 +182,15 @@ func (c *client) requestWithRetry( func (c *client) request( ctx context.Context, - name, addr, uri string, - res interface{}, + name, url, method string, + body io.Reader, res interface{}, ) error { - reqURL := fmt.Sprintf("%s%s", addr, uri) logFields := []zap.Field{ zap.String("name", name), - zap.String("url", reqURL), + zap.String("url", url), } log.Debug("[pd] request the http url", logFields...) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + req, err := http.NewRequestWithContext(ctx, method, url, body) if err != nil { log.Error("[pd] create http request failed", append(logFields, zap.Error(err))...) return errors.Trace(err) @@ -219,6 +225,10 @@ func (c *client) request( return errors.Errorf("request pd http api failed with status: '%s'", resp.Status) } + if res == nil { + return nil + } + err = json.NewDecoder(resp.Body).Decode(res) if err != nil { return errors.Trace(err) @@ -229,7 +239,9 @@ func (c *client) request( // GetRegionByID gets the region info by ID. func (c *client) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) { var region RegionInfo - err := c.requestWithRetry(ctx, "GetRegionByID", RegionByID(regionID), ®ion) + err := c.requestWithRetry(ctx, + "GetRegionByID", RegionByID(regionID), + http.MethodGet, nil, ®ion) if err != nil { return nil, err } @@ -239,7 +251,9 @@ func (c *client) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInf // GetRegionByKey gets the region info by key. func (c *client) GetRegionByKey(ctx context.Context, key []byte) (*RegionInfo, error) { var region RegionInfo - err := c.requestWithRetry(ctx, "GetRegionByKey", RegionByKey(key), ®ion) + err := c.requestWithRetry(ctx, + "GetRegionByKey", RegionByKey(key), + http.MethodGet, nil, ®ion) if err != nil { return nil, err } @@ -249,17 +263,21 @@ func (c *client) GetRegionByKey(ctx context.Context, key []byte) (*RegionInfo, e // GetRegions gets the regions info. func (c *client) GetRegions(ctx context.Context) (*RegionsInfo, error) { var regions RegionsInfo - err := c.requestWithRetry(ctx, "GetRegions", Regions, ®ions) + err := c.requestWithRetry(ctx, + "GetRegions", Regions, + http.MethodGet, nil, ®ions) if err != nil { return nil, err } return ®ions, nil } -// GetRegionsByKey gets the regions info by key range. If the limit is -1, it will return all regions within the range. -func (c *client) GetRegionsByKey(ctx context.Context, startKey, endKey []byte, limit int) (*RegionsInfo, error) { +// GetRegionsByKeyRange gets the regions info by key range. If the limit is -1, it will return all regions within the range. 
+func (c *client) GetRegionsByKeyRange(ctx context.Context, startKey, endKey []byte, limit int) (*RegionsInfo, error) { var regions RegionsInfo - err := c.requestWithRetry(ctx, "GetRegionsByKey", RegionsByKey(startKey, endKey, limit), ®ions) + err := c.requestWithRetry(ctx, + "GetRegionsByKeyRange", RegionsByKey(startKey, endKey, limit), + http.MethodGet, nil, ®ions) if err != nil { return nil, err } @@ -269,7 +287,9 @@ func (c *client) GetRegionsByKey(ctx context.Context, startKey, endKey []byte, l // GetRegionsByStoreID gets the regions info by store ID. func (c *client) GetRegionsByStoreID(ctx context.Context, storeID uint64) (*RegionsInfo, error) { var regions RegionsInfo - err := c.requestWithRetry(ctx, "GetRegionsByStoreID", RegionsByStoreID(storeID), ®ions) + err := c.requestWithRetry(ctx, + "GetRegionsByStoreID", RegionsByStoreID(storeID), + http.MethodGet, nil, ®ions) if err != nil { return nil, err } @@ -279,7 +299,9 @@ func (c *client) GetRegionsByStoreID(ctx context.Context, storeID uint64) (*Regi // GetHotReadRegions gets the hot read region statistics info. func (c *client) GetHotReadRegions(ctx context.Context) (*StoreHotPeersInfos, error) { var hotReadRegions StoreHotPeersInfos - err := c.requestWithRetry(ctx, "GetHotReadRegions", HotRead, &hotReadRegions) + err := c.requestWithRetry(ctx, + "GetHotReadRegions", HotRead, + http.MethodGet, nil, &hotReadRegions) if err != nil { return nil, err } @@ -289,23 +311,70 @@ func (c *client) GetHotReadRegions(ctx context.Context) (*StoreHotPeersInfos, er // GetHotWriteRegions gets the hot write region statistics info. func (c *client) GetHotWriteRegions(ctx context.Context) (*StoreHotPeersInfos, error) { var hotWriteRegions StoreHotPeersInfos - err := c.requestWithRetry(ctx, "GetHotWriteRegions", HotWrite, &hotWriteRegions) + err := c.requestWithRetry(ctx, + "GetHotWriteRegions", HotWrite, + http.MethodGet, nil, &hotWriteRegions) if err != nil { return nil, err } return &hotWriteRegions, nil } +// GetRegionStatusByKeyRange gets the region status by key range. +func (c *client) GetRegionStatusByKeyRange(ctx context.Context, startKey, endKey []byte) (*RegionStats, error) { + var regionStats RegionStats + err := c.requestWithRetry(ctx, + "GetRegionStatusByKeyRange", RegionStatsByKeyRange(startKey, endKey), + http.MethodGet, nil, ®ionStats, + ) + if err != nil { + return nil, err + } + return ®ionStats, nil +} + // GetStores gets the stores info. func (c *client) GetStores(ctx context.Context) (*StoresInfo, error) { var stores StoresInfo - err := c.requestWithRetry(ctx, "GetStores", Stores, &stores) + err := c.requestWithRetry(ctx, + "GetStores", Stores, + http.MethodGet, nil, &stores) if err != nil { return nil, err } return &stores, nil } +// GetPlacementRulesByGroup gets the placement rules by group. +func (c *client) GetPlacementRulesByGroup(ctx context.Context, group string) ([]*Rule, error) { + var rules []*Rule + err := c.requestWithRetry(ctx, + "GetPlacementRulesByGroup", PlacementRulesByGroup(group), + http.MethodGet, nil, &rules) + if err != nil { + return nil, err + } + return rules, nil +} + +// SetPlacementRule sets the placement rule. +func (c *client) SetPlacementRule(ctx context.Context, rule *Rule) error { + ruleJSON, err := json.Marshal(rule) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "SetPlacementRule", PlacementRule, + http.MethodPost, bytes.NewBuffer(ruleJSON), nil) +} + +// DeletePlacementRule deletes the placement rule. 
+func (c *client) DeletePlacementRule(ctx context.Context, group, id string) error { + return c.requestWithRetry(ctx, + "DeletePlacementRule", PlacementRuleByGroupAndID(group, id), + http.MethodDelete, nil, nil) +} + // GetMinResolvedTSByStoresIDs get min-resolved-ts by stores IDs. func (c *client) GetMinResolvedTSByStoresIDs(ctx context.Context, storeIDs []uint64) (uint64, map[uint64]uint64, error) { uri := MinResolvedTSPrefix @@ -326,7 +395,9 @@ func (c *client) GetMinResolvedTSByStoresIDs(ctx context.Context, storeIDs []uin IsRealTime bool `json:"is_real_time,omitempty"` StoresMinResolvedTS map[uint64]uint64 `json:"stores_min_resolved_ts"` }{} - err := c.requestWithRetry(ctx, "GetMinResolvedTSByStoresIDs", uri, &resp) + err := c.requestWithRetry(ctx, + "GetMinResolvedTSByStoresIDs", uri, + http.MethodGet, nil, &resp) if err != nil { return 0, nil, err } @@ -335,3 +406,18 @@ func (c *client) GetMinResolvedTSByStoresIDs(ctx context.Context, storeIDs []uin } return resp.MinResolvedTS, resp.StoresMinResolvedTS, nil } + +// AccelerateSchedule accelerates the scheduling of the regions within the given key range. +func (c *client) AccelerateSchedule(ctx context.Context, startKey, endKey []byte) error { + input := map[string]string{ + "start_key": url.QueryEscape(string(startKey)), + "end_key": url.QueryEscape(string(endKey)), + } + inputJSON, err := json.Marshal(input) + if err != nil { + return errors.Trace(err) + } + return c.requestWithRetry(ctx, + "AccelerateSchedule", accelerateSchedule, + http.MethodPost, bytes.NewBuffer(inputJSON), nil) +} diff --git a/client/http/types.go b/client/http/types.go index 66eb31ec3a1..c6bb0256c14 100644 --- a/client/http/types.go +++ b/client/http/types.go @@ -176,3 +176,73 @@ type StoreStatus struct { LastHeartbeatTS time.Time `json:"last_heartbeat_ts"` Uptime string `json:"uptime"` } + +// RegionStats stores the statistics of regions. +type RegionStats struct { + Count int `json:"count"` + EmptyCount int `json:"empty_count"` + StorageSize int64 `json:"storage_size"` + StorageKeys int64 `json:"storage_keys"` + StoreLeaderCount map[uint64]int `json:"store_leader_count"` + StorePeerCount map[uint64]int `json:"store_peer_count"` +} + +// PeerRoleType is the expected peer type of the placement rule. +type PeerRoleType string + +const ( + // Voter can either match a leader peer or follower peer + Voter PeerRoleType = "voter" + // Leader matches a leader. + Leader PeerRoleType = "leader" + // Follower matches a follower. + Follower PeerRoleType = "follower" + // Learner matches a learner. + Learner PeerRoleType = "learner" +) + +// LabelConstraint is used to filter store when trying to place peer of a region. +type LabelConstraint struct { + Key string `json:"key,omitempty"` + Op LabelConstraintOp `json:"op,omitempty"` + Values []string `json:"values,omitempty"` +} + +// LabelConstraintOp defines how a LabelConstraint matches a store. It can be one of +// 'in', 'notIn', 'exists', or 'notExists'. +type LabelConstraintOp string + +const ( + // In restricts the store label value should in the value list. + // If label does not exist, `in` is always false. + In LabelConstraintOp = "in" + // NotIn restricts the store label value should not in the value list. + // If label does not exist, `notIn` is always true. + NotIn LabelConstraintOp = "notIn" + // Exists restricts the store should have the label. + Exists LabelConstraintOp = "exists" + // NotExists restricts the store should not have the label. 
+ NotExists LabelConstraintOp = "notExists" +) + +// Rule is the placement rule that can be checked against a region. When +// applying rules (apply means schedule regions to match selected rules), the +// apply order is defined by the tuple [GroupIndex, GroupID, Index, ID]. +type Rule struct { + GroupID string `json:"group_id"` // mark the source that add the rule + ID string `json:"id"` // unique ID within a group + Index int `json:"index,omitempty"` // rule apply order in a group, rule with less ID is applied first when indexes are equal + Override bool `json:"override,omitempty"` // when it is true, all rules with less indexes are disabled + StartKey []byte `json:"-"` // range start key + StartKeyHex string `json:"start_key"` // hex format start key, for marshal/unmarshal + EndKey []byte `json:"-"` // range end key + EndKeyHex string `json:"end_key"` // hex format end key, for marshal/unmarshal + Role PeerRoleType `json:"role"` // expected role of the peers + IsWitness bool `json:"is_witness"` // when it is true, it means the role is also a witness + Count int `json:"count"` // expected count of the peers + LabelConstraints []LabelConstraint `json:"label_constraints,omitempty"` // used to select stores to place peers + LocationLabels []string `json:"location_labels,omitempty"` // used to make peers isolated physically + IsolationLevel string `json:"isolation_level,omitempty"` // used to isolate replicas explicitly and forcibly + Version uint64 `json:"version,omitempty"` // only set at runtime, add 1 each time rules updated, begin from 0. + CreateTimestamp uint64 `json:"create_timestamp,omitempty"` // only set at runtime, recorded rule create timestamp +} diff --git a/pkg/mock/mockcluster/config.go b/pkg/mock/mockcluster/config.go index 6febba026e8..a2e11b43deb 100644 --- a/pkg/mock/mockcluster/config.go +++ b/pkg/mock/mockcluster/config.go @@ -154,8 +154,8 @@ func (mc *Cluster) SetMaxReplicasWithLabel(enablePlacementRules bool, num int, l } if enablePlacementRules { rule := &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), diff --git a/pkg/schedule/checker/merge_checker_test.go b/pkg/schedule/checker/merge_checker_test.go index 6478eb0b2c4..5e9311c76cd 100644 --- a/pkg/schedule/checker/merge_checker_test.go +++ b/pkg/schedule/checker/merge_checker_test.go @@ -188,7 +188,7 @@ func (suite *mergeCheckerTestSuite) TestBasic() { // merge cannot across rule key. 
suite.cluster.SetEnablePlacementRules(true) suite.cluster.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 1, Override: true, @@ -202,7 +202,7 @@ func (suite *mergeCheckerTestSuite) TestBasic() { suite.NotNil(ops) suite.Equal(suite.regions[2].GetID(), ops[0].RegionID()) suite.Equal(suite.regions[1].GetID(), ops[1].RegionID()) - suite.cluster.RuleManager.DeleteRule("pd", "test") + suite.cluster.RuleManager.DeleteRule(placement.DefaultGroupID, "test") // check 'merge_option' label suite.cluster.GetRegionLabeler().SetLabelRule(&labeler.LabelRule{ diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index 4185ce6c167..e77830fac49 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -88,7 +88,7 @@ func (suite *ruleCheckerTestSuite) TestAddRulePeerWithIsolationLevel() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z1", "rack": "r3", "host": "h1"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -101,7 +101,7 @@ func (suite *ruleCheckerTestSuite) TestAddRulePeerWithIsolationLevel() { suite.Nil(op) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -125,9 +125,9 @@ func (suite *ruleCheckerTestSuite) TestReplaceDownPeerWithIsolationLevel() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3", "host": "h5"}) suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z3", "host": "h6"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3, 5) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -331,7 +331,7 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeers2() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"foo": "baz"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -367,7 +367,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeader() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"role": "voter"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -378,7 +378,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeader() { }, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r2", Index: 101, Role: placement.Follower, @@ -398,7 +398,7 @@ func (suite *ruleCheckerTestSuite) TestFixRoleLeaderIssue3130() { suite.cluster.AddLabelsStore(2, 1, map[string]string{"role": "leader"}) suite.cluster.AddLeaderRegion(1, 1, 2) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -471,7 +471,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() { suite.cluster.AddLeaderRegion(1, 1) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: 
placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -497,7 +497,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() { suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: false, @@ -544,8 +544,8 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Override: true, Role: placement.Voter, @@ -553,7 +553,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness4() { IsWitness: false, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: false, @@ -580,7 +580,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness5() { suite.cluster.AddLeaderRegion(1, 1, 2, 3) err := suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: true, @@ -603,15 +603,15 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness6() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Role: placement.Voter, IsWitness: false, Count: 2, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Role: placement.Voter, @@ -641,15 +641,15 @@ func (suite *ruleCheckerTestSuite) TestDisableWitness() { err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Role: placement.Voter, IsWitness: false, Count: 2, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Role: placement.Voter, @@ -680,7 +680,7 @@ func (suite *ruleCheckerTestSuite) TestBetterReplacement() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host3"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -704,7 +704,7 @@ func (suite *ruleCheckerTestSuite) TestBetterReplacement2() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z2", "host": "host1"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -727,7 +727,7 @@ func (suite *ruleCheckerTestSuite) TestNoBetterReplacement() { suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host2"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 3) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -835,8 +835,8 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, followers...) 
rule := &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 5, StartKey: []byte{}, @@ -853,8 +853,8 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule // change rule to 3 replicas rule = &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, StartKey: []byte{}, @@ -941,8 +941,8 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule suite.cluster.AddLeaderRegionWithRange(1, "", "", leader, voterFollowers...) err := suite.ruleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 100, Override: true, Role: placement.Voter, @@ -950,7 +950,7 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule IsWitness: false, }, { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Index: 100, Override: false, @@ -975,10 +975,10 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeerWithDisconnectedStoreAndRule suite.cluster.SetStoreDisconnect(testCase[2]) // change rule to 3 replicas - suite.ruleManager.DeleteRule("pd", "r1") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, "r1") suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, StartKey: []byte{}, @@ -1106,13 +1106,13 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "engine": "tiflash"}) suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4}) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Role: placement.Voter, Count: 3, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Learner, Count: 1, @@ -1126,7 +1126,7 @@ func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() { } suite.ruleManager.SetRule(rule) suite.ruleManager.SetRule(rule2) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) r1 := suite.cluster.GetRegion(1) // set peer3 to pending and down @@ -1177,12 +1177,12 @@ func (suite *ruleCheckerTestSuite) TestIssue3293() { suite.cluster.DeleteStore(suite.cluster.GetStore(5)) err = suite.ruleManager.SetRule(&placement.Rule{ GroupID: "TiDB_DDL_51", - ID: "default", + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, }) suite.NoError(err) - err = suite.ruleManager.DeleteRule("pd", "default") + err = suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.NoError(err) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) @@ -1290,7 +1290,7 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeer() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1346,13 +1346,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownWitnessPeer() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + 
ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1379,13 +1379,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1417,13 +1417,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness2() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1451,13 +1451,13 @@ func (suite *ruleCheckerTestSuite) TestFixDownPeerWithAvailableWitness3() { r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1508,7 +1508,7 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeer() { suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3"}) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1543,13 +1543,13 @@ func (suite *ruleCheckerTestSuite) TestFixOfflinePeerWithAvaliableWitness() { r := suite.cluster.GetRegion(1) r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(2)})) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) suite.ruleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "r1", Role: placement.Voter, Count: 1, @@ -1573,7 +1573,7 @@ func (suite *ruleCheckerTestSuite) TestRuleCache() { suite.cluster.AddRegionStore(999, 1) suite.cluster.AddLeaderRegion(1, 1, 3, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Index: 100, Override: true, @@ -1592,7 +1592,7 @@ func (suite *ruleCheckerTestSuite) TestRuleCache() { stillCached bool }{ { - name: "default", + name: placement.DefaultRuleID, region: region, stillCached: true, }, @@ -1718,7 +1718,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z4"}) region := suite.cluster.AddLeaderRegion(1, 1, 4) rule := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test", Role: placement.Voter, Count: 1, @@ -1731,7 +1731,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { }, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Learner, Count: 1, @@ -1745,7 +1745,7 @@ func (suite *ruleCheckerTestSuite) TestDemoteVoter() { } 
suite.ruleManager.SetRule(rule) suite.ruleManager.SetRule(rule2) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) op := suite.rc.Check(region) suite.NotNil(op) suite.Equal("fix-demote-voter", op.Desc()) @@ -1807,7 +1807,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 5) rule1 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", Role: placement.Leader, Count: 1, @@ -1821,7 +1821,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { LocationLabels: []string{"rack"}, } rule2 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test2", Role: placement.Voter, Count: 1, @@ -1835,7 +1835,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { LocationLabels: []string{"rack"}, } rule3 := &placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test3", Role: placement.Voter, Count: 1, @@ -1851,7 +1851,7 @@ func (suite *ruleCheckerTestSuite) TestLocationLabels() { suite.ruleManager.SetRule(rule1) suite.ruleManager.SetRule(rule2) suite.ruleManager.SetRule(rule3) - suite.ruleManager.DeleteRule("pd", "default") + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) suite.Equal("move-to-better-location", op.Desc()) @@ -1882,7 +1882,7 @@ func (suite *ruleCheckerTestSuite) TestTiFlashLocationLabels() { }, } suite.ruleManager.SetRule(rule1) - rule := suite.ruleManager.GetRule("pd", "default") + rule := suite.ruleManager.GetRule(placement.DefaultGroupID, placement.DefaultRuleID) rule.LocationLabels = []string{"zone", "rack", "host"} suite.ruleManager.SetRule(rule) op := suite.rc.Check(suite.cluster.GetRegion(1)) diff --git a/pkg/schedule/filter/filters_test.go b/pkg/schedule/filter/filters_test.go index fa085890694..f030dff81a4 100644 --- a/pkg/schedule/filter/filters_test.go +++ b/pkg/schedule/filter/filters_test.go @@ -159,7 +159,7 @@ func TestRuleFitFilterWithPlacementRule(t *testing.T) { testCluster := mockcluster.NewCluster(ctx, opt) testCluster.SetEnablePlacementRules(true) ruleManager := testCluster.RuleManager - ruleManager.DeleteRule("pd", "default") + ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) err := ruleManager.SetRules([]*placement.Rule{ { GroupID: "test", diff --git a/pkg/schedule/operator/create_operator_test.go b/pkg/schedule/operator/create_operator_test.go index 08a30680303..2fcd45d11f2 100644 --- a/pkg/schedule/operator/create_operator_test.go +++ b/pkg/schedule/operator/create_operator_test.go @@ -1145,8 +1145,8 @@ func TestCreateLeaveJointStateOperatorWithoutFitRules(t *testing.T) { cluster := mockcluster.NewCluster(ctx, opts) re.NoError(cluster.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, StartKeyHex: hex.EncodeToString([]byte("")), EndKeyHex: hex.EncodeToString([]byte("")), Role: placement.Voter, diff --git a/pkg/schedule/placement/fit_region_test.go b/pkg/schedule/placement/fit_region_test.go index 0ec67b2a2aa..5bc62d9cc12 100644 --- a/pkg/schedule/placement/fit_region_test.go +++ b/pkg/schedule/placement/fit_region_test.go @@ -55,8 +55,8 @@ func (ms mockStoresSet) GetStore(id uint64) *core.StoreInfo { func addExtraRules(extraRules int) 
[]*Rule { rules := make([]*Rule, 0) rules = append(rules, &Rule{ - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -110,8 +110,8 @@ func BenchmarkFitRegion(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -129,8 +129,8 @@ func BenchmarkFitRegionMoreStores(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, LocationLabels: []string{}, @@ -148,8 +148,8 @@ func BenchmarkFitRegionMorePeers(b *testing.B) { region := mockRegion(5, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 5, LocationLabels: []string{}, @@ -167,14 +167,14 @@ func BenchmarkFitRegionMorePeersEquals(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Leader, Count: 1, LocationLabels: []string{}, }, { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Follower, Count: 4, @@ -193,8 +193,8 @@ func BenchmarkFitRegionMorePeersSplitRules(b *testing.B) { region := mockRegion(3, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Leader, Count: 1, LocationLabels: []string{}, @@ -202,7 +202,7 @@ func BenchmarkFitRegionMorePeersSplitRules(b *testing.B) { } for i := 0; i < 4; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 1, @@ -221,8 +221,8 @@ func BenchmarkFitRegionMoreVotersSplitRules(b *testing.B) { region := mockRegion(5, 0) rules := []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 1, LocationLabels: []string{}, @@ -230,7 +230,7 @@ func BenchmarkFitRegionMoreVotersSplitRules(b *testing.B) { } for i := 0; i < 4; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Voter, Count: 1, @@ -260,7 +260,7 @@ func BenchmarkFitRegionCrossRegion(b *testing.B) { region := mockRegion(5, 0) rules := make([]*Rule, 0) rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "1", Role: Leader, Count: 1, @@ -268,7 +268,7 @@ func BenchmarkFitRegionCrossRegion(b *testing.B) { }) for i := 0; i < 2; i++ { rules = append(rules, &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 1, @@ -289,7 +289,7 @@ func BenchmarkFitRegionWithMoreRulesAndStoreLabels(b *testing.B) { // create 100 rules, with each rule has 101 LabelConstraints. 
for i := 0; i < 100; i++ { rule := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: fmt.Sprintf("%v", i), Role: Follower, Count: 3, @@ -351,7 +351,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { region := mockRegion(5, 5) rules := []*Rule{} rule := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "followers", Role: Follower, Count: 3, @@ -360,7 +360,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { } rules = append(rules, rule) rule = &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "learner", Role: Learner, Count: 3, @@ -369,7 +369,7 @@ func BenchmarkFitRegionWithLocationLabels(b *testing.B) { } rules = append(rules, rule) rule = &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "voters", Role: Voter, Count: 4, diff --git a/pkg/schedule/placement/region_rule_cache_test.go b/pkg/schedule/placement/region_rule_cache_test.go index b4164e85530..835203bed26 100644 --- a/pkg/schedule/placement/region_rule_cache_test.go +++ b/pkg/schedule/placement/region_rule_cache_test.go @@ -99,8 +99,8 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 4, Version: 1, @@ -114,8 +114,8 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: 3, CreateTimestamp: 1, @@ -141,7 +141,7 @@ func TestRegionRuleFitCache(t *testing.T) { region: mockRegion(3, 0), rules: []*Rule{ { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Voter, Count: 3, @@ -155,7 +155,7 @@ func TestRegionRuleFitCache(t *testing.T) { region: nil, rules: []*Rule{ { - GroupID: "pd", + GroupID: DefaultGroupID, ID: "default-2", Role: Voter, Count: 3, diff --git a/pkg/schedule/placement/rule_manager.go b/pkg/schedule/placement/rule_manager.go index a7e169b74aa..e25b8802b45 100644 --- a/pkg/schedule/placement/rule_manager.go +++ b/pkg/schedule/placement/rule_manager.go @@ -37,6 +37,15 @@ import ( "golang.org/x/exp/slices" ) +const ( + // DefaultGroupID is the default rule group ID. + DefaultGroupID = "pd" + // DefaultRuleID is the default rule ID. + DefaultRuleID = "default" + // defaultWitnessRuleID is the default witness rule ID. + defaultWitnessRuleID = "witness" +) + // RuleManager is responsible for the lifecycle of all placement Rules. // It is thread safe. 
type RuleManager struct { @@ -88,16 +97,16 @@ func (m *RuleManager) Initialize(maxReplica int, locationLabels []string, isolat defaultRules = append(defaultRules, []*Rule{ { - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: maxReplica - witnessCount, LocationLabels: locationLabels, IsolationLevel: isolationLevel, }, { - GroupID: "pd", - ID: "witness", + GroupID: DefaultGroupID, + ID: defaultWitnessRuleID, Role: Voter, Count: witnessCount, IsWitness: true, @@ -108,8 +117,8 @@ func (m *RuleManager) Initialize(maxReplica int, locationLabels []string, isolat ) } else { defaultRules = append(defaultRules, &Rule{ - GroupID: "pd", - ID: "default", + GroupID: DefaultGroupID, + ID: DefaultRuleID, Role: Voter, Count: maxReplica, LocationLabels: locationLabels, diff --git a/pkg/schedule/placement/rule_manager_test.go b/pkg/schedule/placement/rule_manager_test.go index dad50a2d881..68a18b538d4 100644 --- a/pkg/schedule/placement/rule_manager_test.go +++ b/pkg/schedule/placement/rule_manager_test.go @@ -44,8 +44,8 @@ func TestDefault(t *testing.T) { _, manager := newTestManager(t, false) rules := manager.GetAllRules() re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(DefaultGroupID, rules[0].GroupID) + re.Equal(DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) @@ -58,15 +58,15 @@ func TestDefault2(t *testing.T) { _, manager := newTestManager(t, true) rules := manager.GetAllRules() re.Len(rules, 2) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(DefaultGroupID, rules[0].GroupID) + re.Equal(DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) re.Equal(Voter, rules[0].Role) re.Equal([]string{"zone", "rack", "host"}, rules[0].LocationLabels) - re.Equal("pd", rules[1].GroupID) - re.Equal("witness", rules[1].ID) + re.Equal(DefaultGroupID, rules[1].GroupID) + re.Equal(defaultWitnessRuleID, rules[1].ID) re.Equal(0, rules[1].Index) re.Empty(rules[1].StartKey) re.Empty(rules[1].EndKey) @@ -79,16 +79,16 @@ func TestAdjustRule(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) rules := []Rule{ - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123ab", EndKeyHex: "123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "1123abf", Role: "voter", Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123aaa", Role: "voter", Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123ab", EndKeyHex: "123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "1123abf", Role: Voter, Count: 3}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123aaa", Role: Voter, Count: 3}, {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "master", 
Count: 3}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 0}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: -1}, - {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3, LabelConstraints: []LabelConstraint{{Op: "foo"}}}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 0}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: -1}, + {GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3, LabelConstraints: []LabelConstraint{{Op: "foo"}}}, } re.NoError(manager.adjustRule(&rules[0], "group")) @@ -101,17 +101,17 @@ func TestAdjustRule(t *testing.T) { } manager.SetKeyType(constant.Table.String()) - re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, "group")) + re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, "group")) manager.SetKeyType(constant.Txn.String()) - re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3}, "group")) + re.Error(manager.adjustRule(&Rule{GroupID: "group", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3}, "group")) re.Error(manager.adjustRule(&Rule{ GroupID: "group", ID: "id", StartKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{0})), EndKeyHex: "123abf", - Role: "voter", + Role: Voter, Count: 3, }, "group")) @@ -120,7 +120,7 @@ func TestAdjustRule(t *testing.T) { ID: "id", StartKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{0})), EndKeyHex: hex.EncodeToString(codec.EncodeBytes([]byte{1})), - Role: "learner", + Role: Learner, Count: 1, IsWitness: true, LabelConstraints: []LabelConstraint{{Key: "engine", Op: "in", Values: []string{"tiflash"}}}, @@ -130,15 +130,15 @@ func TestAdjustRule(t *testing.T) { func TestLeaderCheck(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - re.Regexp(".*needs at least one leader or voter.*", manager.SetRule(&Rule{GroupID: "pd", ID: "default", Role: "learner", Count: 3}).Error()) - re.Regexp(".*define multiple leaders by count 2.*", manager.SetRule(&Rule{GroupID: "g2", ID: "33", Role: "leader", Count: 2}).Error()) + re.Regexp(".*needs at least one leader or voter.*", manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: DefaultRuleID, Role: Learner, Count: 3}).Error()) + re.Regexp(".*define multiple leaders by count 2.*", manager.SetRule(&Rule{GroupID: "g2", ID: "33", Role: Leader, Count: 2}).Error()) re.Regexp(".*multiple leader replicas.*", manager.Batch([]RuleOp{ { - Rule: &Rule{GroupID: "g2", ID: "foo1", Role: "leader", Count: 1}, + Rule: &Rule{GroupID: "g2", ID: "foo1", Role: Leader, Count: 1}, Action: RuleOpAdd, }, { - Rule: &Rule{GroupID: "g2", ID: "foo2", Role: "leader", Count: 1}, + Rule: &Rule{GroupID: "g2", ID: "foo2", Role: Leader, Count: 1}, Action: RuleOpAdd, }, }).Error()) @@ -148,9 +148,9 @@ func TestSaveLoad(t *testing.T) { re := require.New(t) store, manager := newTestManager(t, false) rules := []*Rule{ - {GroupID: "pd", ID: "default", Role: "voter", Count: 5}, - {GroupID: "foo", ID: "baz", StartKeyHex: "", EndKeyHex: "abcd", Role: "voter", Count: 1}, - {GroupID: "foo", ID: "bar", Role: "learner", Count: 1}, + {GroupID: DefaultGroupID, ID: DefaultRuleID, Role: 
Voter, Count: 5}, + {GroupID: "foo", ID: "baz", StartKeyHex: "", EndKeyHex: "abcd", Role: Voter, Count: 1}, + {GroupID: "foo", ID: "bar", Role: Learner, Count: 1}, } for _, r := range rules { re.NoError(manager.SetRule(r.Clone())) @@ -160,7 +160,7 @@ func TestSaveLoad(t *testing.T) { err := m2.Initialize(3, []string{"no", "labels"}, "") re.NoError(err) re.Len(m2.GetAllRules(), 3) - re.Equal(rules[0].String(), m2.GetRule("pd", "default").String()) + re.Equal(rules[0].String(), m2.GetRule(DefaultGroupID, DefaultRuleID).String()) re.Equal(rules[1].String(), m2.GetRule("foo", "baz").String()) re.Equal(rules[2].String(), m2.GetRule("foo", "bar").String()) re.Equal(manager.GetRulesCount(), 3) @@ -170,14 +170,14 @@ func TestSaveLoad(t *testing.T) { func TestSetAfterGet(t *testing.T) { re := require.New(t) store, manager := newTestManager(t, false) - rule := manager.GetRule("pd", "default") + rule := manager.GetRule(DefaultGroupID, DefaultRuleID) rule.Count = 1 manager.SetRule(rule) m2 := NewRuleManager(store, nil, nil) err := m2.Initialize(100, []string{}, "") re.NoError(err) - rule = m2.GetRule("pd", "default") + rule = m2.GetRule(DefaultGroupID, DefaultRuleID) re.Equal(1, rule.Count) } @@ -193,9 +193,9 @@ func TestKeys(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) rules := []*Rule{ - {GroupID: "1", ID: "1", Role: "voter", Count: 1, StartKeyHex: "", EndKeyHex: ""}, - {GroupID: "2", ID: "2", Role: "voter", Count: 1, StartKeyHex: "11", EndKeyHex: "ff"}, - {GroupID: "2", ID: "3", Role: "voter", Count: 1, StartKeyHex: "22", EndKeyHex: "dd"}, + {GroupID: "1", ID: "1", Role: Voter, Count: 1, StartKeyHex: "", EndKeyHex: ""}, + {GroupID: "2", ID: "2", Role: Voter, Count: 1, StartKeyHex: "11", EndKeyHex: "ff"}, + {GroupID: "2", ID: "3", Role: Voter, Count: 1, StartKeyHex: "22", EndKeyHex: "dd"}, } toDelete := []RuleOp{} @@ -207,16 +207,16 @@ func TestKeys(t *testing.T) { DeleteByIDPrefix: false, }) } - checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {DefaultGroupID, DefaultRuleID}}) manager.Batch(toDelete) - checkRules(t, manager.GetAllRules(), [][2]string{{"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{DefaultGroupID, DefaultRuleID}}) - rules = append(rules, &Rule{GroupID: "3", ID: "4", Role: "voter", Count: 1, StartKeyHex: "44", EndKeyHex: "ee"}, - &Rule{GroupID: "3", ID: "5", Role: "voter", Count: 1, StartKeyHex: "44", EndKeyHex: "dd"}) + rules = append(rules, &Rule{GroupID: "3", ID: "4", Role: Voter, Count: 1, StartKeyHex: "44", EndKeyHex: "ee"}, + &Rule{GroupID: "3", ID: "5", Role: Voter, Count: 1, StartKeyHex: "44", EndKeyHex: "dd"}) manager.SetRules(rules) - checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}, {"pd", "default"}}) + checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}, {DefaultGroupID, DefaultRuleID}}) - manager.DeleteRule("pd", "default") + manager.DeleteRule(DefaultGroupID, DefaultRuleID) checkRules(t, manager.GetAllRules(), [][2]string{{"1", "1"}, {"2", "2"}, {"2", "3"}, {"3", "4"}, {"3", "5"}}) splitKeys := [][]string{ @@ -282,12 +282,12 @@ func TestKeys(t *testing.T) { func TestDeleteByIDPrefix(t *testing.T) { _, manager := newTestManager(t, false) manager.SetRules([]*Rule{ - {GroupID: "g1", ID: "foo1", Role: "voter", Count: 1}, - {GroupID: "g2", ID: "foo1", Role: "voter", 
Count: 1}, - {GroupID: "g2", ID: "foobar", Role: "voter", Count: 1}, - {GroupID: "g2", ID: "baz2", Role: "voter", Count: 1}, + {GroupID: "g1", ID: "foo1", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "foo1", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "foobar", Role: Voter, Count: 1}, + {GroupID: "g2", ID: "baz2", Role: Voter, Count: 1}, }) - manager.DeleteRule("pd", "default") + manager.DeleteRule(DefaultGroupID, DefaultRuleID) checkRules(t, manager.GetAllRules(), [][2]string{{"g1", "foo1"}, {"g2", "baz2"}, {"g2", "foo1"}, {"g2", "foobar"}}) manager.Batch([]RuleOp{{ @@ -301,40 +301,40 @@ func TestDeleteByIDPrefix(t *testing.T) { func TestRangeGap(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - err := manager.DeleteRule("pd", "default") + err := manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.Error(err) - err = manager.SetRule(&Rule{GroupID: "pd", ID: "foo", StartKeyHex: "", EndKeyHex: "abcd", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "foo", StartKeyHex: "", EndKeyHex: "abcd", Role: Voter, Count: 1}) re.NoError(err) // |-- default --| // |-- foo --| // still cannot delete default since it will cause ("abcd", "") has no rules inside. - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.Error(err) - err = manager.SetRule(&Rule{GroupID: "pd", ID: "bar", StartKeyHex: "abcd", EndKeyHex: "", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "bar", StartKeyHex: "abcd", EndKeyHex: "", Role: Voter, Count: 1}) re.NoError(err) // now default can be deleted. - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.NoError(err) // cannot change range since it will cause ("abaa", "abcd") has no rules inside. 
- err = manager.SetRule(&Rule{GroupID: "pd", ID: "foo", StartKeyHex: "", EndKeyHex: "abaa", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: DefaultGroupID, ID: "foo", StartKeyHex: "", EndKeyHex: "abaa", Role: Voter, Count: 1}) re.Error(err) } func TestGroupConfig(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - pd1 := &RuleGroup{ID: "pd"} - re.Equal(pd1, manager.GetRuleGroup("pd")) + pd1 := &RuleGroup{ID: DefaultGroupID} + re.Equal(pd1, manager.GetRuleGroup(DefaultGroupID)) // update group pd - pd2 := &RuleGroup{ID: "pd", Index: 100, Override: true} + pd2 := &RuleGroup{ID: DefaultGroupID, Index: 100, Override: true} err := manager.SetRuleGroup(pd2) re.NoError(err) - re.Equal(pd2, manager.GetRuleGroup("pd")) + re.Equal(pd2, manager.GetRuleGroup(DefaultGroupID)) // new group g without config - err = manager.SetRule(&Rule{GroupID: "g", ID: "1", Role: "voter", Count: 1}) + err = manager.SetRule(&Rule{GroupID: "g", ID: "1", Role: Voter, Count: 1}) re.NoError(err) g1 := &RuleGroup{ID: "g"} re.Equal(g1, manager.GetRuleGroup("g")) @@ -347,12 +347,12 @@ func TestGroupConfig(t *testing.T) { re.Equal([]*RuleGroup{g2, pd2}, manager.GetRuleGroups()) // delete pd group, restore to default config - err = manager.DeleteRuleGroup("pd") + err = manager.DeleteRuleGroup(DefaultGroupID) re.NoError(err) re.Equal([]*RuleGroup{pd1, g2}, manager.GetRuleGroups()) // delete rule, the group is removed too - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(DefaultGroupID, DefaultRuleID) re.NoError(err) re.Equal([]*RuleGroup{g2}, manager.GetRuleGroups()) } @@ -360,16 +360,16 @@ func TestGroupConfig(t *testing.T) { func TestRuleVersion(t *testing.T) { re := require.New(t) _, manager := newTestManager(t, false) - rule1 := manager.GetRule("pd", "default") + rule1 := manager.GetRule(DefaultGroupID, DefaultRuleID) re.Equal(uint64(0), rule1.Version) // create new rule - newRule := &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 3} + newRule := &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 3} err := manager.SetRule(newRule) re.NoError(err) newRule = manager.GetRule("g1", "id") re.Equal(uint64(0), newRule.Version) // update rule - newRule = &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: "voter", Count: 2} + newRule = &Rule{GroupID: "g1", ID: "id", StartKeyHex: "123abc", EndKeyHex: "123abf", Role: Voter, Count: 2} err = manager.SetRule(newRule) re.NoError(err) newRule = manager.GetRule("g1", "id") diff --git a/pkg/schedule/placement/rule_test.go b/pkg/schedule/placement/rule_test.go index b91a1f22d65..75d7bab23c9 100644 --- a/pkg/schedule/placement/rule_test.go +++ b/pkg/schedule/placement/rule_test.go @@ -110,9 +110,9 @@ func TestGroupProperties(t *testing.T) { func TestBuildRuleList(t *testing.T) { re := require.New(t) defaultRule := &Rule{ - GroupID: "pd", - ID: "default", - Role: "voter", + GroupID: DefaultGroupID, + ID: DefaultRuleID, + Role: Voter, StartKey: []byte{}, EndKey: []byte{}, Count: 3, @@ -122,13 +122,13 @@ func TestBuildRuleList(t *testing.T) { byteEnd, err := hex.DecodeString("a2") re.NoError(err) ruleMeta := &Rule{ - GroupID: "pd", + GroupID: DefaultGroupID, ID: "meta", Index: 1, Override: true, StartKey: byteStart, EndKey: byteEnd, - Role: "voter", + Role: Voter, Count: 5, } @@ -140,7 +140,7 @@ func TestBuildRuleList(t *testing.T) { { name: "default rule", rules: map[[2]string]*Rule{ - {"pd", "default"}: 
defaultRule, + {DefaultGroupID, DefaultRuleID}: defaultRule, }, expect: ruleList{ ranges: []rangeRules{ @@ -155,8 +155,8 @@ func TestBuildRuleList(t *testing.T) { { name: "metadata case", rules: map[[2]string]*Rule{ - {"pd", "default"}: defaultRule, - {"pd", "meta"}: ruleMeta, + {DefaultGroupID, DefaultRuleID}: defaultRule, + {DefaultGroupID, "meta"}: ruleMeta, }, expect: ruleList{ranges: []rangeRules{ { diff --git a/pkg/schedule/scatter/region_scatterer_test.go b/pkg/schedule/scatter/region_scatterer_test.go index 681b863aea6..70517d23fee 100644 --- a/pkg/schedule/scatter/region_scatterer_test.go +++ b/pkg/schedule/scatter/region_scatterer_test.go @@ -185,7 +185,7 @@ func scatterSpecial(re *require.Assertions, numOrdinaryStores, numSpecialStores, } tc.SetEnablePlacementRules(true) re.NoError(tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", ID: "learner", Role: placement.Learner, Count: 3, + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 3, LabelConstraints: []placement.LabelConstraint{{Key: "engine", Op: placement.In, Values: []string{"tiflash"}}}})) // Region 1 has the same distribution with the Region 2, which is used to test selectPeerToReplace. @@ -575,8 +575,8 @@ func TestRegionHasLearner(t *testing.T) { tc.AddLabelsStore(i, 0, map[string]string{"zone": "z2"}) } tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, LabelConstraints: []placement.LabelConstraint{ @@ -588,7 +588,7 @@ func TestRegionHasLearner(t *testing.T) { }, }) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 1, diff --git a/pkg/schedule/schedulers/balance_witness_test.go b/pkg/schedule/schedulers/balance_witness_test.go index abd4a3b3bba..59bf04c2303 100644 --- a/pkg/schedule/schedulers/balance_witness_test.go +++ b/pkg/schedule/schedulers/balance_witness_test.go @@ -43,8 +43,8 @@ func (suite *balanceWitnessSchedulerTestSuite) SetupTest() { suite.cancel, suite.conf, suite.tc, suite.oc = prepareSchedulersTest() suite.tc.RuleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 4, }, diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index d8f9bbc532c..15c037ddd22 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -582,8 +582,8 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { tc.SetHotRegionCacheHitsThreshold(0) re.NoError(tc.RuleManager.SetRules([]*placement.Rule{ { - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3, LocationLabels: []string{"zone", "host"}, @@ -1143,7 +1143,7 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { tc.AddRegionStore(3, 20) err = tc.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "leader", Index: 1, Override: true, @@ -1161,7 +1161,7 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { }) re.NoError(err) err = tc.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "voter", Index: 2, Override: false, diff --git a/pkg/schedule/schedulers/scheduler_test.go b/pkg/schedule/schedulers/scheduler_test.go index 
12ab9f8aa2f..57f1fcf1e3f 100644 --- a/pkg/schedule/schedulers/scheduler_test.go +++ b/pkg/schedule/schedulers/scheduler_test.go @@ -261,13 +261,13 @@ func TestShuffleRegionRole(t *testing.T) { // update rule to 1leader+1follower+1learner tc.SetEnablePlacementRules(true) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Role: placement.Voter, Count: 2, }) tc.RuleManager.SetRule(&placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "learner", Role: placement.Learner, Count: 1, @@ -428,8 +428,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "default Rule", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), @@ -442,8 +442,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "single store allowed to be placed leader", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), @@ -463,8 +463,8 @@ func TestBalanceLeaderWithConflictRule(t *testing.T) { { name: "2 store allowed to be placed leader", rule: &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, Index: 1, StartKey: []byte(""), EndKey: []byte(""), diff --git a/server/api/cluster_test.go b/server/api/cluster_test.go index 01aa6ba5f24..d6d8effa365 100644 --- a/server/api/cluster_test.go +++ b/server/api/cluster_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/schedule/placement" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/cluster" @@ -57,7 +58,7 @@ func (suite *clusterTestSuite) TestCluster() { suite.svr.GetPersistOptions().SetPlacementRuleEnabled(true) suite.svr.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} rm := suite.svr.GetRaftCluster().GetRuleManager() - rule := rm.GetRule("pd", "default") + rule := rm.GetRule(placement.DefaultGroupID, placement.DefaultRuleID) rule.LocationLabels = []string{"host"} rule.Count = 1 rm.SetRule(rule) @@ -81,7 +82,7 @@ func (suite *clusterTestSuite) TestCluster() { c1.MaxPeerCount = 6 suite.Equal(c2, c1) - suite.Equal(int(r.MaxReplicas), suite.svr.GetRaftCluster().GetRuleManager().GetRule("pd", "default").Count) + suite.Equal(int(r.MaxReplicas), suite.svr.GetRaftCluster().GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID).Count) } func (suite *clusterTestSuite) testGetClusterStatus() { diff --git a/server/api/region_test.go b/server/api/region_test.go index 379fcf7d463..0f8f84bfc37 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -697,7 +697,7 @@ func (suite *regionsReplicatedTestSuite) TestCheckRegionsReplicated() { Index: 5, Rules: []*placement.Rule{ { - ID: "foo", Index: 1, Role: "voter", Count: 1, + ID: "foo", Index: 1, Role: placement.Voter, Count: 1, }, }, }, @@ -738,7 +738,7 @@ func (suite *regionsReplicatedTestSuite) TestCheckRegionsReplicated() { mustRegionHeartbeat(re, suite.svr, r1) bundle[0].Rules = append(bundle[0].Rules, &placement.Rule{ - ID: "bar", Index: 1, Role: "voter", Count: 1, + ID: "bar", Index: 1, Role: placement.Voter, Count: 1, }) data, err = json.Marshal(bundle) 
suite.NoError(err) @@ -755,7 +755,7 @@ func (suite *regionsReplicatedTestSuite) TestCheckRegionsReplicated() { Index: 6, Rules: []*placement.Rule{ { - ID: "foo", Index: 1, Role: "voter", Count: 2, + ID: "foo", Index: 1, Role: placement.Voter, Count: 2, }, }, }) diff --git a/server/api/rule.go b/server/api/rule.go index 47964d594be..bdb3db2016d 100644 --- a/server/api/rule.go +++ b/server/api/rule.go @@ -273,7 +273,7 @@ func (h *ruleHandler) SetRule(w http.ResponseWriter, r *http.Request) { // sync replicate config with default-rule func (h *ruleHandler) syncReplicateConfigWithDefaultRule(rule *placement.Rule) error { // sync default rule with replicate config - if rule.GroupID == "pd" && rule.ID == "default" { + if rule.GroupID == placement.DefaultGroupID && rule.ID == placement.DefaultRuleID { cfg := h.svr.GetReplicationConfig().Clone() cfg.MaxReplicas = uint64(rule.Count) if err := h.svr.SetReplicationConfig(*cfg); err != nil { diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 4b9b401e0c9..d424ea98e7b 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1669,7 +1669,7 @@ func TestCalculateStoreSize1(t *testing.T) { } cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone1", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone1", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone1"}}, }, @@ -1677,7 +1677,7 @@ func TestCalculateStoreSize1(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone2", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone2", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone2"}}, }, @@ -1685,13 +1685,13 @@ func TestCalculateStoreSize1(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "zone3", StartKey: []byte(""), EndKey: []byte(""), Role: "follower", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "zone3", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Follower, Count: 1, LabelConstraints: []placement.LabelConstraint{ {Key: "zone", Op: "in", Values: []string{"zone3"}}, }, LocationLabels: []string{"rack", "host"}}, ) - cluster.ruleManager.DeleteRule("pd", "default") + cluster.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) regions := newTestRegions(100, 10, 5) for _, region := range regions { @@ -1753,7 +1753,7 @@ func TestCalculateStoreSize2(t *testing.T) { } cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "dc1", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 2, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "dc1", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 2, LabelConstraints: []placement.LabelConstraint{ {Key: "dc", Op: "in", Values: []string{"dc1"}}, }, @@ -1761,7 +1761,7 @@ func TestCalculateStoreSize2(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "logic3", StartKey: []byte(""), EndKey: []byte(""), Role: "voter", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "logic3", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Voter, Count: 1, 
LabelConstraints: []placement.LabelConstraint{ {Key: "logic", Op: "in", Values: []string{"logic3"}}, }, @@ -1769,13 +1769,13 @@ func TestCalculateStoreSize2(t *testing.T) { ) cluster.ruleManager.SetRule( - &placement.Rule{GroupID: "pd", ID: "logic4", StartKey: []byte(""), EndKey: []byte(""), Role: "learner", Count: 1, + &placement.Rule{GroupID: placement.DefaultGroupID, ID: "logic4", StartKey: []byte(""), EndKey: []byte(""), Role: placement.Learner, Count: 1, LabelConstraints: []placement.LabelConstraint{ {Key: "logic", Op: "in", Values: []string{"logic4"}}, }, LocationLabels: []string{"dc", "logic", "rack", "host"}}, ) - cluster.ruleManager.DeleteRule("pd", "default") + cluster.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) regions := newTestRegions(100, 10, 5) for _, region := range regions { diff --git a/server/server.go b/server/server.go index 38064a3b92f..d4b40af9c18 100644 --- a/server/server.go +++ b/server/server.go @@ -1043,7 +1043,7 @@ func (s *Server) SetReplicationConfig(cfg sc.ReplicationConfig) error { return errs.ErrNotBootstrapped.GenWithStackByArgs() } // replication.MaxReplicas won't work when placement rule is enabled and not only have one default rule. - defaultRule := rc.GetRuleManager().GetRule("pd", "default") + defaultRule := rc.GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) CheckInDefaultRule := func() error { // replication config won't work when placement rule is enabled and exceeds one default rule diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go index 03d90c6cd32..d2c88d01f09 100644 --- a/tests/integrations/client/http_client_test.go +++ b/tests/integrations/client/http_client_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/suite" pd "github.com/tikv/pd/client/http" + "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/tests" ) @@ -85,3 +86,45 @@ func (suite *httpClientTestSuite) TestGetMinResolvedTSByStoresIDs() { re.Equal(minResolvedTS, storeMinResolvedTSMap[1]) re.Equal(uint64(math.MaxUint64), storeMinResolvedTSMap[2]) } + +func (suite *httpClientTestSuite) TestRule() { + re := suite.Require() + rules, err := suite.client.GetPlacementRulesByGroup(suite.ctx, placement.DefaultGroupID) + re.NoError(err) + re.Len(rules, 1) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) + re.Equal(placement.DefaultRuleID, rules[0].ID) + re.Equal(pd.Voter, rules[0].Role) + re.Equal(3, rules[0].Count) + err = suite.client.SetPlacementRule(suite.ctx, &pd.Rule{ + GroupID: placement.DefaultGroupID, + ID: "test", + Role: pd.Learner, + Count: 3, + }) + re.NoError(err) + rules, err = suite.client.GetPlacementRulesByGroup(suite.ctx, placement.DefaultGroupID) + re.NoError(err) + re.Len(rules, 2) + re.Equal(placement.DefaultGroupID, rules[1].GroupID) + re.Equal("test", rules[1].ID) + re.Equal(pd.Learner, rules[1].Role) + re.Equal(3, rules[1].Count) + err = suite.client.DeletePlacementRule(suite.ctx, placement.DefaultGroupID, "test") + re.NoError(err) + rules, err = suite.client.GetPlacementRulesByGroup(suite.ctx, placement.DefaultGroupID) + re.NoError(err) + re.Len(rules, 1) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) + re.Equal(placement.DefaultRuleID, rules[0].ID) +} + +func (suite *httpClientTestSuite) TestAccelerateSchedule() { + re := suite.Require() + suspectRegions := suite.cluster.GetLeaderServer().GetRaftCluster().GetSuspectRegions() + re.Len(suspectRegions, 0) + err := suite.client.AccelerateSchedule(suite.ctx, 
[]byte("a1"), []byte("a2")) + re.NoError(err) + suspectRegions = suite.cluster.GetLeaderServer().GetRaftCluster().GetSuspectRegions() + re.Len(suspectRegions, 1) +} diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index cfeaa4db033..f6a7f66a66f 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -241,9 +241,9 @@ func (suite *apiTestSuite) TestAPIForward() { tests.MustPutRegion(re, suite.cluster, 2, 1, []byte("a"), []byte("b"), core.SetApproximateSize(60)) rules = []*placement.Rule{ { - GroupID: "pd", - ID: "default", - Role: "voter", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, Count: 3, LocationLabels: []string{}, }, diff --git a/tests/integrations/mcs/scheduling/rule_test.go b/tests/integrations/mcs/scheduling/rule_test.go index bffa58d0fe6..761e9b1ecbc 100644 --- a/tests/integrations/mcs/scheduling/rule_test.go +++ b/tests/integrations/mcs/scheduling/rule_test.go @@ -76,8 +76,8 @@ func (suite *ruleTestSuite) TestRuleWatch() { // Check the default rule and rule group. rules := ruleManager.GetAllRules() re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) - re.Equal("default", rules[0].ID) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) + re.Equal(placement.DefaultRuleID, rules[0].ID) re.Equal(0, rules[0].Index) re.Empty(rules[0].StartKey) re.Empty(rules[0].EndKey) @@ -85,7 +85,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { re.Empty(rules[0].LocationLabels) ruleGroups := ruleManager.GetRuleGroups() re.Len(ruleGroups, 1) - re.Equal("pd", ruleGroups[0].ID) + re.Equal(placement.DefaultGroupID, ruleGroups[0].ID) re.Equal(0, ruleGroups[0].Index) re.False(ruleGroups[0].Override) // Set a new rule via the PD API server. @@ -93,7 +93,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { rule := &placement.Rule{ GroupID: "2", ID: "3", - Role: "voter", + Role: placement.Voter, Count: 1, StartKeyHex: "22", EndKeyHex: "dd", @@ -122,7 +122,7 @@ func (suite *ruleTestSuite) TestRuleWatch() { return len(rules) == 1 }) re.Len(rules, 1) - re.Equal("pd", rules[0].GroupID) + re.Equal(placement.DefaultGroupID, rules[0].GroupID) // Create a new rule group. 
ruleGroup := &placement.RuleGroup{ ID: "2", diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index 315ec3cf7c7..91d6723c2ac 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -315,7 +315,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { re.Contains(string(output), "Success!") // test show - suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}}) + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) f, _ := os.CreateTemp("/tmp", "pd_tests") fname := f.Name() @@ -323,18 +323,18 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { defer os.RemoveAll(fname) // test load - rules := suite.checkLoadRule(pdAddr, fname, [][2]string{{"pd", "default"}}) + rules := suite.checkLoadRule(pdAddr, fname, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) // test save rules = append(rules, placement.Rule{ - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", - Role: "voter", + Role: placement.Voter, Count: 1, }, placement.Rule{ GroupID: "test-group", ID: "test2", - Role: "voter", + Role: placement.Voter, Count: 2, }) b, _ := json.Marshal(rules) @@ -343,11 +343,11 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { re.NoError(err) // test show group - suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}, {"pd", "test1"}}, "--group=pd") + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}, {placement.DefaultGroupID, "test1"}}, "--group=pd") // test rule region detail tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) - suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "default"}}, "--region=1", "--detail") + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}, "--region=1", "--detail") // test delete // need clear up args, so create new a cobra.Command. Otherwise gourp still exists. 
@@ -356,7 +356,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { os.WriteFile(fname, b, 0600) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) re.NoError(err) - suite.checkShowRuleKey(pdAddr, [][2]string{{"pd", "test1"}}, "--group=pd") + suite.checkShowRuleKey(pdAddr, [][2]string{{placement.DefaultGroupID, "test1"}}, "--group=pd") } func (suite *configTestSuite) TestPlacementRuleGroups() { @@ -385,15 +385,15 @@ func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluste // test show var group placement.RuleGroup testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "pd") + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", placement.DefaultGroupID) re.NoError(err) return !strings.Contains(string(output), "404") }) re.NoError(json.Unmarshal(output, &group), string(output)) - re.Equal(placement.RuleGroup{ID: "pd"}, group) + re.Equal(placement.RuleGroup{ID: placement.DefaultGroupID}, group) // test set - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "pd", "42", "true") + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", placement.DefaultGroupID, "42", "true") re.NoError(err) re.Contains(string(output), "Success!") output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "group2", "100", "false") @@ -410,7 +410,7 @@ func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluste re.NoError(err) re.NoError(json.Unmarshal(output, &groups)) return reflect.DeepEqual([]placement.RuleGroup{ - {ID: "pd", Index: 42, Override: true}, + {ID: placement.DefaultGroupID, Index: 42, Override: true}, {ID: "group2", Index: 100, Override: false}, {ID: "group3", Index: 200, Override: false}, }, groups) @@ -464,10 +464,10 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste // test get var bundle placement.GroupBundle - output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "get", "pd") + output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "get", placement.DefaultGroupID) re.NoError(err) re.NoError(json.Unmarshal(output, &bundle)) - re.Equal(placement.GroupBundle{ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, bundle) + re.Equal(placement.GroupBundle{ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, bundle) f, err := os.CreateTemp("/tmp", "pd_tests") re.NoError(err) @@ -477,7 +477,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste // test load suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // test set @@ -489,41 +489,41 @@ func (suite *configTestSuite) 
checkPlacementRuleBundle(cluster *tests.TestCluste _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: "default", Role: "voter", Count: 3}}}, - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // test delete - _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "pd") + _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", placement.DefaultGroupID) re.NoError(err) suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // test delete regexp bundle.ID = "pf" - bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}} + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} b, err = json.Marshal(bundle) re.NoError(err) re.NoError(os.WriteFile(fname, b, 0600)) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) re.NoError(err) suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, - {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") re.NoError(err) bundles := []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, } suite.checkLoadRuleBundle(pdAddr, fname, bundles) // test save - bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}} + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} bundles = append(bundles, bundle) b, err = json.Marshal(bundles) re.NoError(err) @@ -531,8 +531,8 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste _, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) re.NoError(err) 
suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: "default", Role: "voter", Count: 3}}}, - {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) // partial update, so still one group is left, no error @@ -544,7 +544,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluste re.NoError(err) suite.checkLoadRuleBundle(pdAddr, fname, []placement.GroupBundle{ - {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: "default", Role: "voter", Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, }) } @@ -715,7 +715,7 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes } checkRuleCount := func(expect int) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} output, err := pdctl.ExecuteCommand(cmd, args...) re.NoError(err) rule := placement.Rule{} @@ -726,7 +726,7 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes } checkRuleLocationLabels := func(expect int) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} output, err := pdctl.ExecuteCommand(cmd, args...) re.NoError(err) rule := placement.Rule{} @@ -737,7 +737,7 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes } checkRuleIsolationLevel := func(expect string) { - args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"} + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} output, err := pdctl.ExecuteCommand(cmd, args...) 
re.NoError(err) rule := placement.Rule{} @@ -791,7 +791,7 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes fname := suite.T().TempDir() rules := []placement.Rule{ { - GroupID: "pd", + GroupID: placement.DefaultGroupID, ID: "test1", Role: "voter", Count: 1, diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index 908daf21aac..e36ead7e44d 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -461,7 +461,7 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te // add customized rule first and then remove default rule err := manager.SetRules(testCase.rules) suite.NoError(err) - err = manager.DeleteRule("pd", "default") + err = manager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) suite.NoError(err) } if testCase.expectedError == nil { diff --git a/tests/server/api/rule_test.go b/tests/server/api/rule_test.go index 6d292021767..861fbe5cf32 100644 --- a/tests/server/api/rule_test.go +++ b/tests/server/api/rule_test.go @@ -56,7 +56,7 @@ func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} successData, err := json.Marshal(rule) suite.NoError(err) oldStartKey, err := hex.DecodeString(rule.StartKeyHex) @@ -64,13 +64,13 @@ func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { oldEndKey, err := hex.DecodeString(rule.EndKeyHex) suite.NoError(err) parseErrData := []byte("foo") - rule1 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1} + rule1 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} checkErrData, err := json.Marshal(rule1) suite.NoError(err) - rule2 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1} + rule2 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} setErrData, err := json.Marshal(rule2) suite.NoError(err) - rule3 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: "follower", Count: 3} + rule3 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Follower, Count: 3} updateData, err := json.Marshal(rule3) suite.NoError(err) newStartKey, err := hex.DecodeString(rule.StartKeyHex) @@ -179,7 +179,7 @@ func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() @@ -200,7 +200,7 @@ func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { }, { name: "not found", - rule: placement.Rule{GroupID: "a", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + rule: placement.Rule{GroupID: "a", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", 
Role: placement.Voter, Count: 1}, found: false, code: http.StatusNotFound, }, @@ -237,7 +237,7 @@ func (suite *ruleTestSuite) checkGetAll(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() @@ -266,16 +266,16 @@ func (suite *ruleTestSuite) checkSetAll(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule1 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} - rule2 := placement.Rule{GroupID: "b", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} - rule3 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1} - rule4 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1} - rule5 := placement.Rule{GroupID: "pd", ID: "default", StartKeyHex: "", EndKeyHex: "", Role: "voter", Count: 1, + rule1 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule2 := placement.Rule{GroupID: "b", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule3 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule4 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} + rule5 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 1, LocationLabels: []string{"host"}} - rule6 := placement.Rule{GroupID: "pd", ID: "default", StartKeyHex: "", EndKeyHex: "", Role: "voter", Count: 3} + rule6 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 3} leaderServer.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} - defaultRule := leaderServer.GetRaftCluster().GetRuleManager().GetRule("pd", "default") + defaultRule := leaderServer.GetRaftCluster().GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) defaultRule.LocationLabels = []string{"host"} leaderServer.GetRaftCluster().GetRuleManager().SetRule(defaultRule) @@ -390,13 +390,13 @@ func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) re := suite.Require() - rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) suite.NoError(err) - rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err = json.Marshal(rule1) 
suite.NoError(err) err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) @@ -453,7 +453,7 @@ func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() @@ -525,7 +525,7 @@ func (suite *ruleTestSuite) checkGetAllByKey(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) re := suite.Require() @@ -589,7 +589,7 @@ func (suite *ruleTestSuite) checkDelete(cluster *tests.TestCluster) { pdAddr := leaderServer.GetAddr() urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) - rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: "voter", Count: 1} + rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} data, err := json.Marshal(rule) suite.NoError(err) err = tu.CheckPostJSON(testDialClient, urlPrefix+"/rule", data, tu.StatusOK(suite.Require())) @@ -663,19 +663,19 @@ func (suite *ruleTestSuite) checkBatch(cluster *tests.TestCluster) { opt1 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt2 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "b", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "b", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt3 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "14", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "14", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt4 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "15", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "15", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt5 := placement.RuleOp{ Action: placement.RuleOpDel, @@ -692,11 +692,11 @@ func (suite *ruleTestSuite) checkBatch(cluster *tests.TestCluster) { } opt8 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "16", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: "voter", Count: 1}, + Rule: &placement.Rule{GroupID: "a", ID: "16", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, } opt9 := placement.RuleOp{ Action: placement.RuleOpAdd, - Rule: &placement.Rule{GroupID: "a", ID: "17", StartKeyHex: "1111", EndKeyHex: "3333", Role: "voter", Count: -1}, + 
Rule: &placement.Rule{GroupID: "a", ID: "17", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1}, } successData1, err := json.Marshal([]placement.RuleOp{opt1, opt2, opt3}) @@ -800,9 +800,14 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { re := suite.Require() // GetAll b1 := placement.GroupBundle{ - ID: "pd", + ID: placement.DefaultGroupID, Rules: []*placement.Rule{ - {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + { + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + }, }, } var bundles []placement.GroupBundle @@ -817,7 +822,7 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { Index: 42, Override: true, Rules: []*placement.Rule{ - {GroupID: "foo", ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {GroupID: "foo", ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err := json.Marshal(b2) @@ -849,7 +854,7 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { suite.compareBundle(bundles[0], b2) // SetAll - b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: "follower", Count: 1}) + b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: placement.Follower, Count: 1}) b2.Index, b2.Override = 0, false b3 := placement.GroupBundle{ID: "foobar", Index: 100} data, err = json.Marshal([]placement.GroupBundle{b1, b2, b3}) @@ -880,7 +885,7 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { b4 := placement.GroupBundle{ Index: 4, Rules: []*placement.Rule{ - {ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err = json.Marshal(b4) @@ -908,7 +913,7 @@ func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { ID: "rule-without-group-id-2", Index: 5, Rules: []*placement.Rule{ - {ID: "bar", Index: 1, Override: true, Role: "voter", Count: 1}, + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, }, } data, err = json.Marshal([]placement.GroupBundle{b1, b4, b5}) diff --git a/tools/pd-simulator/simulator/cases/diagnose_rule.go b/tools/pd-simulator/simulator/cases/diagnose_rule.go index b4b30fdc772..6cd76c854b7 100644 --- a/tools/pd-simulator/simulator/cases/diagnose_rule.go +++ b/tools/pd-simulator/simulator/cases/diagnose_rule.go @@ -46,8 +46,8 @@ func newRule1() *Case { }, LocationLabels: []string{"host"}, }, &placement.Rule{ - GroupID: "pd", - ID: "default", + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, From 9845c12d2a40ef3e28e0ddf9c803f33994102f81 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 20 Nov 2023 11:10:11 +0800 Subject: [PATCH 09/10] mcs: dynamic enable scheduling jobs (#7325) ref tikv/pd#5839, close tikv/pd#7375 Signed-off-by: Ryan Leung --- pkg/mcs/scheduling/server/cluster.go | 9 +- pkg/mcs/scheduling/server/server.go | 1 + pkg/schedule/coordinator.go | 6 +- .../schedulers/scheduler_controller.go | 11 +- server/api/middleware.go | 4 +- server/cluster/cluster.go | 103 +++++++------- server/cluster/cluster_test.go | 10 +- server/cluster/cluster_worker.go | 3 + server/cluster/scheduling_controller.go | 127 ++++++++++++++---- .../mcs/scheduling/config_test.go | 3 +- .../mcs/scheduling/server_test.go | 38 ++++++ tests/pdctl/hot/hot_test.go | 1 + tests/pdctl/keyspace/keyspace_group_test.go | 3 +- 
tests/server/api/operator_test.go | 46 +++---- tests/testutil.go | 3 + 15 files changed, 249 insertions(+), 119 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index 028c2a12b37..ac15212553b 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -502,8 +502,8 @@ func (c *Cluster) collectClusterMetrics() { func (c *Cluster) resetMetrics() { statistics.Reset() - c.coordinator.GetSchedulersController().ResetSchedulerMetrics() - c.coordinator.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() c.resetClusterMetrics() } @@ -538,6 +538,11 @@ func (c *Cluster) StopBackgroundJobs() { c.wg.Wait() } +// IsBackgroundJobsRunning returns whether the background jobs are running. Only for test purpose. +func (c *Cluster) IsBackgroundJobsRunning() bool { + return c.running.Load() +} + // HandleRegionHeartbeat processes RegionInfo reports from client. func (c *Cluster) HandleRegionHeartbeat(region *core.RegionInfo) error { if err := c.processRegionHeartbeat(region); err != nil { diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 32b241fee91..c5b73dea5fc 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -405,6 +405,7 @@ func (s *Server) startServer() (err error) { // different service modes provided by the same pd-server binary serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix())) + s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) log.Info("joining primary election", zap.String("participant-name", uniqueName), zap.Uint64("participant-id", uniqueID)) diff --git a/pkg/schedule/coordinator.go b/pkg/schedule/coordinator.go index 8fb9ec8b286..6a02e68811d 100644 --- a/pkg/schedule/coordinator.go +++ b/pkg/schedule/coordinator.go @@ -88,8 +88,8 @@ type Coordinator struct { } // NewCoordinator creates a new Coordinator. -func NewCoordinator(ctx context.Context, cluster sche.ClusterInformer, hbStreams *hbstream.HeartbeatStreams) *Coordinator { - ctx, cancel := context.WithCancel(ctx) +func NewCoordinator(parentCtx context.Context, cluster sche.ClusterInformer, hbStreams *hbstream.HeartbeatStreams) *Coordinator { + ctx, cancel := context.WithCancel(parentCtx) opController := operator.NewController(ctx, cluster.GetBasicCluster(), cluster.GetSharedConfig(), hbStreams) schedulers := schedulers.NewController(ctx, cluster, cluster.GetStorage(), opController) checkers := checker.NewController(ctx, cluster, cluster.GetCheckerConfig(), cluster.GetRuleManager(), cluster.GetRegionLabeler(), opController) @@ -714,7 +714,7 @@ func collectHotMetrics(cluster sche.ClusterInformer, stores []*core.StoreInfo, t } // ResetHotSpotMetrics resets hot spot metrics. 
-func (c *Coordinator) ResetHotSpotMetrics() { +func ResetHotSpotMetrics() { hotSpotStatusGauge.Reset() schedulers.HotPendingSum.Reset() } diff --git a/pkg/schedule/schedulers/scheduler_controller.go b/pkg/schedule/schedulers/scheduler_controller.go index 79c8cbfbc92..5097a5f3f1c 100644 --- a/pkg/schedule/schedulers/scheduler_controller.go +++ b/pkg/schedule/schedulers/scheduler_controller.go @@ -38,8 +38,6 @@ const maxScheduleRetries = 10 var ( denySchedulersByLabelerCounter = labeler.LabelerEventCounter.WithLabelValues("schedulers", "deny") - rulesCntStatusGauge = ruleStatusGauge.WithLabelValues("rule_count") - groupsCntStatusGauge = ruleStatusGauge.WithLabelValues("group_count") ) // Controller is used to manage all schedulers. @@ -128,8 +126,8 @@ func (c *Controller) CollectSchedulerMetrics() { } ruleCnt := ruleMgr.GetRulesCount() groupCnt := ruleMgr.GetGroupsCount() - rulesCntStatusGauge.Set(float64(ruleCnt)) - groupsCntStatusGauge.Set(float64(groupCnt)) + ruleStatusGauge.WithLabelValues("rule_count").Set(float64(ruleCnt)) + ruleStatusGauge.WithLabelValues("group_count").Set(float64(groupCnt)) } func (c *Controller) isSchedulingHalted() bool { @@ -137,12 +135,9 @@ func (c *Controller) isSchedulingHalted() bool { } // ResetSchedulerMetrics resets metrics of all schedulers. -func (c *Controller) ResetSchedulerMetrics() { +func ResetSchedulerMetrics() { schedulerStatusGauge.Reset() ruleStatusGauge.Reset() - // create in map again - rulesCntStatusGauge = ruleStatusGauge.WithLabelValues("rule_count") - groupsCntStatusGauge = ruleStatusGauge.WithLabelValues("group_count") } // AddSchedulerHandler adds the HTTP handler for a scheduler. diff --git a/server/api/middleware.go b/server/api/middleware.go index cfeb0844dcf..627d7fecc92 100644 --- a/server/api/middleware.go +++ b/server/api/middleware.go @@ -114,7 +114,7 @@ func newAuditMiddleware(s *server.Server) negroni.Handler { return &auditMiddleware{svr: s} } -// ServeHTTP is used to implememt negroni.Handler for auditMiddleware +// ServeHTTP is used to implement negroni.Handler for auditMiddleware func (s *auditMiddleware) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) { if !s.svr.GetServiceMiddlewarePersistOptions().IsAuditEnabled() { next(w, r) @@ -164,7 +164,7 @@ func newRateLimitMiddleware(s *server.Server) negroni.Handler { return &rateLimitMiddleware{svr: s} } -// ServeHTTP is used to implememt negroni.Handler for rateLimitMiddleware +// ServeHTTP is used to implement negroni.Handler for rateLimitMiddleware func (s *rateLimitMiddleware) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) { if !s.svr.GetServiceMiddlewarePersistOptions().IsRateLimitEnabled() { next(w, r) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 3b50ae16d9b..0df543c96c2 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -41,15 +41,14 @@ import ( "github.com/tikv/pd/pkg/gctuner" "github.com/tikv/pd/pkg/id" "github.com/tikv/pd/pkg/keyspace" + "github.com/tikv/pd/pkg/mcs/discovery" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/memory" "github.com/tikv/pd/pkg/progress" "github.com/tikv/pd/pkg/replication" - "github.com/tikv/pd/pkg/schedule" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" "github.com/tikv/pd/pkg/schedule/labeler" - "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" @@ -262,7 +261,7 @@ func (c 
*RaftCluster) InitCluster( storage storage.Storage, basicCluster *core.BasicCluster, hbstreams *hbstream.HeartbeatStreams, - keyspaceGroupManager *keyspace.GroupManager) { + keyspaceGroupManager *keyspace.GroupManager) error { c.core, c.opt, c.storage, c.id = basicCluster, opt.(*config.PersistOptions), storage, id c.ctx, c.cancel = context.WithCancel(c.serverCtx) c.progressManager = progress.NewManager() @@ -271,7 +270,15 @@ func (c *RaftCluster) InitCluster( c.unsafeRecoveryController = unsaferecovery.NewController(c) c.keyspaceGroupManager = keyspaceGroupManager c.hbstreams = hbstreams - c.schedulingController = newSchedulingController(c.ctx) + c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts()) + if c.opt.IsPlacementRulesEnabled() { + err := c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels(), c.opt.GetIsolationLevel()) + if err != nil { + return err + } + } + c.schedulingController = newSchedulingController(c.ctx, c.core, c.opt, c.ruleManager) + return nil } // Start starts a cluster. @@ -285,7 +292,10 @@ func (c *RaftCluster) Start(s Server) error { } c.isAPIServiceMode = s.IsAPIServiceMode() - c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetStorage(), s.GetBasicCluster(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) + err := c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetStorage(), s.GetBasicCluster(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) + if err != nil { + return err + } cluster, err := c.LoadClusterInfo() if err != nil { return err @@ -294,24 +304,21 @@ func (c *RaftCluster) Start(s Server) error { return nil } - c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts()) - if c.opt.IsPlacementRulesEnabled() { - err = c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels(), c.opt.GetIsolationLevel()) - if err != nil { - return err - } - } c.regionLabeler, err = labeler.NewRegionLabeler(c.ctx, c.storage, regionLabelGCInterval) if err != nil { return err } + if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + for _, store := range c.GetStores() { + storeID := store.GetID() + c.slowStat.ObserveSlowStoreStatus(storeID, store.IsSlow()) + } + } c.replicationMode, err = replication.NewReplicationModeManager(s.GetConfig().ReplicationMode, c.storage, cluster, s) if err != nil { return err } - - c.schedulingController.init(c.core, c.opt, schedule.NewCoordinator(c.ctx, c, c.GetHeartbeatStreams()), c.ruleManager) c.limiter = NewStoreLimiter(s.GetPersistOptions()) c.externalTS, err = c.storage.LoadExternalTS() if err != nil { @@ -326,6 +333,7 @@ func (c *RaftCluster) Start(s Server) error { return err } } + c.checkServices() c.wg.Add(9) go c.runServiceCheckJob() go c.runMetricsCollectionJob() @@ -341,25 +349,39 @@ func (c *RaftCluster) Start(s Server) error { return nil } -func (c *RaftCluster) runServiceCheckJob() { - defer logutil.LogPanic() - defer c.wg.Done() - - var once sync.Once +var once sync.Once - checkFn := func() { - if c.isAPIServiceMode { - once.Do(c.initSchedulers) - c.independentServices.Store(mcsutils.SchedulingServiceName, true) - return - } - if c.startSchedulingJobs() { +func (c *RaftCluster) checkServices() { + if c.isAPIServiceMode { + servers, err := discovery.Discover(c.etcdClient, strconv.FormatUint(c.clusterID, 10), mcsutils.SchedulingServiceName) + if err != nil || len(servers) == 0 { + c.startSchedulingJobs(c, c.hbstreams) c.independentServices.Delete(mcsutils.SchedulingServiceName) + } else { + if c.stopSchedulingJobs() { + c.initCoordinator(c.ctx, c, 
c.hbstreams) + } else { + once.Do(func() { + c.initCoordinator(c.ctx, c, c.hbstreams) + }) + } + c.independentServices.Store(mcsutils.SchedulingServiceName, true) } + } else { + c.startSchedulingJobs(c, c.hbstreams) + c.independentServices.Delete(mcsutils.SchedulingServiceName) } - checkFn() +} + +func (c *RaftCluster) runServiceCheckJob() { + defer logutil.LogPanic() + defer c.wg.Done() ticker := time.NewTicker(serviceCheckInterval) + failpoint.Inject("highFrequencyClusterJobs", func() { + ticker.Stop() + ticker = time.NewTicker(time.Millisecond * 10) + }) defer ticker.Stop() for { @@ -368,7 +390,7 @@ func (c *RaftCluster) runServiceCheckJob() { log.Info("service check job is stopped") return case <-ticker.C: - checkFn() + c.checkServices() } } } @@ -621,12 +643,7 @@ func (c *RaftCluster) LoadClusterInfo() (*RaftCluster, error) { zap.Int("count", c.core.GetTotalRegionCount()), zap.Duration("cost", time.Since(start)), ) - if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { - for _, store := range c.GetStores() { - storeID := store.GetID() - c.slowStat.ObserveSlowStoreStatus(storeID, store.IsSlow()) - } - } + return c, nil } @@ -724,7 +741,7 @@ func (c *RaftCluster) Stop() { c.Unlock() c.wg.Wait() - log.Info("raftcluster is stopped") + log.Info("raft cluster is stopped") } // IsRunning return if the cluster is running. @@ -749,16 +766,6 @@ func (c *RaftCluster) GetHeartbeatStreams() *hbstream.HeartbeatStreams { return c.hbstreams } -// GetCoordinator returns the coordinator. -func (c *RaftCluster) GetCoordinator() *schedule.Coordinator { - return c.coordinator -} - -// GetOperatorController returns the operator controller. -func (c *RaftCluster) GetOperatorController() *operator.Controller { - return c.coordinator.GetOperatorController() -} - // AllocID returns a global unique ID. func (c *RaftCluster) AllocID() (uint64, error) { return c.id.Alloc() @@ -997,7 +1004,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { // Save to cache if meta or leader is updated, or contains any down/pending peer. // Mark isNew if the region in cache does not have leader. isNew, saveKV, saveCache, needSync := regionGuide(region, origin) - if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) && !saveKV && !saveCache && !isNew { + if !saveKV && !saveCache && !isNew { // Due to some config changes need to update the region stats as well, // so we do some extra checks here. 
if hasRegionStats && c.regionStats.RegionStatsNeedUpdate(region) { @@ -1028,9 +1035,11 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { regionUpdateCacheEventCounter.Inc() } + isPrepared := true if !c.IsServiceIndependent(mcsutils.SchedulingServiceName) { - cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, c.IsPrepared()) + isPrepared = c.IsPrepared() } + cluster.Collect(c, region, c.GetRegionStores(region), hasRegionStats, isNew, isPrepared) if c.storage != nil { // If there are concurrent heartbeats from the same region, the last write will win even if diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index d424ea98e7b..0e34ba4c743 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -2138,7 +2138,7 @@ func newTestRaftCluster( panic(err) } } - rc.schedulingController.init(basicCluster, opt, nil, rc.ruleManager) + rc.schedulingController = newSchedulingController(rc.ctx, rc.core, rc.opt, rc.ruleManager) return rc } @@ -2505,8 +2505,8 @@ func TestCollectMetricsConcurrent(t *testing.T) { controller.CollectSchedulerMetrics() co.GetCluster().(*RaftCluster).collectStatisticsMetrics() } - co.ResetHotSpotMetrics() - controller.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() co.GetCluster().(*RaftCluster).resetStatisticsMetrics() wg.Wait() } @@ -2551,8 +2551,8 @@ func TestCollectMetrics(t *testing.T) { s.Stats = nil } re.Equal(status1, status2) - co.ResetHotSpotMetrics() - controller.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() co.GetCluster().(*RaftCluster).resetStatisticsMetrics() } diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 3a319c48196..74a445ad78e 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -38,6 +38,9 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { return err } + if c.IsServiceIndependent(mcsutils.SchedulingServiceName) { + return nil + } c.coordinator.GetOperatorController().Dispatch(region, operator.DispatchFromHeartBeat, c.coordinator.RecordOpStepWithTTL) return nil } diff --git a/server/cluster/scheduling_controller.go b/server/cluster/scheduling_controller.go index 1c41c830cf6..bb6470252b0 100644 --- a/server/cluster/scheduling_controller.go +++ b/server/cluster/scheduling_controller.go @@ -25,6 +25,10 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/schedule" "github.com/tikv/pd/pkg/schedule/checker" + sc "github.com/tikv/pd/pkg/schedule/config" + sche "github.com/tikv/pd/pkg/schedule/core" + "github.com/tikv/pd/pkg/schedule/hbstream" + "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/scatter" "github.com/tikv/pd/pkg/schedule/schedulers" @@ -33,9 +37,9 @@ import ( "github.com/tikv/pd/pkg/statistics/buckets" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/utils/logutil" - "github.com/tikv/pd/server/config" ) +// schedulingController is used to manage all schedulers and checkers. 
type schedulingController struct { parentCtx context.Context ctx context.Context @@ -43,7 +47,7 @@ type schedulingController struct { mu sync.RWMutex wg sync.WaitGroup *core.BasicCluster - opt *config.PersistOptions + opt sc.ConfProvider coordinator *schedule.Coordinator labelStats *statistics.LabelStatistics regionStats *statistics.RegionStatistics @@ -52,25 +56,22 @@ type schedulingController struct { running bool } -func newSchedulingController(parentCtx context.Context) *schedulingController { +// newSchedulingController creates a new scheduling controller. +func newSchedulingController(parentCtx context.Context, basicCluster *core.BasicCluster, opt sc.ConfProvider, ruleManager *placement.RuleManager) *schedulingController { ctx, cancel := context.WithCancel(parentCtx) return &schedulingController{ - parentCtx: parentCtx, - ctx: ctx, - cancel: cancel, - labelStats: statistics.NewLabelStatistics(), - hotStat: statistics.NewHotStat(parentCtx), - slowStat: statistics.NewSlowStat(parentCtx), + parentCtx: parentCtx, + ctx: ctx, + cancel: cancel, + BasicCluster: basicCluster, + opt: opt, + labelStats: statistics.NewLabelStatistics(), + hotStat: statistics.NewHotStat(parentCtx), + slowStat: statistics.NewSlowStat(parentCtx), + regionStats: statistics.NewRegionStatistics(basicCluster, opt, ruleManager), } } -func (sc *schedulingController) init(basicCluster *core.BasicCluster, opt *config.PersistOptions, coordinator *schedule.Coordinator, ruleManager *placement.RuleManager) { - sc.BasicCluster = basicCluster - sc.opt = opt - sc.coordinator = coordinator - sc.regionStats = statistics.NewRegionStatistics(basicCluster, opt, ruleManager) -} - func (sc *schedulingController) stopSchedulingJobs() bool { sc.mu.Lock() defer sc.mu.Unlock() @@ -85,20 +86,31 @@ func (sc *schedulingController) stopSchedulingJobs() bool { return true } -func (sc *schedulingController) startSchedulingJobs() bool { +func (sc *schedulingController) startSchedulingJobs(cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { sc.mu.Lock() defer sc.mu.Unlock() if sc.running { - return false + return } - sc.ctx, sc.cancel = context.WithCancel(sc.parentCtx) + sc.initCoordinatorLocked(sc.parentCtx, cluster, hbstreams) sc.wg.Add(3) go sc.runCoordinator() go sc.runStatsBackgroundJobs() go sc.runSchedulingMetricsCollectionJob() sc.running = true log.Info("scheduling service is started") - return true +} + +func (sc *schedulingController) initCoordinator(ctx context.Context, cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { + sc.mu.Lock() + defer sc.mu.Unlock() + sc.initCoordinatorLocked(ctx, cluster, hbstreams) + sc.coordinator.InitSchedulers(false) +} + +func (sc *schedulingController) initCoordinatorLocked(ctx context.Context, cluster sche.ClusterInformer, hbstreams *hbstream.HeartbeatStreams) { + sc.ctx, sc.cancel = context.WithCancel(ctx) + sc.coordinator = schedule.NewCoordinator(sc.ctx, cluster, hbstreams) } // runCoordinator runs the main scheduling loop. @@ -156,8 +168,8 @@ func (sc *schedulingController) runSchedulingMetricsCollectionJob() { func (sc *schedulingController) resetSchedulingMetrics() { statistics.Reset() - sc.coordinator.GetSchedulersController().ResetSchedulerMetrics() - sc.coordinator.ResetHotSpotMetrics() + schedulers.ResetSchedulerMetrics() + schedule.ResetHotSpotMetrics() sc.resetStatisticsMetrics() } @@ -287,88 +299,136 @@ func (sc *schedulingController) BucketsStats(degree int, regionIDs ...uint64) ma return sc.hotStat.BucketsStats(degree, regionIDs...) 
} +// GetCoordinator returns the coordinator. +func (sc *schedulingController) GetCoordinator() *schedule.Coordinator { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator +} + // GetPausedSchedulerDelayAt returns DelayAt of a paused scheduler func (sc *schedulingController) GetPausedSchedulerDelayAt(name string) (int64, error) { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().GetPausedSchedulerDelayAt(name) } // GetPausedSchedulerDelayUntil returns DelayUntil of a paused scheduler func (sc *schedulingController) GetPausedSchedulerDelayUntil(name string) (int64, error) { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().GetPausedSchedulerDelayUntil(name) } +// GetOperatorController returns the operator controller. +func (sc *schedulingController) GetOperatorController() *operator.Controller { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.coordinator.GetOperatorController() +} + // GetRegionScatterer returns the region scatter. func (sc *schedulingController) GetRegionScatterer() *scatter.RegionScatterer { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetRegionScatterer() } // GetRegionSplitter returns the region splitter func (sc *schedulingController) GetRegionSplitter() *splitter.RegionSplitter { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetRegionSplitter() } // GetMergeChecker returns merge checker. func (sc *schedulingController) GetMergeChecker() *checker.MergeChecker { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetMergeChecker() } // GetRuleChecker returns rule checker. func (sc *schedulingController) GetRuleChecker() *checker.RuleChecker { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetRuleChecker() } // GetSchedulers gets all schedulers. func (sc *schedulingController) GetSchedulers() []string { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().GetSchedulerNames() } // GetSchedulerHandlers gets all scheduler handlers. func (sc *schedulingController) GetSchedulerHandlers() map[string]http.Handler { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().GetSchedulerHandlers() } // AddSchedulerHandler adds a scheduler handler. func (sc *schedulingController) AddSchedulerHandler(scheduler schedulers.Scheduler, args ...string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().AddSchedulerHandler(scheduler, args...) } // RemoveSchedulerHandler removes a scheduler handler. func (sc *schedulingController) RemoveSchedulerHandler(name string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().RemoveSchedulerHandler(name) } // AddScheduler adds a scheduler. func (sc *schedulingController) AddScheduler(scheduler schedulers.Scheduler, args ...string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().AddScheduler(scheduler, args...) } // RemoveScheduler removes a scheduler. func (sc *schedulingController) RemoveScheduler(name string) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().RemoveScheduler(name) } // PauseOrResumeScheduler pauses or resumes a scheduler. 
func (sc *schedulingController) PauseOrResumeScheduler(name string, t int64) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetSchedulersController().PauseOrResumeScheduler(name, t) } // PauseOrResumeChecker pauses or resumes checker. func (sc *schedulingController) PauseOrResumeChecker(name string, t int64) error { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.PauseOrResumeChecker(name, t) } // AddSuspectRegions adds regions to suspect list. func (sc *schedulingController) AddSuspectRegions(regionIDs ...uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() sc.coordinator.GetCheckerController().AddSuspectRegions(regionIDs...) } // GetSuspectRegions gets all suspect regions. func (sc *schedulingController) GetSuspectRegions() []uint64 { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetCheckerController().GetSuspectRegions() } // RemoveSuspectRegion removes region from suspect list. func (sc *schedulingController) RemoveSuspectRegion(id uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() sc.coordinator.GetCheckerController().RemoveSuspectRegion(id) } @@ -376,11 +436,15 @@ func (sc *schedulingController) RemoveSuspectRegion(id uint64) { // it would return value and true if pop success, or return empty [][2][]byte and false // if suspectKeyRanges couldn't pop keyRange group. func (sc *schedulingController) PopOneSuspectKeyRange() ([2][]byte, bool) { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetCheckerController().PopOneSuspectKeyRange() } // ClearSuspectKeyRanges clears the suspect keyRanges, only for unit test func (sc *schedulingController) ClearSuspectKeyRanges() { + sc.mu.RLock() + defer sc.mu.RUnlock() sc.coordinator.GetCheckerController().ClearSuspectKeyRanges() } @@ -388,14 +452,14 @@ func (sc *schedulingController) ClearSuspectKeyRanges() { // The instance of each keyRange is like following format: // [2][]byte: start key/end key func (sc *schedulingController) AddSuspectKeyRange(start, end []byte) { + sc.mu.RLock() + defer sc.mu.RUnlock() sc.coordinator.GetCheckerController().AddSuspectKeyRange(start, end) } -func (sc *schedulingController) initSchedulers() { - sc.coordinator.InitSchedulers(false) -} - func (sc *schedulingController) getEvictLeaderStores() (evictStores []uint64) { + sc.mu.RLock() + defer sc.mu.RUnlock() if sc.coordinator == nil { return nil } @@ -415,10 +479,21 @@ func (sc *schedulingController) getEvictLeaderStores() (evictStores []uint64) { // IsPrepared return true if the prepare checker is ready. func (sc *schedulingController) IsPrepared() bool { + sc.mu.RLock() + defer sc.mu.RUnlock() return sc.coordinator.GetPrepareChecker().IsPrepared() } // SetPrepared set the prepare check to prepared. Only for test purpose. func (sc *schedulingController) SetPrepared() { + sc.mu.RLock() + defer sc.mu.RUnlock() sc.coordinator.GetPrepareChecker().SetPrepared() } + +// IsSchedulingControllerRunning returns whether the scheduling controller is running. Only for test purpose. +func (sc *schedulingController) IsSchedulingControllerRunning() bool { + sc.mu.RLock() + defer sc.mu.RUnlock() + return sc.running +} diff --git a/tests/integrations/mcs/scheduling/config_test.go b/tests/integrations/mcs/scheduling/config_test.go index 06d73caf130..ccf7cdaf48c 100644 --- a/tests/integrations/mcs/scheduling/config_test.go +++ b/tests/integrations/mcs/scheduling/config_test.go @@ -149,8 +149,9 @@ func (suite *configTestSuite) TestSchedulerConfigWatch() { ) re.NoError(err) // Get all default scheduler names. 
- var namesFromAPIServer, _, _ = suite.pdLeaderServer.GetRaftCluster().GetStorage().LoadAllSchedulerConfigs() + var namesFromAPIServer []string testutil.Eventually(re, func() bool { + namesFromAPIServer, _, _ = suite.pdLeaderServer.GetRaftCluster().GetStorage().LoadAllSchedulerConfigs() return len(namesFromAPIServer) == len(sc.DefaultSchedulers) }) // Check all default schedulers' configs. diff --git a/tests/integrations/mcs/scheduling/server_test.go b/tests/integrations/mcs/scheduling/server_test.go index a359e1d023a..41c00b8e9b4 100644 --- a/tests/integrations/mcs/scheduling/server_test.go +++ b/tests/integrations/mcs/scheduling/server_test.go @@ -195,6 +195,44 @@ func (suite *serverTestSuite) TestForwardStoreHeartbeat() { }) } +func (suite *serverTestSuite) TestDynamicSwitch() { + re := suite.Require() + // API server will execute scheduling jobs since there is no scheduler server. + testutil.Eventually(re, func() bool { + return suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + + tc, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) + re.NoError(err) + defer tc.Destroy() + tc.WaitForPrimaryServing(re) + // After scheduling server is started, API server will not execute scheduling jobs. + testutil.Eventually(re, func() bool { + return !suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + // Scheduling server is responsible for executing scheduling jobs. + testutil.Eventually(re, func() bool { + return tc.GetPrimaryServer().GetCluster().IsBackgroundJobsRunning() + }) + tc.GetPrimaryServer().Close() + // Stop scheduling server. API server will execute scheduling jobs again. + testutil.Eventually(re, func() bool { + return suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + tc1, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) + re.NoError(err) + defer tc1.Destroy() + tc1.WaitForPrimaryServing(re) + // After scheduling server is started, API server will not execute scheduling jobs. + testutil.Eventually(re, func() bool { + return !suite.pdLeader.GetServer().GetRaftCluster().IsSchedulingControllerRunning() + }) + // Scheduling server is responsible for executing scheduling jobs again. 
+ testutil.Eventually(re, func() bool { + return tc1.GetPrimaryServer().GetCluster().IsBackgroundJobsRunning() + }) +} + func (suite *serverTestSuite) TestSchedulerSync() { re := suite.Require() tc, err := tests.NewTestSchedulingCluster(suite.ctx, 1, suite.backendEndpoints) diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index ac9bb3d83bf..8cab8ea9ab2 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -349,6 +349,7 @@ func (suite *hotTestSuite) checkHotWithoutHotPeer(cluster *tests.TestCluster) { hotRegion := statistics.StoreHotPeersInfos{} re.NoError(err) re.NoError(json.Unmarshal(output, &hotRegion)) + re.NotNil(hotRegion.AsPeer[1]) re.Equal(hotRegion.AsPeer[1].Count, 0) re.Equal(0.0, hotRegion.AsPeer[1].TotalBytesRate) re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) diff --git a/tests/pdctl/keyspace/keyspace_group_test.go b/tests/pdctl/keyspace/keyspace_group_test.go index 0b09550d967..cbfdf1d099a 100644 --- a/tests/pdctl/keyspace/keyspace_group_test.go +++ b/tests/pdctl/keyspace/keyspace_group_test.go @@ -503,7 +503,7 @@ func TestShowKeyspaceGroupPrimary(t *testing.T) { for i := 0; i < 10; i++ { keyspaces = append(keyspaces, fmt.Sprintf("keyspace_%d", i)) } - tc, err := tests.NewTestAPICluster(ctx, 3, func(conf *config.Config, serverName string) { + tc, err := tests.NewTestAPICluster(ctx, 1, func(conf *config.Config, serverName string) { conf.Keyspace.PreAlloc = keyspaces }) re.NoError(err) @@ -528,7 +528,6 @@ func TestShowKeyspaceGroupPrimary(t *testing.T) { args := []string{"-u", pdAddr, "keyspace-group"} output, err := pdctl.ExecuteCommand(cmd, append(args, defaultKeyspaceGroupID)...) re.NoError(err) - err = json.Unmarshal(output, &keyspaceGroup) re.NoError(err) re.Equal(utils.DefaultKeyspaceGroupID, keyspaceGroup.ID) diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index e36ead7e44d..14b8618f6a6 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -27,7 +27,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" - pdoperator "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/config" @@ -285,10 +285,10 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 1}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 1}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 1}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 1}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, { @@ -297,11 +297,11 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 2}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 2}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 
2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 2}.String(), - pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), + operator.AddLearner{ToStore: 3, PeerID: 2}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 2}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 2}.String(), + operator.TransferLeader{FromStore: 2, ToStore: 3}.String(), }), }, { @@ -316,11 +316,11 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te placementRuleEnable: true, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 3}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 3}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), - pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), + operator.AddLearner{ToStore: 3, PeerID: 3}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 3}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.TransferLeader{FromStore: 2, ToStore: 3}.String(), }), }, { @@ -377,10 +377,10 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 5}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 5}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 3}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 5}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 5}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 3}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, { @@ -417,10 +417,10 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["leader", "follower"]}`), expectedError: nil, expectSteps: convertStepsToStr([]string{ - pdoperator.AddLearner{ToStore: 3, PeerID: 6}.String(), - pdoperator.PromoteLearner{ToStore: 3, PeerID: 6}.String(), - pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), - pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), + operator.AddLearner{ToStore: 3, PeerID: 6}.String(), + operator.PromoteLearner{ToStore: 3, PeerID: 6}.String(), + operator.TransferLeader{FromStore: 1, ToStore: 2}.String(), + operator.RemovePeer{FromStore: 1, PeerID: 1}.String(), }), }, } diff --git a/tests/testutil.go b/tests/testutil.go index 059a152f06f..2ccf6fb76be 100644 --- a/tests/testutil.go +++ b/tests/testutil.go @@ -299,6 +299,7 @@ func (s *SchedulingTestEnvironment) startCluster(m mode) { leaderServer := s.cluster.GetServer(s.cluster.GetLeader()) re.NoError(leaderServer.BootstrapCluster()) case apiMode: + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) s.cluster, err = NewTestAPICluster(s.ctx, 1, s.opts...) 
re.NoError(err) err = s.cluster.RunInitialServers() @@ -306,11 +307,13 @@ func (s *SchedulingTestEnvironment) startCluster(m mode) { re.NotEmpty(s.cluster.WaitLeader()) leaderServer := s.cluster.GetServer(s.cluster.GetLeader()) re.NoError(leaderServer.BootstrapCluster()) + leaderServer.GetRaftCluster().SetPrepared() // start scheduling cluster tc, err := NewTestSchedulingCluster(s.ctx, 1, leaderServer.GetAddr()) re.NoError(err) tc.WaitForPrimaryServing(re) s.cluster.SetSchedulingCluster(tc) time.Sleep(200 * time.Millisecond) // wait for scheduling cluster to update member + re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) } } From 49b32511c19127c3e0db772dfaace833caa1fb6e Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Mon, 20 Nov 2023 14:05:41 +0800 Subject: [PATCH 10/10] *: fix data race of `TestRaftClusterMultipleRestart` (#7392) close tikv/pd#7391 Signed-off-by: Ryan Leung --- server/cluster/cluster.go | 12 +++--- server/cluster/cluster_test.go | 61 +++++++++++++-------------- server/cluster/cluster_worker_test.go | 5 +-- server/server.go | 2 +- tests/server/cluster/cluster_test.go | 12 +++--- 5 files changed, 45 insertions(+), 47 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 0df543c96c2..3b826d8d33e 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -178,7 +178,7 @@ type Status struct { } // NewRaftCluster create a new cluster. -func NewRaftCluster(ctx context.Context, clusterID uint64, regionSyncer *syncer.RegionSyncer, etcdClient *clientv3.Client, +func NewRaftCluster(ctx context.Context, clusterID uint64, basicCluster *core.BasicCluster, storage storage.Storage, regionSyncer *syncer.RegionSyncer, etcdClient *clientv3.Client, httpClient *http.Client) *RaftCluster { return &RaftCluster{ serverCtx: ctx, @@ -186,6 +186,8 @@ func NewRaftCluster(ctx context.Context, clusterID uint64, regionSyncer *syncer. 
regionSyncer: regionSyncer, httpClient: httpClient, etcdClient: etcdClient, + core: basicCluster, + storage: storage, } } @@ -258,11 +260,9 @@ func (c *RaftCluster) loadBootstrapTime() (time.Time, error) { func (c *RaftCluster) InitCluster( id id.Allocator, opt sc.ConfProvider, - storage storage.Storage, - basicCluster *core.BasicCluster, hbstreams *hbstream.HeartbeatStreams, keyspaceGroupManager *keyspace.GroupManager) error { - c.core, c.opt, c.storage, c.id = basicCluster, opt.(*config.PersistOptions), storage, id + c.opt, c.id = opt.(*config.PersistOptions), id c.ctx, c.cancel = context.WithCancel(c.serverCtx) c.progressManager = progress.NewManager() c.changedRegions = make(chan *core.RegionInfo, defaultChangedRegionsLimit) @@ -292,7 +292,7 @@ func (c *RaftCluster) Start(s Server) error { } c.isAPIServiceMode = s.IsAPIServiceMode() - err := c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetStorage(), s.GetBasicCluster(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) + err := c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetHBStreams(), s.GetKeyspaceGroupManager()) if err != nil { return err } @@ -380,7 +380,7 @@ func (c *RaftCluster) runServiceCheckJob() { ticker := time.NewTicker(serviceCheckInterval) failpoint.Inject("highFrequencyClusterJobs", func() { ticker.Stop() - ticker = time.NewTicker(time.Millisecond * 10) + ticker = time.NewTicker(time.Millisecond) }) defer ticker.Stop() diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 0e34ba4c743..e9ce35dfb54 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -68,7 +68,7 @@ func TestStoreHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() opt.GetScheduleConfig().StoreLimitVersion = "v2" re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) n, np := uint64(3), uint64(3) stores := newTestStores(n, "2.0.0") @@ -201,7 +201,7 @@ func TestFilterUnhealthyStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(3, "2.0.0") req := &pdpb.StoreHeartbeatRequest{} @@ -239,7 +239,7 @@ func TestSetOfflineStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -305,7 +305,7 @@ func TestSetOfflineWithReplica(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 4 stores. 
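(The repeated test-constructor change in the hunks above and below reflects the core of this race fix: state shared with background goroutines, namely the basic cluster and the storage, is now handed to the constructor instead of being assigned later in InitCluster, so it is fully published before Start launches any goroutine. The following is only an illustrative sketch of that construction-before-start pattern; the type and function names here are hypothetical and are not the actual PD APIs.)

package main

import "context"

// BasicCluster stands in for shared state that background goroutines read.
type BasicCluster struct{}

// Cluster mirrors the shape of the fix: shared fields are set exactly once,
// in the constructor, and never reassigned afterwards.
type Cluster struct {
	ctx  context.Context
	core *BasicCluster // published in the constructor, before any goroutine exists
}

// NewCluster wires shared dependencies up front, so no goroutine can ever
// observe a partially initialized struct (the source of the data race).
func NewCluster(ctx context.Context, core *BasicCluster) *Cluster {
	return &Cluster{ctx: ctx, core: core}
}

// Start only launches goroutines; it no longer mutates c.core.
func (c *Cluster) Start() {
	go func() {
		_ = c.core // safe: the field was set before Start was called
	}()
}

func main() {
	c := NewCluster(context.Background(), &BasicCluster{})
	c.Start()
}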
@@ -344,7 +344,7 @@ func TestSetOfflineStoreWithEvictLeader(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) opt.SetMaxReplicas(1) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 3 stores. @@ -371,7 +371,7 @@ func TestForceBuryStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) // Put 2 stores. stores := newTestStores(2, "5.3.0") stores[1] = stores[1].Clone(core.SetLastHeartbeatTS(time.Now())) @@ -390,7 +390,7 @@ func TestReuseAddress(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // Put 4 stores. for _, store := range newTestStores(4, "2.0.0") { @@ -436,7 +436,7 @@ func TestUpStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -481,7 +481,7 @@ func TestRemovingProcess(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.SetPrepared() @@ -539,7 +539,7 @@ func TestDeleteStoreUpdatesClusterVersion(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -574,7 +574,7 @@ func TestStoreClusterVersion(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(3, "5.0.0") s1, s2, s3 := stores[0].GetMeta(), stores[1].GetMeta(), stores[2].GetMeta() s1.Version = "5.0.1" @@ -599,7 +599,7 @@ func TestRegionHeartbeatHotStat(t 
*testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) newTestStores(4, "2.0.0") peers := []*metapb.Peer{ @@ -661,7 +661,7 @@ func TestBucketHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // case1: region is not exist @@ -718,7 +718,7 @@ func TestRegionHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) n, np := uint64(3), uint64(3) cluster.wg.Add(1) @@ -963,7 +963,7 @@ func TestRegionFlowChanged(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} processRegions := func(regions []*core.RegionInfo) { @@ -988,7 +988,7 @@ func TestRegionSizeChanged(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1034,7 +1034,7 @@ func TestConcurrentReportBucket(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1064,7 +1064,7 @@ func TestConcurrentRegionHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1105,7 +1105,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { cfg.LocationLabels = []string{"zone"} opt.SetReplicationConfig(cfg) 
re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) for i := uint64(1); i <= 4; i++ { var labels []*metapb.StoreLabel @@ -1184,7 +1184,7 @@ func TestHeartbeatSplit(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) // 1: [nil, nil) @@ -1228,7 +1228,7 @@ func TestRegionSplitAndMerge(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) regions := []*core.RegionInfo{core.NewTestRegionInfo(1, 1, []byte{}, []byte{})} @@ -1266,7 +1266,7 @@ func TestOfflineAndMerge(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts()) if opt.IsPlacementRulesEnabled() { @@ -1634,7 +1634,7 @@ func TestCalculateStoreSize1(t *testing.T) { cfg := opt.GetReplicationConfig() cfg.EnablePlacementRules = true opt.SetReplicationConfig(cfg) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1720,7 +1720,7 @@ func TestCalculateStoreSize2(t *testing.T) { cfg.EnablePlacementRules = true opt.SetReplicationConfig(cfg) opt.SetMaxReplicas(3) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil) cluster.regionStats = statistics.NewRegionStatistics( cluster.GetBasicCluster(), @@ -1829,7 +1829,7 @@ func Test(t *testing.T) { regions := newTestRegions(n, n, np) _, opts, err := newTestScheduleConfig() re.NoError(err) - tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + tc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opts, storage.NewStorageWithMemoryBackend()) cache := tc.core for i := uint64(0); i < n; i++ { @@ -1943,7 +1943,7 @@ func TestAwakenStore(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), 
core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) n := uint64(3) stores := newTestStores(n, "6.5.0") re.True(stores[0].NeedAwakenStore()) @@ -1997,7 +1997,7 @@ func TestUpdateAndDeleteLabel(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) stores := newTestStores(1, "6.5.1") for _, store := range stores { re.NoError(cluster.PutStore(store.GetMeta())) @@ -2115,7 +2115,7 @@ func newTestScheduleConfig() (*sc.ScheduleConfig, *config.PersistOptions, error) } func newTestCluster(ctx context.Context, opt *config.PersistOptions) *testCluster { - rc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + rc := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) storage := storage.NewStorageWithMemoryBackend() rc.regionLabeler, _ = labeler.NewRegionLabeler(ctx, storage, time.Second*5) @@ -2127,10 +2127,9 @@ func newTestRaftCluster( id id.Allocator, opt *config.PersistOptions, s storage.Storage, - basicCluster *core.BasicCluster, ) *RaftCluster { - rc := &RaftCluster{serverCtx: ctx} - rc.InitCluster(id, opt, s, basicCluster, nil, nil) + rc := &RaftCluster{serverCtx: ctx, core: core.NewBasicCluster(), storage: s} + rc.InitCluster(id, opt, nil, nil) rc.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), rc, opt) if opt.IsPlacementRulesEnabled() { err := rc.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel()) diff --git a/server/cluster/cluster_worker_test.go b/server/cluster/cluster_worker_test.go index b376b38edc3..afc979e2b97 100644 --- a/server/cluster/cluster_worker_test.go +++ b/server/cluster/cluster_worker_test.go @@ -21,7 +21,6 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" - "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/storage" ) @@ -33,7 +32,7 @@ func TestReportSplit(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) left := &metapb.Region{Id: 1, StartKey: []byte("a"), EndKey: []byte("b")} right := &metapb.Region{Id: 2, StartKey: []byte("b"), EndKey: []byte("c")} _, err = cluster.HandleReportSplit(&pdpb.ReportSplitRequest{Left: left, Right: right}) @@ -49,7 +48,7 @@ func TestReportBatchSplit(t *testing.T) { _, opt, err := newTestScheduleConfig() re.NoError(err) - cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend()) regions := []*metapb.Region{ {Id: 1, StartKey: []byte(""), EndKey: []byte("a")}, {Id: 2, StartKey: []byte("a"), EndKey: []byte("b")}, diff --git a/server/server.go b/server/server.go index d4b40af9c18..76893c24388 100644 --- a/server/server.go +++ b/server/server.go @@ -473,7 +473,7 @@ func (s *Server) startServer(ctx 
context.Context) error { s.gcSafePointManager = gc.NewSafePointManager(s.storage, s.cfg.PDServerCfg) s.basicCluster = core.NewBasicCluster() - s.cluster = cluster.NewRaftCluster(ctx, s.clusterID, syncer.NewRegionSyncer(s), s.client, s.httpClient) + s.cluster = cluster.NewRaftCluster(ctx, s.clusterID, s.GetBasicCluster(), s.GetStorage(), syncer.NewRegionSyncer(s), s.client, s.httpClient) keyspaceIDAllocator := id.NewAllocator(&id.AllocatorParams{ Client: s.client, RootPath: s.rootPath, diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index b7a428e3683..ccb469c04cb 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -812,10 +812,10 @@ func TestLoadClusterInfo(t *testing.T) { tc.WaitLeader() leaderServer := tc.GetLeaderServer() svr := leaderServer.GetServer() - rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), svr.GetBasicCluster(), svr.GetStorage(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) // Cluster is not bootstrapped. - rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetStorage(), svr.GetBasicCluster(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) + rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) raftCluster, err := rc.LoadClusterInfo() re.NoError(err) re.Nil(raftCluster) @@ -852,8 +852,8 @@ func TestLoadClusterInfo(t *testing.T) { } re.NoError(testStorage.Flush()) - raftCluster = cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) - raftCluster.InitCluster(mockid.NewIDAllocator(), svr.GetPersistOptions(), testStorage, basicCluster, svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) + raftCluster = cluster.NewRaftCluster(ctx, svr.ClusterID(), basicCluster, testStorage, syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + raftCluster.InitCluster(mockid.NewIDAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) raftCluster, err = raftCluster.LoadClusterInfo() re.NoError(err) re.NotNil(raftCluster) @@ -1560,8 +1560,8 @@ func TestTransferLeaderBack(t *testing.T) { tc.WaitLeader() leaderServer := tc.GetLeaderServer() svr := leaderServer.GetServer() - rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) - rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetStorage(), svr.GetBasicCluster(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) + rc := cluster.NewRaftCluster(ctx, svr.ClusterID(), svr.GetBasicCluster(), svr.GetStorage(), syncer.NewRegionSyncer(svr), svr.GetClient(), svr.GetHTTPClient()) + rc.InitCluster(svr.GetAllocator(), svr.GetPersistOptions(), svr.GetHBStreams(), svr.GetKeyspaceGroupManager()) storage := rc.GetStorage() meta := &metapb.Cluster{Id: 123} re.NoError(storage.SaveMeta(meta))