Skip to content

Commit

Permalink
slow node detection: enable evict-slow-trend on raft-kv2. (#6945)
Browse files Browse the repository at this point in the history
close #6868, close tikv/tikv#15267, ref tikv/tikv#15271

This PR contains:
+ Enable the `evict-slow-trend` scheduler on `raft-kv2` by default.
+ Optimize the detection strategy for perceiving network I/O delays on TiKV nodes.

Signed-off-by: lucasliang <nkcs_lykx@hotmail.com>
  • Loading branch information
LykxSassinator authored Aug 23, 2023
1 parent ebceb83 commit 1743552
Show file tree
Hide file tree
Showing 12 changed files with 315 additions and 79 deletions.
10 changes: 10 additions & 0 deletions pkg/mcs/scheduling/server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -606,11 +606,21 @@ func (o *PersistConfig) GetRegionMaxKeys() uint64 {
return o.GetStoreConfig().GetRegionMaxKeys()
}

// IsSynced reports whether the store config held by this PersistConfig
// has been marked as synced. It delegates to the store config's IsSynced.
func (o *PersistConfig) IsSynced() bool {
	storeCfg := o.GetStoreConfig()
	return storeCfg.IsSynced()
}

// IsEnableRegionBucket reports whether the region bucket feature is enabled.
// It delegates to the store config's IsEnableRegionBucket.
func (o *PersistConfig) IsEnableRegionBucket() bool {
	storeCfg := o.GetStoreConfig()
	return storeCfg.IsEnableRegionBucket()
}

// IsRaftKV2 reports whether the cluster uses the `raft-kv2` engine.
// It delegates to the store config's IsRaftKV2.
func (o *PersistConfig) IsRaftKV2() bool {
	storeCfg := o.GetStoreConfig()
	return storeCfg.IsRaftKV2()
}

// TODO: implement the following methods

// AddSchedulerCfg adds the scheduler configurations.
Expand Down
2 changes: 2 additions & 0 deletions pkg/schedule/config/config_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,5 +139,7 @@ type StoreConfigProvider interface {
GetRegionMaxKeys() uint64
CheckRegionSize(uint64, uint64) error
CheckRegionKeys(uint64, uint64) error
IsSynced() bool
IsEnableRegionBucket() bool
IsRaftKV2() bool
}
18 changes: 18 additions & 0 deletions pkg/schedule/config/store_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type StoreConfig struct {
RegionMaxSizeMB uint64 `json:"-"`
RegionSplitSizeMB uint64 `json:"-"`
RegionBucketSizeMB uint64 `json:"-"`

Sync bool `json:"sync"`
}

// Storage is the config for the tikv storage.
Expand All @@ -73,6 +75,7 @@ func (c *StoreConfig) Adjust() {
if c == nil {
return
}

c.RegionMaxSizeMB = typeutil.ParseMBFromText(c.RegionMaxSize, defaultRegionMaxSize)
c.RegionSplitSizeMB = typeutil.ParseMBFromText(c.RegionSplitSize, defaultRegionSplitSize)
c.RegionBucketSizeMB = typeutil.ParseMBFromText(c.RegionBucketSize, defaultBucketSize)
Expand Down Expand Up @@ -124,6 +127,21 @@ func (c *StoreConfig) GetRegionMaxKeys() uint64 {
return uint64(c.RegionMaxKeys)
}

// SetSynced flags the StoreConfig as synced by setting Sync to true.
// Calling it on a nil receiver is a no-op.
func (c *StoreConfig) SetSynced() {
	if c == nil {
		return
	}
	c.Sync = true
}

// IsSynced reports whether the StoreConfig has been marked as synced.
// A nil receiver is reported as not synced.
func (c *StoreConfig) IsSynced() bool {
	return c != nil && c.Sync
}

// IsEnableRegionBucket return true if the region bucket is enabled.
func (c *StoreConfig) IsEnableRegionBucket() bool {
if c == nil {
Expand Down
22 changes: 20 additions & 2 deletions pkg/schedule/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"context"
"strconv"
"sync"
"testing"
"time"

"github.com/pingcap/errors"
Expand Down Expand Up @@ -375,7 +376,6 @@ func (c *Coordinator) initSchedulers() {
if err != nil {
log.Fatal("cannot load schedulers' config", errs.ZapError(err))
}

scheduleCfg := c.cluster.GetSchedulerConfig().GetScheduleConfig().Clone()
// The new way to create scheduler with the independent configuration.
for i, name := range scheduleNames {
Expand Down Expand Up @@ -439,6 +439,20 @@ func (c *Coordinator) initSchedulers() {
if err := c.cluster.GetSchedulerConfig().Persist(c.cluster.GetStorage()); err != nil {
log.Error("cannot persist schedule config", errs.ZapError(err))
}

// If the cluster was set up with `raft-kv2` engine, this cluster should
// enable `evict-slow-trend` scheduler as default.
if c.GetCluster().GetStoreConfig().IsRaftKV2() {
name := schedulers.EvictSlowTrendType
args := []string{}

s, err := schedulers.CreateScheduler(name, c.opController, c.cluster.GetStorage(), schedulers.ConfigSliceDecoder(name, args), c.schedulers.RemoveScheduler)
if err != nil {
log.Warn("initializing evict-slow-trend scheduler failed", errs.ZapError(err))
} else if err = c.schedulers.AddScheduler(s, args...); err != nil {
log.Error("can not add scheduler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args), errs.ZapError(err))
}
}
}

// LoadPlugin load user plugin
Expand Down Expand Up @@ -625,7 +639,11 @@ func (c *Coordinator) ResetHotSpotMetrics() {

// ShouldRun reports whether the coordinator should run. Both conditions must
// hold: the prepare checker passes for the basic cluster, and the store
// config is synced. In binaries built by `go test` the sync condition is
// treated as already satisfied.
func (c *Coordinator) ShouldRun() bool {
	synced := c.cluster.GetStoreConfig().IsSynced()
	// Under `go test`, do not wait for the store config to be synced.
	synced = synced || testing.Testing()
	prepared := c.prepareChecker.check(c.cluster.GetBasicCluster())
	return prepared && synced
}

// GetSchedulersController returns the schedulers controller.
Expand Down
Loading

0 comments on commit 1743552

Please sign in to comment.