
Commit

Merge branch 'master' into priority
ti-chi-bot[bot] authored Jun 19, 2023
2 parents eaea132 + 80edddb commit f30d5fb
Showing 32 changed files with 356 additions and 120 deletions.
21 changes: 15 additions & 6 deletions client/client.go
@@ -277,6 +277,14 @@ type serviceModeKeeper struct {
tsoSvcDiscovery ServiceDiscovery
}

func (k *serviceModeKeeper) SetKeyspaceID(keyspaceID uint32) {
k.Lock()
defer k.Unlock()
if k.serviceMode == pdpb.ServiceMode_API_SVC_MODE {
k.tsoSvcDiscovery.SetKeyspaceID(keyspaceID)
}
}

func (k *serviceModeKeeper) close() {
k.Lock()
defer k.Unlock()
@@ -471,9 +479,6 @@ func newClientWithKeyspaceName(
ctx context.Context, keyspaceName string, svrAddrs []string,
security SecurityOption, opts ...ClientOption,
) (Client, error) {
log.Info("[pd] create pd client with endpoints and keyspace",
zap.Strings("pd-address", svrAddrs), zap.String("keyspace-name", keyspaceName))

tlsCfg := &tlsutil.TLSConfig{
CAPath: security.CAPath,
CertPath: security.CertPath,
@@ -510,8 +515,12 @@ func newClientWithKeyspaceName(
if err := c.initRetry(c.loadKeyspaceMeta, keyspaceName); err != nil {
return nil, err
}
// We call "c.pdSvcDiscovery.SetKeyspaceID(c.keyspaceID)" after the service mode has already switched to API mode
// and the TSO service discovery has been initialized, so the TSO service discovery's keyspace ID needs to be set here too.
c.pdSvcDiscovery.SetKeyspaceID(c.keyspaceID)

c.serviceModeKeeper.SetKeyspaceID(c.keyspaceID)
log.Info("[pd] create pd client with endpoints and keyspace",
zap.Strings("pd-address", svrAddrs), zap.String("keyspace-name", keyspaceName), zap.Uint32("keyspace-id", c.keyspaceID))
return c, nil
}

@@ -593,15 +602,15 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) {
)
switch newMode {
case pdpb.ServiceMode_PD_SVC_MODE:
newTSOCli = newTSOClient(c.ctx, c.option, c.keyspaceID,
newTSOCli = newTSOClient(c.ctx, c.option,
c.pdSvcDiscovery, &pdTSOStreamBuilderFactory{})
case pdpb.ServiceMode_API_SVC_MODE:
newTSOSvcDiscovery = newTSOServiceDiscovery(
c.ctx, MetaStorageClient(c), c.pdSvcDiscovery,
c.GetClusterID(c.ctx), c.keyspaceID, c.tlsCfg, c.option)
// At this point, the keyspace group isn't known yet. It starts from the default keyspace group
// and will be updated later.
newTSOCli = newTSOClient(c.ctx, c.option, c.keyspaceID,
newTSOCli = newTSOClient(c.ctx, c.option,
newTSOSvcDiscovery, &tsoTSOStreamBuilderFactory{})
if err := newTSOSvcDiscovery.Init(); err != nil {
log.Error("[pd] failed to initialize tso service discovery. keep the current service mode",
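Together, the client.go changes route the keyspace ID to both discovery components once it is known: newClientWithKeyspaceName sets it on the PD service discovery and, through the new serviceModeKeeper.SetKeyspaceID, on the TSO service discovery when the client is already running in API service mode. Below is a minimal sketch of that guard-under-lock pattern; the mode constants and discovery type are simplified stand-ins, not the real pdpb/ServiceDiscovery types.

```go
package main

import (
	"fmt"
	"sync"
)

// mode is a simplified stand-in for pdpb.ServiceMode.
type mode int

const (
	pdSvcMode mode = iota
	apiSvcMode
)

// tsoDiscovery is a stand-in for the real TSO service discovery.
type tsoDiscovery struct{ keyspaceID uint32 }

func (d *tsoDiscovery) SetKeyspaceID(id uint32) { d.keyspaceID = id }

// keeper mirrors serviceModeKeeper: the keyspace ID is only forwarded to the
// TSO service discovery when the client runs in API service mode, because the
// TSO service discovery does not exist in plain PD mode.
type keeper struct {
	sync.Mutex
	serviceMode     mode
	tsoSvcDiscovery *tsoDiscovery
}

func (k *keeper) SetKeyspaceID(keyspaceID uint32) {
	k.Lock()
	defer k.Unlock()
	if k.serviceMode == apiSvcMode {
		k.tsoSvcDiscovery.SetKeyspaceID(keyspaceID)
	}
}

func main() {
	k := &keeper{serviceMode: apiSvcMode, tsoSvcDiscovery: &tsoDiscovery{}}
	k.SetKeyspaceID(42)
	fmt.Println(k.tsoSvcDiscovery.keyspaceID) // 42
}
```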
12 changes: 10 additions & 2 deletions client/resource_group/controller/controller.go
@@ -213,9 +213,12 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
if err != nil {
log.Warn("load resource group revision failed", zap.Error(err))
}
watchChannel, err := c.provider.Watch(ctx, pd.GroupSettingsPathPrefixBytes, pd.WithRev(revision), pd.WithPrefix())
var watchChannel chan []*meta_storagepb.Event
if !c.config.isSingleGroupByKeyspace {
watchChannel, err = c.provider.Watch(ctx, pd.GroupSettingsPathPrefixBytes, pd.WithRev(revision), pd.WithPrefix())
}
watchRetryTimer := time.NewTimer(watchRetryInterval)
if err == nil {
if err == nil || c.config.isSingleGroupByKeyspace {
watchRetryTimer.Stop()
}
defer watchRetryTimer.Stop()
@@ -255,6 +258,11 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
case <-emergencyTokenAcquisitionTicker.C:
c.executeOnAllGroups((*groupCostController).resetEmergencyTokenAcquisition)
case resp, ok := <-watchChannel:
failpoint.Inject("disableWatch", func() {
if c.config.isSingleGroupByKeyspace {
panic("disableWatch")
}
})
if !ok {
watchChannel = nil
watchRetryTimer.Reset(watchRetryInterval)
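The controller change leans on a standard Go idiom: a receive from a nil channel blocks forever, so when isSingleGroupByKeyspace is set the watch channel is simply never created and its select case can never fire (the failpoint above just asserts that in tests). A standalone sketch of the idiom follows; the event type and channel contents are placeholders.

```go
package main

import (
	"fmt"
	"time"
)

type event struct{ key string }

func run(singleGroup bool) {
	// When the watch is disabled, watchCh stays nil; receiving from a nil
	// channel blocks forever, so that select case is effectively switched off.
	var watchCh chan event
	if !singleGroup {
		watchCh = make(chan event, 1)
		watchCh <- event{key: "resource_group/settings/default"}
	}

	ticker := time.NewTicker(10 * time.Millisecond)
	defer ticker.Stop()

	for i := 0; i < 2; i++ {
		select {
		case ev := <-watchCh:
			fmt.Println("watched update:", ev.key)
		case <-ticker.C:
			fmt.Println("periodic work")
		}
	}
}

func main() {
	run(false) // watch enabled: the buffered event is delivered
	run(true)  // watch disabled: only the ticker case fires
}
```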
4 changes: 1 addition & 3 deletions client/tso_client.go
@@ -70,7 +70,6 @@ type tsoClient struct {
wg sync.WaitGroup
option *option

keyspaceID uint32
svcDiscovery ServiceDiscovery
tsoStreamBuilderFactory
// tsoAllocators defines the mapping {dc-location -> TSO allocator leader URL}
@@ -94,15 +93,14 @@ type tsoClient struct {

// newTSOClient returns a new TSO client.
func newTSOClient(
ctx context.Context, option *option, keyspaceID uint32,
ctx context.Context, option *option,
svcDiscovery ServiceDiscovery, factory tsoStreamBuilderFactory,
) *tsoClient {
ctx, cancel := context.WithCancel(ctx)
c := &tsoClient{
ctx: ctx,
cancel: cancel,
option: option,
keyspaceID: keyspaceID,
svcDiscovery: svcDiscovery,
tsoStreamBuilderFactory: factory,
checkTSDeadlineCh: make(chan struct{}),
3 changes: 2 additions & 1 deletion pkg/balancer/round_robin.go
@@ -51,7 +51,8 @@ func (r *RoundRobin[T]) Next() (t T) {
func (r *RoundRobin[T]) GetAll() []T {
r.RLock()
defer r.RUnlock()
return r.nodes
// return a copy to avoid data races
return append(r.nodes[:0:0], r.nodes...)
}

// Put puts one into balancer.
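The round-robin fix uses the append(s[:0:0], s...) copy idiom: s[:0:0] is a zero-length, zero-capacity slice, so append cannot reuse the existing backing array and must allocate a fresh one. The snippet below illustrates why handing out the internal slice is unsafe and how the copy avoids it; the balancer type here is a hypothetical stand-in, not the real RoundRobin.

```go
package main

import "fmt"

type balancer struct {
	nodes []string
}

// getAllAliased returns the internal slice; callers can mutate it and race
// with the balancer's own writes under its lock.
func (b *balancer) getAllAliased() []string { return b.nodes }

// getAll returns a copy: b.nodes[:0:0] has length 0 and capacity 0, so
// append is forced to allocate a new backing array before copying.
func (b *balancer) getAll() []string { return append(b.nodes[:0:0], b.nodes...) }

func main() {
	b := &balancer{nodes: []string{"pd-1", "pd-2"}}

	aliased := b.getAllAliased()
	aliased[0] = "mutated"
	fmt.Println(b.nodes[0]) // "mutated": internal state leaked to the caller

	b.nodes[0] = "pd-1"
	copied := b.getAll()
	copied[0] = "mutated"
	fmt.Println(b.nodes[0]) // "pd-1": the copy keeps the balancer isolated
}
```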
4 changes: 2 additions & 2 deletions pkg/core/storelimit/store_limit.go
@@ -30,14 +30,14 @@ const (
smallInfluence = 200
)

// RegionInfluence represents the influence of a operator step, which is used by store limit.
// RegionInfluence represents the influence of an operator step, which is used by store limit.
var RegionInfluence = []int64{
AddPeer: influence,
RemovePeer: influence,
SendSnapshot: influence,
}

// SmallRegionInfluence represents the influence of a operator step
// SmallRegionInfluence represents the influence of an operator step
// when the region size is smaller than smallRegionThreshold, which is used by store limit.
var SmallRegionInfluence = []int64{
AddPeer: smallInfluence,
9 changes: 7 additions & 2 deletions pkg/keyspace/keyspace.go
@@ -106,8 +106,13 @@ func NewKeyspaceManager(
kgm *GroupManager,
) *Manager {
return &Manager{
ctx: ctx,
metaLock: syncutil.NewLockGroup(syncutil.WithHash(MaskKeyspaceID)),
ctx: ctx,
// Remove the lock of the given key from the lock group on unlock to keep the
// working set minimal, which suits low-QPS, non-time-critical, and non-consecutive
// large keyspace scenarios. One example of the last case is a keyspace group split,
// which loads non-consecutive keyspace meta in batches and locks all loaded
// keyspace meta within a batch at the same time.
metaLock: syncutil.NewLockGroup(syncutil.WithRemoveEntryOnUnlock(true)),
idAllocator: idAllocator,
store: store,
cluster: cluster,
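The keyspace manager switches its lock group from hashing keyspace IDs into a fixed set of locks to per-key locks that are dropped again on unlock, so rarely contended keys do not accumulate in the map. A rough sketch of what a remove-on-unlock lock group can look like is shown below; it is an illustration only, not the real syncutil.LockGroup implementation.

```go
package main

import (
	"fmt"
	"sync"
)

// lockGroup hands out one mutex per key and, when removeOnUnlock is set,
// drops the entry again once nobody holds or waits for it.
type lockGroup struct {
	mu             sync.Mutex
	entries        map[uint32]*entry
	removeOnUnlock bool
}

type entry struct {
	mu  sync.Mutex
	ref int // holders plus waiters
}

func newLockGroup(removeOnUnlock bool) *lockGroup {
	return &lockGroup{entries: make(map[uint32]*entry), removeOnUnlock: removeOnUnlock}
}

func (g *lockGroup) Lock(key uint32) {
	g.mu.Lock()
	e, ok := g.entries[key]
	if !ok {
		e = &entry{}
		g.entries[key] = e
	}
	e.ref++
	g.mu.Unlock()
	e.mu.Lock()
}

func (g *lockGroup) Unlock(key uint32) {
	g.mu.Lock()
	e := g.entries[key]
	e.ref--
	if g.removeOnUnlock && e.ref == 0 {
		delete(g.entries, key) // keep the working set minimal
	}
	g.mu.Unlock()
	e.mu.Unlock()
}

func main() {
	g := newLockGroup(true)
	g.Lock(100)
	g.Unlock(100)
	fmt.Println(len(g.entries)) // 0: the entry was removed on unlock
}
```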
3 changes: 3 additions & 0 deletions pkg/keyspace/tso_keyspace_group.go
@@ -821,6 +821,9 @@ func (m *GroupManager) MergeKeyspaceGroups(mergeTargetID uint32, mergeList []uin
if (mergeListNum+1)*2 > maxEtcdTxnOps {
return ErrExceedMaxEtcdTxnOps
}
if slice.Contains(mergeList, utils.DefaultKeyspaceGroupID) {
return ErrModifyDefaultKeyspaceGroup
}
var (
groups = make(map[uint32]*endpoint.KeyspaceGroup, mergeListNum+1)
mergeTargetKg *endpoint.KeyspaceGroup
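MergeKeyspaceGroups now rejects two invalid inputs up front: a merge list too large for a single etcd transaction (the (mergeListNum+1)*2 bound suggests each involved group costs roughly two txn operations) and any attempt to merge the default keyspace group away. A simplified sketch of just that validation follows; the constants and error values are placeholders standing in for maxEtcdTxnOps, utils.DefaultKeyspaceGroupID, and the errors defined in pkg/keyspace/util.go.

```go
package main

import (
	"errors"
	"fmt"
)

// Placeholder values; the real limits live in the PD server configuration.
const (
	maxEtcdTxnOps          = 128
	defaultKeyspaceGroupID = uint32(0)
)

var (
	errExceedMaxEtcdTxnOps        = errors.New("exceed max etcd txn operations")
	errModifyDefaultKeyspaceGroup = errors.New("default keyspace group cannot be modified")
)

// validateMerge mirrors the new pre-checks: every group involved in the merge
// (the merge list plus the target) needs roughly two txn operations, and the
// default keyspace group must never appear in the merge list.
func validateMerge(mergeTargetID uint32, mergeList []uint32) error {
	if (len(mergeList)+1)*2 > maxEtcdTxnOps {
		return errExceedMaxEtcdTxnOps
	}
	for _, id := range mergeList {
		if id == defaultKeyspaceGroupID {
			return errModifyDefaultKeyspaceGroup
		}
	}
	return nil
}

func main() {
	fmt.Println(validateMerge(1, []uint32{2, 3}))                   // <nil>
	fmt.Println(validateMerge(1, []uint32{defaultKeyspaceGroupID})) // default keyspace group cannot be modified
	fmt.Println(validateMerge(1, make([]uint32, maxEtcdTxnOps/2)))  // exceed max etcd txn operations
}
```

The test added in the next file exercises both of these rejection paths.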
3 changes: 3 additions & 0 deletions pkg/keyspace/tso_keyspace_group_test.go
@@ -394,4 +394,7 @@ func (suite *keyspaceGroupTestSuite) TestKeyspaceGroupMerge() {
// merge when the number of keyspace groups exceeds the limit
err = suite.kgm.MergeKeyspaceGroups(1, make([]uint32, maxEtcdTxnOps/2))
re.ErrorIs(err, ErrExceedMaxEtcdTxnOps)
// merge the default keyspace group
err = suite.kgm.MergeKeyspaceGroups(1, []uint32{utils.DefaultKeyspaceGroupID})
re.ErrorIs(err, ErrModifyDefaultKeyspaceGroup)
}
2 changes: 2 additions & 0 deletions pkg/keyspace/util.go
@@ -64,6 +64,8 @@ var (
ErrNodeNotInKeyspaceGroup = errors.New("the tso node is not in this keyspace group")
// ErrKeyspaceGroupNotEnoughReplicas is used to indicate not enough replicas in the keyspace group.
ErrKeyspaceGroupNotEnoughReplicas = errors.New("not enough replicas in the keyspace group")
// ErrModifyDefaultKeyspaceGroup is used to indicate that default keyspace group cannot be modified.
ErrModifyDefaultKeyspaceGroup = errors.New("default keyspace group cannot be modified")
// ErrNoAvailableNode is used to indicate no available node in the keyspace group.
ErrNoAvailableNode = errors.New("no available node")
// ErrExceedMaxEtcdTxnOps is used to indicate the number of etcd txn operations exceeds the limit.
11 changes: 5 additions & 6 deletions pkg/mock/mockcluster/mockcluster.go
@@ -28,7 +28,6 @@ import (
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/core/storelimit"
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/pkg/id"
"github.com/tikv/pd/pkg/mock/mockid"
sc "github.com/tikv/pd/pkg/schedule/config"
"github.com/tikv/pd/pkg/schedule/labeler"
@@ -99,9 +98,9 @@ func (mc *Cluster) GetStorage() storage.Storage {
return mc.Storage
}

// GetAllocator returns the ID allocator.
func (mc *Cluster) GetAllocator() id.Allocator {
return mc.IDAllocator
// AllocID returns a new unique ID.
func (mc *Cluster) AllocID() (uint64, error) {
return mc.IDAllocator.Alloc()
}

// GetPersistOptions returns the persist options.
@@ -185,7 +184,7 @@ func hotRegionsFromStore(w *statistics.HotCache, storeID uint64, kind statistics

// AllocPeer allocs a new peer on a store.
func (mc *Cluster) AllocPeer(storeID uint64) (*metapb.Peer, error) {
peerID, err := mc.GetAllocator().Alloc()
peerID, err := mc.AllocID()
if err != nil {
log.Error("failed to alloc peer", errs.ZapError(err))
return nil, err
@@ -358,7 +357,7 @@ func (mc *Cluster) AddRegionStoreWithLeader(storeID uint64, regionCount int, lea
}
mc.AddRegionStore(storeID, regionCount)
for i := 0; i < leaderCount; i++ {
id, _ := mc.GetAllocator().Alloc()
id, _ := mc.AllocID()
mc.AddLeaderRegion(id, storeID)
}
}
8 changes: 4 additions & 4 deletions pkg/replication/replication_mode.go
@@ -234,7 +234,7 @@ func (m *ModeManager) drSwitchToAsyncWait(availableStores []uint64) error {
m.Lock()
defer m.Unlock()

id, err := m.cluster.GetAllocator().Alloc()
id, err := m.cluster.AllocID()
if err != nil {
log.Warn("failed to switch to async wait state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
@@ -257,7 +257,7 @@ func (m *ModeManager) drSwitchToAsync(availableStores []uint64) error {
}

func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error {
id, err := m.cluster.GetAllocator().Alloc()
id, err := m.cluster.AllocID()
if err != nil {
log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
@@ -280,7 +280,7 @@ func (m *ModeManager) drSwitchToSyncRecover() error {
}

func (m *ModeManager) drSwitchToSyncRecoverWithLock() error {
id, err := m.cluster.GetAllocator().Alloc()
id, err := m.cluster.AllocID()
if err != nil {
log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
@@ -301,7 +301,7 @@ func (m *ModeManager) drSwitchToSyncRecoverWithLock() error {
func (m *ModeManager) drSwitchToSync() error {
m.Lock()
defer m.Unlock()
id, err := m.cluster.GetAllocator().Alloc()
id, err := m.cluster.AllocID()
if err != nil {
log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
2 changes: 1 addition & 1 deletion pkg/schedule/checker/checker_controller.go
@@ -121,7 +121,7 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator {
}
}
// skip the joint checker, split checker and rule checker when region label is set to "schedule=deny".
// those checkers is help to make region health, it's necessary to skip them when region is set to deny.
// those checkers are helpful to keep the region healthy; it's necessary to skip them when the region is set to deny.
if cl, ok := c.cluster.(interface{ GetRegionLabeler() *labeler.RegionLabeler }); ok {
l := cl.GetRegionLabeler()
if l.ScheduleDisabled(region) {
3 changes: 1 addition & 2 deletions pkg/schedule/core/cluster_informer.go
@@ -16,7 +16,6 @@ package core

import (
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/id"
sc "github.com/tikv/pd/pkg/schedule/config"
"github.com/tikv/pd/pkg/schedule/labeler"
"github.com/tikv/pd/pkg/schedule/placement"
@@ -49,7 +48,7 @@ type ScheduleCluster interface {
GetRegionLabeler() *labeler.RegionLabeler
GetBasicCluster() *core.BasicCluster
GetStoreConfig() sc.StoreConfig
GetAllocator() id.Allocator
AllocID() (uint64, error)
}

// BasicCluster is an aggregate interface that wraps multiple interfaces
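The mockcluster and cluster_informer changes are two sides of the same refactor: ScheduleCluster now exposes AllocID() (uint64, error) directly instead of leaking the id.Allocator through GetAllocator(), and call sites such as AllocPeer and the operator builder are updated to match. A simplified before/after sketch of the call-site shape is shown below; the types are placeholders, not the real PD interfaces.

```go
package main

import "fmt"

// Allocator mirrors the old id.Allocator dependency.
type Allocator interface {
	Alloc() (uint64, error)
}

// cluster implements both styles so the two call sites can be compared.
type cluster struct {
	next uint64
}

func (c *cluster) Alloc() (uint64, error) { c.next++; return c.next, nil }

// Old style: expose the allocator and let callers call Alloc themselves.
func (c *cluster) GetAllocator() Allocator { return c }

// New style: expose only the operation callers actually need.
func (c *cluster) AllocID() (uint64, error) { return c.Alloc() }

func main() {
	c := &cluster{}

	oldID, _ := c.GetAllocator().Alloc() // call shape before this commit
	newID, _ := c.AllocID()              // call shape after this commit
	fmt.Println(oldID, newID)            // 1 2
}
```

Narrowing the interface this way keeps mock and production clusters easier to swap, since implementers only provide the single method schedulers actually use.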
2 changes: 1 addition & 1 deletion pkg/schedule/operator/builder.go
@@ -483,7 +483,7 @@ func (b *Builder) prepareBuild() (string, error) {
if o == nil || (!b.useJointConsensus && !core.IsLearner(o) && core.IsLearner(n)) {
if n.GetId() == 0 {
// Allocate peer ID if need.
id, err := b.GetAllocator().Alloc()
id, err := b.AllocID()
if err != nil {
return "", err
}
4 changes: 2 additions & 2 deletions pkg/schedule/operator/operator_controller.go
@@ -520,7 +520,7 @@ func (oc *Controller) ack(op *Operator) {
}
}

// RemoveOperator removes a operator from the running operators.
// RemoveOperator removes an operator from the running operators.
func (oc *Controller) RemoveOperator(op *Operator, extraFields ...zap.Field) bool {
oc.Lock()
removed := oc.removeOperatorLocked(op)
@@ -629,7 +629,7 @@ func (oc *Controller) GetOperatorStatus(id uint64) *OpWithStatus {
return oc.records.Get(id)
}

// GetOperator gets a operator from the given region.
// GetOperator gets an operator from the given region.
func (oc *Controller) GetOperator(regionID uint64) *Operator {
oc.RLock()
defer oc.RUnlock()
2 changes: 1 addition & 1 deletion pkg/schedule/operator/status.go
@@ -31,7 +31,7 @@ const (
// Followings are end status, i.e. no next status.
SUCCESS // Finished successfully
CANCELED // Canceled due to some reason
REPLACED // Replaced by an higher priority operator
REPLACED // Replaced by a higher priority operator
EXPIRED // Didn't start to run for too long
TIMEOUT // Running for too long
// Status list end
