From 21a31abc1f00763ec311c5b3b1d0d1639fca32b4 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 12 Dec 2024 11:21:57 +0800 Subject: [PATCH] resource control: fix unsafe usage of timer.Reset (#8877) (#8900) close tikv/pd#8876 Signed-off-by: lhy1024 Co-authored-by: lhy1024 --- .../resource_group/controller/controller.go | 9 +++--- client/{timerpool => timerutil}/pool.go | 2 +- client/{timerpool => timerutil}/pool_test.go | 2 +- client/timerutil/util.go | 32 +++++++++++++++++++ client/tso_dispatcher.go | 21 ++++-------- pkg/election/lease.go | 12 ++----- pkg/{timerpool => utils/timerutil}/pool.go | 2 +- .../timerutil}/pool_test.go | 2 +- pkg/utils/timerutil/util.go | 32 +++++++++++++++++++ pkg/utils/tsoutil/tso_dispatcher.go | 10 +++--- 10 files changed, 86 insertions(+), 38 deletions(-) rename client/{timerpool => timerutil}/pool.go (98%) rename client/{timerpool => timerutil}/pool_test.go (98%) create mode 100644 client/timerutil/util.go rename pkg/{timerpool => utils/timerutil}/pool.go (98%) rename pkg/{timerpool => utils/timerutil}/pool_test.go (98%) create mode 100644 pkg/utils/timerutil/util.go diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go index f7f82208b69..bb80e038c21 100755 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/client_golang/prometheus" pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/errs" + "github.com/tikv/pd/client/timerutil" atomicutil "go.uber.org/atomic" "go.uber.org/zap" "golang.org/x/exp/slices" @@ -289,7 +290,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { watchMetaChannel, err = c.provider.Watch(ctx, pd.GroupSettingsPathPrefixBytes, pd.WithRev(metaRevision), pd.WithPrefix(), pd.WithPrevKV()) if err != nil { log.Warn("watch resource group meta failed", zap.Error(err)) - watchRetryTimer.Reset(watchRetryInterval) + 
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval) failpoint.Inject("watchStreamError", func() { watchRetryTimer.Reset(20 * time.Millisecond) }) @@ -299,7 +300,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { watchConfigChannel, err = c.provider.Watch(ctx, pd.ControllerConfigPathPrefixBytes, pd.WithRev(cfgRevision), pd.WithPrefix()) if err != nil { log.Warn("watch resource group config failed", zap.Error(err)) - watchRetryTimer.Reset(watchRetryInterval) + timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval) } } case <-emergencyTokenAcquisitionTicker.C: @@ -333,7 +334,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { }) if !ok { watchMetaChannel = nil - watchRetryTimer.Reset(watchRetryInterval) + timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval) failpoint.Inject("watchStreamError", func() { watchRetryTimer.Reset(20 * time.Millisecond) }) @@ -369,7 +370,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) { case resp, ok := <-watchConfigChannel: if !ok { watchConfigChannel = nil - watchRetryTimer.Reset(watchRetryInterval) + timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval) failpoint.Inject("watchStreamError", func() { watchRetryTimer.Reset(20 * time.Millisecond) }) diff --git a/client/timerpool/pool.go b/client/timerutil/pool.go similarity index 98% rename from client/timerpool/pool.go rename to client/timerutil/pool.go index 28ffacfc629..2d608b09053 100644 --- a/client/timerpool/pool.go +++ b/client/timerutil/pool.go @@ -4,7 +4,7 @@ // Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133 -package timerpool +package timerutil import ( "sync" diff --git a/client/timerpool/pool_test.go b/client/timerutil/pool_test.go similarity index 98% rename from client/timerpool/pool_test.go rename to client/timerutil/pool_test.go index d6dffc723a9..f90a305d99f 100644 --- a/client/timerpool/pool_test.go +++ b/client/timerutil/pool_test.go @@ -4,7 +4,7 @@ // 
Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133 -package timerpool +package timerutil import ( "testing" diff --git a/client/timerutil/util.go b/client/timerutil/util.go new file mode 100644 index 00000000000..7e24671a09e --- /dev/null +++ b/client/timerutil/util.go @@ -0,0 +1,32 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package timerutil + +import "time" + +// SafeResetTimer is used to reset timer safely. +// Before Go 1.23, the only safe way to use Reset was to call Timer.Stop and explicitly drain the timer first. +// We need to be careful here; see more details in the comments of Timer.Reset. +// https://pkg.go.dev/time@master#Timer.Reset +func SafeResetTimer(t *time.Timer, d time.Duration) { + // Stop the timer if it's not stopped. 
+ if !t.Stop() { + select { + case <-t.C: // try to drain from the channel + default: + } + } + t.Reset(d) +} diff --git a/client/tso_dispatcher.go b/client/tso_dispatcher.go index 6b2c33ca58d..80b9dc87dca 100644 --- a/client/tso_dispatcher.go +++ b/client/tso_dispatcher.go @@ -29,7 +29,7 @@ import ( "github.com/tikv/pd/client/errs" "github.com/tikv/pd/client/grpcutil" "github.com/tikv/pd/client/retry" - "github.com/tikv/pd/client/timerpool" + "github.com/tikv/pd/client/timerutil" "github.com/tikv/pd/client/tsoutil" "go.uber.org/zap" "google.golang.org/grpc" @@ -155,7 +155,7 @@ func newTSDeadline( done chan struct{}, cancel context.CancelFunc, ) *deadline { - timer := timerpool.GlobalTimerPool.Get(timeout) + timer := timerutil.GlobalTimerPool.Get(timeout) return &deadline{ timer: timer, done: done, @@ -201,11 +201,11 @@ func (c *tsoClient) watchTSDeadline(ctx context.Context, dcLocation string) { case <-d.timer.C: log.Error("[tso] tso request is canceled due to timeout", zap.String("dc-location", dc), errs.ZapError(errs.ErrClientGetTSOTimeout)) d.cancel() - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) case <-d.done: - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) case <-ctx.Done(): - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) return } case <-ctx.Done(): @@ -419,16 +419,7 @@ tsoBatchLoop: if maxBatchWaitInterval >= 0 { tbc.adjustBestBatchSize() } - // Stop the timer if it's not stopped. - if !streamLoopTimer.Stop() { - select { - case <-streamLoopTimer.C: // try to drain from the channel - default: - } - } - // We need be careful here, see more details in the comments of Timer.Reset. - // https://pkg.go.dev/time@master#Timer.Reset - streamLoopTimer.Reset(c.option.timeout) + timerutil.SafeResetTimer(streamLoopTimer, c.option.timeout) // Choose a stream to send the TSO gRPC request. 
streamChoosingLoop: for { diff --git a/pkg/election/lease.go b/pkg/election/lease.go index eada4f8786d..28bc39a1752 100644 --- a/pkg/election/lease.go +++ b/pkg/election/lease.go @@ -23,6 +23,7 @@ import ( "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/timerutil" "github.com/tikv/pd/pkg/utils/typeutil" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" @@ -124,16 +125,7 @@ func (l *lease) KeepAlive(ctx context.Context) { l.expireTime.Store(t) } } - // Stop the timer if it's not stopped. - if !timer.Stop() { - select { - case <-timer.C: // try to drain from the channel - default: - } - } - // We need be careful here, see more details in the comments of Timer.Reset. - // https://pkg.go.dev/time@master#Timer.Reset - timer.Reset(l.leaseTimeout) + timerutil.SafeResetTimer(timer, l.leaseTimeout) case <-timer.C: log.Info("lease timeout", zap.Time("expire", l.expireTime.Load().(time.Time)), zap.String("purpose", l.Purpose)) return diff --git a/pkg/timerpool/pool.go b/pkg/utils/timerutil/pool.go similarity index 98% rename from pkg/timerpool/pool.go rename to pkg/utils/timerutil/pool.go index 28ffacfc629..2d608b09053 100644 --- a/pkg/timerpool/pool.go +++ b/pkg/utils/timerutil/pool.go @@ -4,7 +4,7 @@ // Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133 -package timerpool +package timerutil import ( "sync" diff --git a/pkg/timerpool/pool_test.go b/pkg/utils/timerutil/pool_test.go similarity index 98% rename from pkg/timerpool/pool_test.go rename to pkg/utils/timerutil/pool_test.go index d6dffc723a9..f90a305d99f 100644 --- a/pkg/timerpool/pool_test.go +++ b/pkg/utils/timerutil/pool_test.go @@ -4,7 +4,7 @@ // Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133 -package timerpool +package timerutil import ( "testing" diff --git a/pkg/utils/timerutil/util.go b/pkg/utils/timerutil/util.go new file mode 100644 index 
00000000000..7e24671a09e --- /dev/null +++ b/pkg/utils/timerutil/util.go @@ -0,0 +1,32 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package timerutil + +import "time" + +// SafeResetTimer is used to reset timer safely. +// Before Go 1.23, the only safe way to use Reset was to call Timer.Stop and explicitly drain the timer first. +// We need to be careful here; see more details in the comments of Timer.Reset. +// https://pkg.go.dev/time@master#Timer.Reset +func SafeResetTimer(t *time.Timer, d time.Duration) { + // Stop the timer if it's not stopped. 
+ if !t.Stop() { + select { + case <-t.C: // try to drain from the channel + default: + } + } + t.Reset(d) +} diff --git a/pkg/utils/tsoutil/tso_dispatcher.go b/pkg/utils/tsoutil/tso_dispatcher.go index 6d1ee2ace28..b2e453e45e2 100644 --- a/pkg/utils/tsoutil/tso_dispatcher.go +++ b/pkg/utils/tsoutil/tso_dispatcher.go @@ -24,9 +24,9 @@ import ( "github.com/pingcap/log" "github.com/prometheus/client_golang/prometheus" "github.com/tikv/pd/pkg/errs" - "github.com/tikv/pd/pkg/timerpool" "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/timerutil" "go.uber.org/zap" "google.golang.org/grpc" ) @@ -209,7 +209,7 @@ func NewTSDeadline( done chan struct{}, cancel context.CancelFunc, ) *TSDeadline { - timer := timerpool.GlobalTimerPool.Get(timeout) + timer := timerutil.GlobalTimerPool.Get(timeout) return &TSDeadline{ timer: timer, done: done, @@ -230,11 +230,11 @@ func WatchTSDeadline(ctx context.Context, tsDeadlineCh <-chan *TSDeadline) { log.Error("tso proxy request processing is canceled due to timeout", errs.ZapError(errs.ErrProxyTSOTimeout)) d.cancel() - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) case <-d.done: - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) case <-ctx.Done(): - timerpool.GlobalTimerPool.Put(d.timer) + timerutil.GlobalTimerPool.Put(d.timer) return } case <-ctx.Done():