*: fix memory leak introduced by timer.After (tikv#6720)
close tikv#6719

Signed-off-by: lhy1024 <admin@liudos.us>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
Signed-off-by: lhy1024 <admin@liudos.us>
lhy1024 and ti-chi-bot[bot] committed Dec 17, 2024
1 parent 60e81ba commit 14e8ce7
Showing 21 changed files with 144 additions and 71 deletions.
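Background for the change that follows (an editor's sketch, not code from this commit): time.After allocates a fresh timer on every call, and on the Go releases this code targets that timer is not reclaimed by the garbage collector until it fires, even if the surrounding select took another branch long before. Inside long-lived retry and polling loops this piles up, so the fix applied across the files below is to create one time.NewTicker or time.NewTimer per loop or per call and release it with a deferred Stop. A minimal before/after sketch with illustrative names:

package timerleak

import (
    "context"
    "time"
)

// leakyRetry is the pattern this commit removes: every iteration calls
// time.After, and each of those timers stays alive until it fires, even
// when ctx.Done() wins the select first.
func leakyRetry(ctx context.Context, attempts int, f func() error) error {
    var err error
    for i := 0; i < attempts; i++ {
        if err = f(); err == nil {
            return nil
        }
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(time.Second): // fresh timer allocated on every retry
        }
    }
    return err
}

// tickerRetry is the replacement pattern used throughout the diff below:
// a single ticker serves the whole loop and is stopped when the function returns.
func tickerRetry(ctx context.Context, attempts int, f func() error) error {
    var err error
    ticker := time.NewTicker(time.Second)
    defer ticker.Stop()
    for i := 0; i < attempts; i++ {
        if err = f(); err == nil {
            return nil
        }
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-ticker.C:
        }
    }
    return err
}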
4 changes: 3 additions & 1 deletion client/pd_service_discovery.go
@@ -188,14 +188,16 @@ func (c *pdServiceDiscovery) Init() error {

func (c *pdServiceDiscovery) initRetry(f func() error) error {
var err error
+ ticker := time.NewTicker(time.Second)
+ defer ticker.Stop()
for i := 0; i < c.option.maxRetryTimes; i++ {
if err = f(); err == nil {
return nil
}
select {
case <-c.ctx.Done():
return err
- case <-time.After(time.Second):
+ case <-ticker.C:
}
}
return errors.WithStack(err)
8 changes: 4 additions & 4 deletions client/resource_group/controller/controller.go
@@ -289,7 +289,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
log.Warn("watch resource group meta failed", zap.Error(err))
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
- watchRetryTimer.Reset(20 * time.Millisecond)
+ timerutil.SafeResetTimer(watchRetryTimer, 20*time.Millisecond)
})
}
}
@@ -333,7 +333,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
watchMetaChannel = nil
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
- watchRetryTimer.Reset(20 * time.Millisecond)
+ timerutil.SafeResetTimer(watchRetryTimer, 20*time.Millisecond)
})
continue
}
@@ -369,7 +369,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
watchConfigChannel = nil
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
- watchRetryTimer.Reset(20 * time.Millisecond)
+ timerutil.SafeResetTimer(watchRetryTimer, 20*time.Millisecond)
})
continue
}
@@ -522,7 +522,7 @@ func (c *ResourceGroupsController) sendTokenBucketRequests(ctx context.Context,
ClientUniqueId: c.clientUniqueID,
}
if c.ruConfig.DegradedModeWaitDuration > 0 && c.responseDeadlineCh == nil {
- c.run.responseDeadline.Reset(c.ruConfig.DegradedModeWaitDuration)
+ timerutil.SafeResetTimer(c.run.responseDeadline, c.ruConfig.DegradedModeWaitDuration)
c.responseDeadlineCh = c.run.responseDeadline.C
}
go func() {
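The controller.go hunks above route every reset through timerutil.SafeResetTimer instead of calling Timer.Reset directly. The helper itself is not part of this diff; presumably it wraps the stop-and-drain idiom that the time.Timer documentation requires before resetting a timer whose channel may already hold a value. A sketch of that idiom, under that assumption:

package timerutilsketch

import "time"

// safeResetTimer stops the timer and drains any pending expiration before
// resetting it, so a later receive on t.C cannot observe a stale fire.
// (Assumed shape of timerutil.SafeResetTimer, not copied from the repository.)
func safeResetTimer(t *time.Timer, d time.Duration) {
    if !t.Stop() {
        select {
        case <-t.C: // drain the value left by a timer that already fired
        default:
        }
    }
    t.Reset(d)
}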
4 changes: 3 additions & 1 deletion client/resource_manager_client.go
@@ -348,6 +348,8 @@ func (c *client) tryResourceManagerConnect(ctx context.Context, connection *reso
err error
stream rmpb.ResourceManager_AcquireTokenBucketsClient
)
+ ticker := time.NewTicker(retryInterval)
+ defer ticker.Stop()
for i := 0; i < maxRetryTimes; i++ {
cc, err := c.resourceManagerClient()
if err != nil {
@@ -365,7 +367,7 @@ func (c *client) tryResourceManagerConnect(ctx context.Context, connection *reso
select {
case <-ctx.Done():
return err
- case <-time.After(retryInterval):
+ case <-ticker.C:
}
}
return err
4 changes: 3 additions & 1 deletion client/retry/backoff.go
@@ -34,9 +34,11 @@ func (bo *BackOffer) Exec(
fn func() error,
) error {
if err := fn(); err != nil {
+ timer := time.NewTimer(bo.nextInterval())
+ defer timer.Stop()
select {
case <-ctx.Done():
- case <-time.After(bo.nextInterval()):
+ case <-timer.C:
failpoint.Inject("backOffExecute", func() {
testBackOffExecuteFlag = true
})
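backoff.go, like tso_stream.go and the two gRPC graceful-stop paths further down, is the one-shot variant rather than a loop: even a single time.After in a select pins its timer for the full duration when another case wins, while an explicit time.NewTimer with a deferred Stop releases it as soon as the function returns. A small sketch with illustrative names:

package timerwait

import (
    "context"
    "time"
)

// waitOrCancel blocks for d unless ctx is canceled first. The deferred Stop
// frees the timer immediately on the cancel path; time.After would keep it
// pending for the remainder of d.
func waitOrCancel(ctx context.Context, d time.Duration) error {
    timer := time.NewTimer(d)
    defer timer.Stop()
    select {
    case <-ctx.Done():
        return ctx.Err()
    case <-timer.C:
        return nil
    }
}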
48 changes: 34 additions & 14 deletions client/tso_dispatcher.go
@@ -136,11 +136,24 @@ func (c *tsoClient) updateTSODispatcher() {
}

type deadline struct {
- timer <-chan time.Time
+ timer *time.Timer
done chan struct{}
cancel context.CancelFunc
}

+ func newTSDeadline(
+ timeout time.Duration,
+ done chan struct{},
+ cancel context.CancelFunc,
+ ) *deadline {
+ timer := timerutil.GlobalTimerPool.Get(timeout)
+ return &deadline{
+ timer: timer,
+ done: done,
+ cancel: cancel,
+ }
+ }
+
func (c *tsoClient) tsCancelLoop() {
defer c.wg.Done()

@@ -169,19 +182,21 @@ func (c *tsoClient) tsCancelLoop() {

func (c *tsoClient) watchTSDeadline(ctx context.Context, dcLocation string) {
if _, exist := c.tsDeadline.Load(dcLocation); !exist {
- tsDeadlineCh := make(chan deadline, 1)
+ tsDeadlineCh := make(chan *deadline, 1)
c.tsDeadline.Store(dcLocation, tsDeadlineCh)
- go func(dc string, tsDeadlineCh <-chan deadline) {
+ go func(dc string, tsDeadlineCh <-chan *deadline) {
for {
select {
case d := <-tsDeadlineCh:
select {
- case <-d.timer:
+ case <-d.timer.C:
log.Error("[tso] tso request is canceled due to timeout", zap.String("dc-location", dc), errs.ZapError(errs.ErrClientGetTSOTimeout))
d.cancel()
+ timerutil.GlobalTimerPool.Put(d.timer)
case <-d.done:
- continue
+ timerutil.GlobalTimerPool.Put(d.timer)
case <-ctx.Done():
+ timerutil.GlobalTimerPool.Put(d.timer)
return
}
case <-ctx.Done():
@@ -231,6 +246,8 @@ func (c *tsoClient) checkAllocator(
}()
cc, u := c.GetTSOAllocatorClientConnByDCLocation(dc)
var healthCli healthpb.HealthClient
+ ticker := time.NewTicker(time.Second)
+ defer ticker.Stop()
for {
// the pd/allocator leader change, we need to re-establish the stream
if u != url {
@@ -261,7 +278,7 @@ func (c *tsoClient) checkAllocator(
select {
case <-dispatcherCtx.Done():
return
- case <-time.After(time.Second):
+ case <-ticker.C:
// To ensure we can get the latest allocator leader
// and once the leader is changed, we can exit this function.
cc, u = c.GetTSOAllocatorClientConnByDCLocation(dc)
@@ -368,6 +385,7 @@ func (c *tsoClient) handleDispatcher(

// Loop through each batch of TSO requests and send them for processing.
streamLoopTimer := time.NewTimer(c.option.timeout)
+ defer streamLoopTimer.Stop()
bo := retry.InitialBackOffer(updateMemberBackOffBaseTime, updateMemberTimeout)
tsoBatchLoop:
for {
@@ -405,16 +423,20 @@ tsoBatchLoop:
if c.updateTSOConnectionCtxs(dispatcherCtx, dc, &connectionCtxs) {
continue streamChoosingLoop
}
+ timer := time.NewTimer(retryInterval)
select {
case <-dispatcherCtx.Done():
+ timer.Stop()
return
case <-streamLoopTimer.C:
err = errs.ErrClientCreateTSOStream.FastGenByArgs(errs.RetryTimeoutErr)
log.Error("[tso] create tso stream error", zap.String("dc-location", dc), errs.ZapError(err))
c.svcDiscovery.ScheduleCheckMemberChanged()
c.finishRequest(tbc.getCollectedRequests(), 0, 0, 0, errors.WithStack(err))
+ timer.Stop()
continue tsoBatchLoop
- case <-time.After(retryInterval):
+ case <-timer.C:
+ timer.Stop()
continue streamChoosingLoop
}
}
@@ -431,11 +453,7 @@ tsoBatchLoop:
}
}
done := make(chan struct{})
- dl := deadline{
- timer: time.After(c.option.timeout),
- done: done,
- cancel: cancel,
- }
+ dl := newTSDeadline(c.option.timeout, done, cancel)
tsDeadlineCh, ok := c.tsDeadline.Load(dc)
for !ok || tsDeadlineCh == nil {
c.scheduleCheckTSDeadline()
@@ -445,7 +463,7 @@
select {
case <-dispatcherCtx.Done():
return
- case tsDeadlineCh.(chan deadline) <- dl:
+ case tsDeadlineCh.(chan *deadline) <- dl:
}
opts = extractSpanReference(tbc, opts[:0])
err = c.processRequests(stream, dc, tbc, opts)
@@ -557,6 +575,8 @@ func (c *tsoClient) tryConnectToTSO(
}
// retry several times before falling back to the follower when the network problem happens

+ ticker := time.NewTicker(retryInterval)
+ defer ticker.Stop()
for i := 0; i < maxRetryTimes; i++ {
c.svcDiscovery.ScheduleCheckMemberChanged()
cc, url = c.GetTSOAllocatorClientConnByDCLocation(dc)
@@ -589,7 +609,7 @@ func (c *tsoClient) tryConnectToTSO(
select {
case <-dispatcherCtx.Done():
return err
- case <-time.After(retryInterval):
+ case <-ticker.C:
}
}

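Besides switching the deadline struct to hold a *time.Timer, the tso_dispatcher.go hunks check deadline timers out of timerutil.GlobalTimerPool and hand them back with Put once a deadline is resolved. The pool itself is outside this diff; a pool of this kind is commonly a sync.Pool of *time.Timer that drains each timer before recycling it. A sketch under that assumption, not the repository's implementation:

package timerpoolsketch

import (
    "sync"
    "time"
)

// TimerPool recycles timers so hot paths such as per-request deadlines do
// not allocate a fresh timer every time.
type TimerPool struct {
    pool sync.Pool
}

// Get returns a timer that fires after d, reusing a pooled one when possible.
func (p *TimerPool) Get(d time.Duration) *time.Timer {
    if v := p.pool.Get(); v != nil {
        t := v.(*time.Timer)
        t.Reset(d)
        return t
    }
    return time.NewTimer(d)
}

// Put stops the timer, drains any pending fire, and returns it to the pool.
func (p *TimerPool) Put(t *time.Timer) {
    if !t.Stop() {
        select {
        case <-t.C:
        default:
        }
    }
    p.pool.Put(t)
}

Resetting a recycled timer in Get is only safe because Put guarantees the channel is empty before the timer goes back into the pool.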
8 changes: 6 additions & 2 deletions client/tso_service_discovery.go
@@ -115,14 +115,16 @@ func (c *tsoServiceDiscovery) Init() error {

func (c *tsoServiceDiscovery) initRetry(f func() error) error {
var err error
+ ticker := time.NewTicker(retryInterval)
+ defer ticker.Stop()
for i := 0; i < c.option.maxRetryTimes; i++ {
if err = f(); err == nil {
return nil
}
select {
case <-c.ctx.Done():
return err
- case <-time.After(time.Second):
+ case <-ticker.C:
}
}
return errors.WithStack(err)
@@ -151,11 +153,13 @@ func (c *tsoServiceDiscovery) startCheckMemberLoop() {

ctx, cancel := context.WithCancel(c.ctx)
defer cancel()
+ ticker := time.NewTicker(memberUpdateInterval)
+ defer ticker.Stop()

for {
select {
case <-c.checkMembershipCh:
- case <-time.After(memberUpdateInterval):
+ case <-ticker.C:
case <-ctx.Done():
log.Info("[tso] exit check member loop")
return
4 changes: 3 additions & 1 deletion client/tso_stream.go
@@ -83,10 +83,12 @@ func (b *tsoTSOStreamBuilder) build(ctx context.Context, cancel context.CancelFu
}

func checkStreamTimeout(ctx context.Context, cancel context.CancelFunc, done chan struct{}, timeout time.Duration) {
+ timer := time.NewTimer(timeout)
+ defer timer.Stop()
select {
case <-done:
return
- case <-time.After(timeout):
+ case <-timer.C:
cancel()
case <-ctx.Done():
}
6 changes: 5 additions & 1 deletion pkg/election/lease.go
@@ -23,6 +23,7 @@ import (
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/pkg/utils/etcdutil"
"github.com/tikv/pd/pkg/utils/logutil"
+ "github.com/tikv/pd/pkg/utils/timerutil"
"github.com/tikv/pd/pkg/utils/typeutil"
"go.etcd.io/etcd/clientv3"
"go.uber.org/zap"
@@ -105,6 +106,8 @@ func (l *lease) KeepAlive(ctx context.Context) {
timeCh := l.keepAliveWorker(ctx, l.leaseTimeout/3)

var maxExpire time.Time
+ timer := time.NewTimer(l.leaseTimeout)
+ defer timer.Stop()
for {
select {
case t := <-timeCh:
@@ -118,7 +121,8 @@ func (l *lease) KeepAlive(ctx context.Context) {
l.expireTime.Store(t)
}
}
- case <-time.After(l.leaseTimeout):
+ timerutil.SafeResetTimer(timer, l.leaseTimeout)
+ case <-timer.C:
log.Info("lease timeout", zap.Time("expire", l.expireTime.Load().(time.Time)), zap.String("purpose", l.Purpose))
return
case <-ctx.Done():
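The lease.go hunk is a third variant: a single timer guards the keep-alive loop, and each successful renewal pushes the deadline out with a safe reset instead of building a new time.After channel per iteration. A compact sketch of that watchdog shape, with illustrative names (safeReset stands in for timerutil.SafeResetTimer):

package leasewatch

import (
    "context"
    "time"
)

// watchdog returns when no progress arrives within timeout. One timer
// serves the whole loop; every progress event moves the deadline forward.
func watchdog(ctx context.Context, timeout time.Duration, progress <-chan struct{}) {
    timer := time.NewTimer(timeout)
    defer timer.Stop()
    for {
        select {
        case <-progress:
            safeReset(timer, timeout)
        case <-timer.C:
            return // timed out without progress
        case <-ctx.Done():
            return
        }
    }
}

func safeReset(t *time.Timer, d time.Duration) {
    if !t.Stop() {
        select {
        case <-t.C:
        default:
        }
    }
    t.Reset(d)
}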
4 changes: 3 additions & 1 deletion pkg/keyspace/tso_keyspace_group.go
@@ -153,11 +153,13 @@ func (m *GroupManager) startWatchLoop() {
revision int64
err error
)
+ ticker := time.NewTicker(retryInterval)
+ defer ticker.Stop()
for i := 0; i < maxRetryTimes; i++ {
select {
case <-ctx.Done():
return
- case <-time.After(retryInterval):
+ case <-ticker.C:
}
resp, err = etcdutil.EtcdKVGet(m.client, m.tsoServiceKey, clientv3.WithRange(m.tsoServiceEndKey))
if err == nil {
3 changes: 3 additions & 0 deletions pkg/mcs/discovery/register.go
@@ -86,6 +86,7 @@ func (sr *ServiceRegister) Register() error {
select {
case <-sr.ctx.Done():
log.Info("exit register process", zap.String("key", sr.key))
+ t.Stop()
return
default:
}
@@ -94,11 +95,13 @@
resp, err := sr.cli.Grant(sr.ctx, sr.ttl)
if err != nil {
log.Error("grant lease failed", zap.String("key", sr.key), zap.Error(err))
+ t.Stop()
continue
}

if _, err := sr.cli.Put(sr.ctx, sr.key, sr.value, clientv3.WithLease(resp.ID)); err != nil {
log.Error("put the key failed", zap.String("key", sr.key), zap.Error(err))
+ t.Stop()
continue
}
}
4 changes: 3 additions & 1 deletion pkg/mcs/resource_manager/server/server.go
@@ -286,9 +286,11 @@ func (s *Server) startGRPCServer(l net.Listener) {
gs.GracefulStop()
close(done)
}()
+ timer := time.NewTimer(utils.DefaultGRPCGracefulStopTimeout)
+ defer timer.Stop()
select {
case <-done:
- case <-time.After(utils.DefaultGRPCGracefulStopTimeout):
+ case <-timer.C:
log.Info("stopping grpc gracefully is taking longer than expected and force stopping now", zap.Duration("default", utils.DefaultGRPCGracefulStopTimeout))
gs.Stop()
}
4 changes: 3 additions & 1 deletion pkg/mcs/tso/server/server.go
@@ -362,9 +362,11 @@ func (s *Server) startGRPCServer(l net.Listener) {
gs.GracefulStop()
close(done)
}()
+ timer := time.NewTimer(mcsutils.DefaultGRPCGracefulStopTimeout)
+ defer timer.Stop()
select {
case <-done:
- case <-time.After(mcsutils.DefaultGRPCGracefulStopTimeout):
+ case <-timer.C:
log.Info("stopping grpc gracefully is taking longer than expected and force stopping now")
gs.Stop()
}
4 changes: 3 additions & 1 deletion pkg/mcs/utils/util.go
@@ -36,14 +36,16 @@ const (

// InitClusterID initializes the cluster ID.
func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err error) {
+ ticker := time.NewTicker(retryInterval)
+ defer ticker.Stop()
for i := 0; i < maxRetryTimes; i++ {
if clusterID, err := etcdutil.GetClusterID(client, clusterIDPath); err == nil && clusterID != 0 {
return clusterID, nil
}
select {
case <-ctx.Done():
return 0, err
- case <-time.After(retryInterval):
+ case <-ticker.C:
}
}
return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes)
(Diffs for the remaining 8 changed files are not loaded in this view.)
