Skip to content

Commit c64fb80

Browse files
craig[bot]nvb
andcommitted
Merge #128698
128698: retry: trace on retry backoff r=nvanbenschoten a=nvanbenschoten This commit adds tracing to `retry.Next`, to assist in understanding the backoff behavior of retry loops. This is useful for debugging, where we otherwise see unexplained delays. It looks like the following in traces: ``` 2024-08-09 18:10:33.341503+00 | 00:00:00.001306 | retrying after 43.936507ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:3081 | dist sender send | 9 2024-08-09 18:10:33.388118+00 | 00:00:00.047921 | retrying after 45.678614ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:33.437466+00 | 00:00:00.097269 | retrying after 89.902637ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:33.531346+00 | 00:00:00.191149 | retrying after 226.616186ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:33.761593+00 | 00:00:00.421396 | retrying after 347.621605ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:34.112724+00 | 00:00:00.772527 | retrying after 841.913917ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:34.958975+00 | 00:00:01.618778 | retrying after 959.158772ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:35.924476+00 | 00:00:02.584279 | retrying after 1.138820334s | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:37.073436+00 | 00:00:03.733239 | retrying after 1.054426075s | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:38.132316+00 | 00:00:04.792119 | retrying after 919.413404ms | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:39.055159+00 | 00:00:05.714962 | retrying after 1.100328812s | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 2024-08-09 18:10:40.159483+00 | 00:00:06.819286 | retrying after 1.120451316s | [n1,client=[::1]:52944,hostnossl,user=root,txn=e7011024] | kv/kvclient/kvcoord/dist_sender.go:2135 | dist sender send | 9 ``` Epic: None Release note: None Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
2 parents f429ec2 + e18e975 commit c64fb80

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

pkg/util/retry/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ go_library(
66
importpath = "github.com/cockroachdb/cockroach/pkg/util/retry",
77
visibility = ["//visibility:public"],
88
deps = [
9+
"//pkg/util/log",
910
"//pkg/util/timeutil",
1011
"@com_github_cockroachdb_errors//:errors",
1112
],

pkg/util/retry/retry.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"math/rand"
1717
"time"
1818

19+
"github.com/cockroachdb/cockroach/pkg/util/log"
1920
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2021
"github.com/cockroachdb/errors"
2122
)
@@ -35,7 +36,7 @@ type Options struct {
3536
// backoff retry loop.
3637
type Retry struct {
3738
opts Options
38-
ctxDoneChan <-chan struct{}
39+
ctx context.Context
3940
currentAttempt int
4041
isReset bool
4142
}
@@ -66,7 +67,7 @@ func StartWithCtx(ctx context.Context, opts Options) Retry {
6667

6768
var r Retry
6869
r.opts = opts
69-
r.ctxDoneChan = ctx.Done()
70+
r.ctx = ctx
7071
r.mustReset()
7172
return r
7273
}
@@ -81,7 +82,7 @@ func (r *Retry) Reset() {
8182
select {
8283
case <-r.opts.Closer:
8384
// When the closer has fired, you can't keep going.
84-
case <-r.ctxDoneChan:
85+
case <-r.ctx.Done():
8586
// When the context was canceled, you can't keep going.
8687
default:
8788
r.mustReset()
@@ -125,13 +126,17 @@ func (r *Retry) Next() bool {
125126
}
126127

127128
// Wait before retry.
129+
d := r.retryIn()
130+
if d > 0 {
131+
log.VEventfDepth(r.ctx, 1 /* depth */, 2 /* level */, "will retry after %s", d)
132+
}
128133
select {
129-
case <-time.After(r.retryIn()):
134+
case <-time.After(d):
130135
r.currentAttempt++
131136
return true
132137
case <-r.opts.Closer:
133138
return false
134-
case <-r.ctxDoneChan:
139+
case <-r.ctx.Done():
135140
return false
136141
}
137142
}

0 commit comments

Comments
 (0)