From a135dc5eaf3b45b756a3899d71ab68b24134fa5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Mon, 20 Dec 2021 17:31:46 +0800 Subject: [PATCH] [cherry-pick] restore: fix failed to retry grpc errors(tidb#27423) (#1438) --- pkg/restore/import.go | 4 ++++ pkg/utils/backoff.go | 5 +++-- tests/br_full/run.sh | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/restore/import.go b/pkg/restore/import.go index 514507e64..7f814eae4 100644 --- a/pkg/restore/import.go +++ b/pkg/restore/import.go @@ -320,6 +320,10 @@ func (importer *FileImporter) Import( log.Debug("failpoint download-sst-error injected.", zap.String("msg", msg)) e = errors.Annotate(e, msg) }) + failpoint.Inject("restore-gRPC-error", func(_ failpoint.Value) { + log.Warn("the connection to TiKV has been cut by a neko, meow :3") + e = status.Error(codes.Unavailable, "the connection to TiKV has been cut by a neko, meow :3") + }) if e != nil { remainFiles = remainFiles[i:] return errors.Trace(e) diff --git a/pkg/utils/backoff.go b/pkg/utils/backoff.go index 664093539..1e300b43e 100644 --- a/pkg/utils/backoff.go +++ b/pkg/utils/backoff.go @@ -59,7 +59,8 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 2 * bo.delayTime bo.attempt-- } else { - switch errors.Cause(err) { // nolint:errorlint + e := errors.Cause(err) + switch e { // nolint:errorlint case berrors.ErrKVEpochNotMatch, berrors.ErrKVDownloadFailed, berrors.ErrKVIngestFailed: bo.delayTime = 2 * bo.delayTime bo.attempt-- @@ -68,7 +69,7 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 0 bo.attempt = 0 default: - switch status.Code(err) { + switch status.Code(e) { case codes.Unavailable, codes.Aborted: bo.delayTime = 2 * bo.delayTime bo.attempt-- diff --git a/tests/br_full/run.sh b/tests/br_full/run.sh index 99f22b2aa..963a9a3d3 100755 --- a/tests/br_full/run.sh +++ b/tests/br_full/run.sh @@ -69,7 +69,7 @@ for ct in limit lz4 zstd; do # restore full echo "restore with $ct backup start..." - export GO_FAILPOINTS="github.com/pingcap/br/pkg/restore/restore-storage-error=1*return(\"connection refused\")" + export GO_FAILPOINTS="github.com/pingcap/br/pkg/restore/restore-storage-error=1*return(\"connection refused\");github.com/pingcap/br/pkg/restore/restore-gRPC-error=1*return(true)" run_br restore full -s "local://$TEST_DIR/$DB-$ct" --pd $PD_ADDR --ratelimit 1024 export GO_FAILPOINTS=""