diff --git a/br/pkg/lightning/backend/local/region_job.go b/br/pkg/lightning/backend/local/region_job.go index 98f15c4b84bac..544df9a6ca310 100644 --- a/br/pkg/lightning/backend/local/region_job.go +++ b/br/pkg/lightning/backend/local/region_job.go @@ -358,6 +358,11 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error { } } + failpoint.Inject("NoLeader", func() { + log.FromContext(ctx).Warn("enter failpoint NoLeader") + leaderPeerMetas = nil + }) + // if there is not leader currently, we don't forward the stage to wrote and let caller // handle the retry. if len(leaderPeerMetas) == 0 { @@ -365,8 +370,7 @@ func (local *Backend) writeToTiKV(ctx context.Context, j *regionJob) error { logutil.Region(region), logutil.Leader(j.region.Leader), zap.Uint64("leader_id", leaderID), logutil.SSTMeta(meta), zap.Int64("kv_pairs", totalCount), zap.Int64("total_bytes", totalSize)) - return errors.Errorf("write to tikv with no leader returned, region '%d', leader: %d", - region.Id, leaderID) + return common.ErrNoLeader.GenWithStackByArgs(region.Id, leaderID) } takeTime := time.Since(begin) diff --git a/br/pkg/lightning/common/errors.go b/br/pkg/lightning/common/errors.go index 9ea4e7b2af63d..109f72755720f 100644 --- a/br/pkg/lightning/common/errors.go +++ b/br/pkg/lightning/common/errors.go @@ -83,6 +83,7 @@ var ( ErrKVReadIndexNotReady = errors.Normalize("read index not ready", errors.RFCCodeText("Lightning:KV:ReadIndexNotReady")) ErrKVIngestFailed = errors.Normalize("ingest tikv failed", errors.RFCCodeText("Lightning:KV:ErrKVIngestFailed")) ErrKVRaftProposalDropped = errors.Normalize("raft proposal dropped", errors.RFCCodeText("Lightning:KV:ErrKVRaftProposalDropped")) + ErrNoLeader = errors.Normalize("write to tikv with no leader returned, region '%d', leader: %d", errors.RFCCodeText("Lightning:KV:ErrNoLeader")) ErrUnknownBackend = errors.Normalize("unknown backend %s", errors.RFCCodeText("Lightning:Restore:ErrUnknownBackend")) ErrCheckLocalFile = errors.Normalize("cannot find local file for table: %s engineDir: %s", errors.RFCCodeText("Lightning:Restore:ErrCheckLocalFile")) diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index f6db6cda86407..c3bb979a9bd32 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -70,6 +70,7 @@ func IsRetryableError(err error) bool { var retryableErrorIDs = map[errors.ErrorID]struct{}{ ErrKVEpochNotMatch.ID(): {}, ErrKVNotLeader.ID(): {}, + ErrNoLeader.ID(): {}, ErrKVRegionNotFound.ID(): {}, // common.ErrKVServerIsBusy is a little duplication with tmysql.ErrTiKVServerBusy // it's because the response of sst.ingest gives us a sst.IngestResponse which doesn't contain error code, diff --git a/br/tests/lightning_local_backend/config.toml b/br/tests/lightning_local_backend/config.toml index 46ca06e09b4ab..73c54882430c7 100644 --- a/br/tests/lightning_local_backend/config.toml +++ b/br/tests/lightning_local_backend/config.toml @@ -1,5 +1,6 @@ [lightning] table-concurrency = 1 +index-concurrency = 1 [checkpoint] enable = true diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index 342cb92455c9a..e6f67c890c153 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -55,7 +55,7 @@ check_contains 'sum(c): 46' run_sql 'DROP DATABASE cpeng;' rm -f "/tmp/tidb_lightning_checkpoint_local_backend_test.pb" -export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch")' +export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=2*return("epochnotmatch");github.com/pingcap/tidb/br/pkg/lightning/backend/local/NoLeader=1*return()' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" diff --git a/errors.toml b/errors.toml index 42952079c7ee4..3955ab97ec4a0 100644 --- a/errors.toml +++ b/errors.toml @@ -406,6 +406,11 @@ error = ''' raft proposal dropped ''' +["Lightning:KV:ErrNoLeader"] +error = ''' +write to tikv with no leader returned, region '%d', leader: %d +''' + ["Lightning:KV:NotLeader"] error = ''' not leader