
Commit

This is an automated cherry-pick of pingcap#43351
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
lance6716 authored and ti-chi-bot committed Jul 3, 2023
1 parent 3034205 commit f4bb945
Showing 2 changed files with 175 additions and 20 deletions.
19 changes: 2 additions & 17 deletions br/pkg/lightning/backend/local/local.go
@@ -1108,7 +1108,7 @@ WriteAndIngest:
local.ingestConcurrency.Recycle(w)
if err != nil {
if !local.isRetryableImportTiKVError(err) {
- return err
+ return errors.Trace(err)
}
_, regionStart, _ := codec.DecodeBytes(region.Region.StartKey, []byte{})
// if we have at least succeeded one region, retry without increasing the retry count
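The only change in this hunk swaps a bare return err for return errors.Trace(err). As a minimal, standalone sketch (not part of the patch; the function and variable names below are illustrative), this is what the wrapper from github.com/pingcap/errors buys: it records the call site, so printing the error with %+v shows where the non-retryable error crossed this layer.

package main

import (
	"fmt"
	"io"

	"github.com/pingcap/errors"
)

// doImport stands in for a call that fails with a non-retryable TiKV error.
func doImport() error {
	// errors.Trace returns nil for a nil error; otherwise it attaches the
	// current stack to the wrapped error.
	return errors.Trace(io.ErrUnexpectedEOF)
}

func main() {
	err := doImport()
	fmt.Printf("%v\n", err)  // just the message: "unexpected EOF"
	fmt.Printf("%+v\n", err) // message plus the recorded stack trace
}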
@@ -1792,22 +1792,7 @@ func (local *local) isIngestRetryable(
var err error
switch errPb := resp.GetError(); {
case errPb.NotLeader != nil:
- if newLeader := errPb.GetNotLeader().GetLeader(); newLeader != nil {
- newRegion = &split.RegionInfo{
- Leader: newLeader,
- Region: region.Region,
- }
- } else {
- newRegion, err = getRegion()
- if err != nil {
- return retryNone, nil, errors.Trace(err)
- }
- }
- // TODO: because in some case, TiKV may return retryable error while the ingest is succeeded.
- // Thus directly retry ingest may cause TiKV panic. So always return retryWrite here to avoid
- // this issue.
- // See: https://github.com/tikv/tikv/issues/9496
- return retryWrite, newRegion, common.ErrKVNotLeader.GenWithStack(errPb.GetMessage())
+ return retryNone, nil, common.ErrKVNotLeader.GenWithStack(errPb.GetMessage())
case errPb.EpochNotMatch != nil:
if currentRegions := errPb.GetEpochNotMatch().GetCurrentRegions(); currentRegions != nil {
var currentRegion *metapb.Region
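After this hunk, isIngestRetryable no longer repairs a stale leader in place: per the commit title, a NotLeader response now maps to retryNone plus an ErrKVNotLeader error, and the outer write-and-ingest loop treats that error as retryable, rescans the regions to learn the new leader, and redoes the write. Below is a minimal, standalone sketch of that control flow using simplified, hypothetical types — it is not the Lightning API, only the shape of the retry decision.

package main

import (
	"errors"
	"fmt"
)

type retryType int

const (
	retryNone  retryType = iota // give up at the ingest level and surface the error
	retryWrite                  // retry the write+ingest against a returned region
)

var errNotLeader = errors.New("peer is not leader")

// isIngestRetryable mirrors the post-patch handling of NotLeader only:
// never retry the ingest directly, hand the decision back to the caller.
func isIngestRetryable(ingestErr error) (retryType, error) {
	if errors.Is(ingestErr, errNotLeader) {
		return retryNone, fmt.Errorf("ingest failed: %w", ingestErr)
	}
	return retryNone, ingestErr
}

func main() {
	staleLeader := true
	for attempt := 1; attempt <= 3; attempt++ {
		var ingestErr error
		if staleLeader {
			ingestErr = errNotLeader // first pass uses stale region info
		}
		rt, err := isIngestRetryable(ingestErr)
		if err != nil && rt == retryNone {
			// The caller treats NotLeader as retryable: rescan regions to pick
			// up the new leader, then retry the whole write+ingest for this range.
			fmt.Println("attempt", attempt, ": rescan regions and retry:", err)
			staleLeader = false
			continue
		}
		fmt.Println("attempt", attempt, ": ingest succeeded")
		break
	}
}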
176 changes: 173 additions & 3 deletions br/pkg/lightning/backend/local/local_test.go
@@ -488,9 +488,8 @@ func TestIsIngestRetryable(t *testing.T) {
},
},
}
- retryType, newRegion, err := local.isIngestRetryable(ctx, resp, region, metas)
- require.Equal(t, retryWrite, retryType)
- require.Equal(t, uint64(2), newRegion.Leader.Id)
+ retryType, _, err := local.isIngestRetryable(ctx, resp, region, metas)
+ require.Equal(t, retryNone, retryType)
require.Error(t, err)

resp.Error = &errorpb.Error{
@@ -509,6 +508,7 @@ func TestIsIngestRetryable(t *testing.T) {
},
},
}
+ var newRegion *split.RegionInfo
retryType, newRegion, err = local.isIngestRetryable(ctx, resp, region, metas)
require.Equal(t, retryWrite, retryType)
require.Equal(t, uint64(2), newRegion.Region.RegionEpoch.Version)
@@ -1280,3 +1280,173 @@ func TestLocalIsRetryableTiKVWriteError(t *testing.T) {
require.True(t, l.isRetryableImportTiKVError(io.EOF))
require.True(t, l.isRetryableImportTiKVError(errors.Trace(io.EOF)))
}
<<<<<<< HEAD
=======

func TestCheckPeersBusy(t *testing.T) {
ctx := context.Background()
pdCli := &mockPdClient{}
pdCtl := &pdutil.PdController{}
pdCtl.SetPDClient(pdCli)

keys := [][]byte{[]byte(""), []byte("a"), []byte("b"), []byte("")}
splitCli := initTestSplitClient3Replica(keys, nil)
apiInvokeRecorder := map[string][]uint64{}
serverIsBusyResp := &sst.IngestResponse{
Error: &errorpb.Error{
ServerIsBusy: &errorpb.ServerIsBusy{},
}}

createTimeStore12 := 0
local := &local{
pdCtl: pdCtl,
splitCli: splitCli,
importClientFactory: &mockImportClientFactory{
stores: []*metapb.Store{
// region ["", "a") is not used, skip (1, 2, 3)
{Id: 11}, {Id: 12}, {Id: 13}, // region ["a", "b")
{Id: 21}, {Id: 22}, {Id: 23}, // region ["b", "")
},
createClientFn: func(store *metapb.Store) sst.ImportSSTClient {
importCli := newMockImportClient()
importCli.store = store
importCli.apiInvokeRecorder = apiInvokeRecorder
if store.Id == 12 {
createTimeStore12++
// the second time to checkWriteStall
if createTimeStore12 == 2 {
importCli.retry = 1
importCli.resp = serverIsBusyResp
}
}
return importCli
},
},
logger: log.L(),
ingestConcurrency: worker.NewPool(ctx, 1, "ingest"),
writeLimiter: noopStoreWriteLimiter{},
bufferPool: membuf.NewPool(),
supportMultiIngest: true,
shouldCheckWriteStall: true,
}

db, tmpPath := makePebbleDB(t, nil)
_, engineUUID := backend.MakeUUID("ww", 0)
engineCtx, cancel := context.WithCancel(context.Background())
f := &Engine{
db: db,
UUID: engineUUID,
sstDir: tmpPath,
ctx: engineCtx,
cancel: cancel,
sstMetasChan: make(chan metaOrFlush, 64),
keyAdapter: noopKeyAdapter{},
logger: log.L(),
}
err := f.db.Set([]byte("a"), []byte("a"), nil)
require.NoError(t, err)
err = f.db.Set([]byte("b"), []byte("b"), nil)
require.NoError(t, err)
err = local.writeAndIngestByRange(ctx, f, []byte("a"), []byte("c"), 0, 0)
require.NoError(t, err)

require.Equal(t, []uint64{11, 12, 13, 21, 22, 23}, apiInvokeRecorder["Write"])
// store 12 has a follower busy, so it will cause region peers (11, 12, 13) retry once
require.Equal(t, []uint64{11, 12, 11, 12, 13, 11, 21, 22, 23, 21}, apiInvokeRecorder["MultiIngest"])
}

type regionChangedHook struct {
noopHook
scanCount int
}

func (r *regionChangedHook) AfterScanRegions(res []*split.RegionInfo, err error) ([]*split.RegionInfo, error) {
r.scanCount++
if r.scanCount == 1 {
return res, err
}
for _, info := range res {
// skip modified region
if info.Leader.Id > 3 {
return res, err
}
for _, p := range info.Region.Peers {
p.Id += 10
p.StoreId += 10
}
}

return res, err
}

func TestNotLeaderErrorNeedUpdatePeers(t *testing.T) {
ctx := context.Background()

// test lightning using stale region info (1,2,3), now the region is (11,12,13)
apiInvokeRecorder := map[string][]uint64{}
notLeaderResp := &sst.IngestResponse{
Error: &errorpb.Error{
NotLeader: &errorpb.NotLeader{Leader: &metapb.Peer{StoreId: 11}},
}}

pdCli := &mockPdClient{}
pdCtl := &pdutil.PdController{}
pdCtl.SetPDClient(pdCli)

h := &regionChangedHook{}
splitCli := initTestSplitClient3Replica([][]byte{{}, {'b'}, {}}, h)

local := &local{
pdCtl: pdCtl,
splitCli: splitCli,
importClientFactory: &mockImportClientFactory{
stores: []*metapb.Store{
{Id: 1}, {Id: 2}, {Id: 3},
{Id: 11}, {Id: 12}, {Id: 13},
},
createClientFn: func(store *metapb.Store) sst.ImportSSTClient {
importCli := newMockImportClient()
importCli.store = store
importCli.apiInvokeRecorder = apiInvokeRecorder
if store.Id == 1 {
importCli.retry = 1
importCli.resp = notLeaderResp
}
return importCli
},
},
logger: log.L(),
ingestConcurrency: worker.NewPool(ctx, 1, "ingest"),
writeLimiter: noopStoreWriteLimiter{},
bufferPool: membuf.NewPool(),
supportMultiIngest: true,
shouldCheckWriteStall: true,
}

db, tmpPath := makePebbleDB(t, nil)
_, engineUUID := backend.MakeUUID("ww", 0)
engineCtx, cancel := context.WithCancel(context.Background())
f := &Engine{
db: db,
UUID: engineUUID,
sstDir: tmpPath,
ctx: engineCtx,
cancel: cancel,
sstMetasChan: make(chan metaOrFlush, 64),
keyAdapter: noopKeyAdapter{},
logger: log.L(),
}
err := f.db.Set([]byte("a"), []byte("a"), nil)
require.NoError(t, err)
err = local.writeAndIngestByRange(ctx, f, []byte{}, []byte("b"), 0, 0)
require.NoError(t, err)

// "ingest" to test peers busy of stale region: 1,2,3
// then "write" to stale region: 1,2,3
// then "ingest" to stale leader: 1
// then meet NotLeader error, scanned new region (11,12,13)
// repeat above for 11,12,13
require.Equal(t, []uint64{1, 2, 3, 11, 12, 13}, apiInvokeRecorder["Write"])
require.Equal(t, []uint64{1, 2, 3, 1, 11, 12, 13, 11}, apiInvokeRecorder["MultiIngest"])
}
>>>>>>> b693b2a25e5 (lightning: always retry from rescan when NotLeader (#43351))
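Both new tests lean on the same instrumentation idea: every mocked ImportSST client appends its store ID to a shared map keyed by RPC name, so the assertions at the end can check the exact order in which Write and MultiIngest were sent across stores. Below is a stripped-down, standalone sketch of that pattern with hypothetical type and method names — it is not the real mock from the test file, just the recording technique.

package main

import "fmt"

// apiInvokeRecorder maps an RPC name ("Write", "MultiIngest") to the ordered
// list of store IDs the RPC was sent to.
type apiInvokeRecorder map[string][]uint64

// mockImportClient is a fake per-store client that only records invocations.
type mockImportClient struct {
	storeID  uint64
	recorder apiInvokeRecorder
}

func (c *mockImportClient) Write() {
	c.recorder["Write"] = append(c.recorder["Write"], c.storeID)
}

func (c *mockImportClient) MultiIngest() {
	c.recorder["MultiIngest"] = append(c.recorder["MultiIngest"], c.storeID)
}

func main() {
	rec := apiInvokeRecorder{}

	// Write goes to every peer of the region; in this toy run the ingest is
	// then sent to store 1 only.
	for _, storeID := range []uint64{1, 2, 3} {
		(&mockImportClient{storeID: storeID, recorder: rec}).Write()
	}
	(&mockImportClient{storeID: 1, recorder: rec}).MultiIngest()

	fmt.Println(rec["Write"])       // [1 2 3]
	fmt.Println(rec["MultiIngest"]) // [1]
}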
