From bc60d68cd2d21b8f2d014954ea16b137a9721c89 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 1 Feb 2023 19:58:25 +0800 Subject: [PATCH 1/4] lightning: fix panic when nextKey twice Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/local.go | 2 ++ br/pkg/lightning/backend/local/localhelper.go | 11 ++++++++++- .../lightning_local_backend/data/cpeng.a-schema.sql | 2 +- br/tests/lightning_local_backend/data/cpeng.a.1.sql | 2 +- br/tests/lightning_local_backend/data/cpeng.a.2.sql | 2 +- br/tests/lightning_local_backend/data/cpeng.a.3.sql | 2 +- br/tests/lightning_local_backend/run.sh | 6 ++++-- 7 files changed, 20 insertions(+), 7 deletions(-) diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index d16f1860a7fce..9f26666f090a8 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -1385,6 +1385,8 @@ func (local *local) ImportEngine(ctx context.Context, engineUUID uuid.UUID, regi // the table when table is created. needSplit := len(unfinishedRanges) > 1 || lfTotalSize > regionSplitSize || lfLength > regionSplitKeys // split region by given ranges + // TODO: use a failpoint + needSplit = true for i := 0; i < maxRetryTimes; i++ { err = local.SplitAndScatterRegionInBatches(ctx, unfinishedRanges, lf.tableInfo, needSplit, regionSplitSize, maxBatchSplitRanges) if err == nil || common.IsContextCanceledError(err) { diff --git a/br/pkg/lightning/backend/local/localhelper.go b/br/pkg/lightning/backend/local/localhelper.go index 98413b20e71e0..32f500d50a64e 100644 --- a/br/pkg/lightning/backend/local/localhelper.go +++ b/br/pkg/lightning/backend/local/localhelper.go @@ -27,6 +27,7 @@ import ( "github.com/docker/go-units" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" sst "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" @@ -381,7 +382,15 @@ func fetchTableRegionSizeStats(ctx context.Context, db *sql.DB, tableID int64) ( return stats, errors.Trace(err) } -func (local *local) BatchSplitRegions(ctx context.Context, region *split.RegionInfo, keys [][]byte) (*split.RegionInfo, []*split.RegionInfo, error) { +func (local *local) BatchSplitRegions( + ctx context.Context, + region *split.RegionInfo, + keys [][]byte, +) (*split.RegionInfo, []*split.RegionInfo, error) { + failpoint.Inject("failToSplit", func(_ failpoint.Value) { + log.FromContext(ctx).Warn("failToSplit") + failpoint.Return(nil, nil, errors.New("retryable error")) + }) region, newRegions, err := local.splitCli.BatchSplitRegionsWithOrigin(ctx, region, keys) if err != nil { return nil, nil, errors.Annotatef(err, "batch split regions failed") diff --git a/br/tests/lightning_local_backend/data/cpeng.a-schema.sql b/br/tests/lightning_local_backend/data/cpeng.a-schema.sql index fe3f493b6edfd..6c1f5ee154c58 100644 --- a/br/tests/lightning_local_backend/data/cpeng.a-schema.sql +++ b/br/tests/lightning_local_backend/data/cpeng.a-schema.sql @@ -1 +1 @@ -create table a (c int); +create table a (c VARCHAR(20) PRIMARY KEY); diff --git a/br/tests/lightning_local_backend/data/cpeng.a.1.sql b/br/tests/lightning_local_backend/data/cpeng.a.1.sql index 58829b7d87492..a75039e1304e3 100644 --- a/br/tests/lightning_local_backend/data/cpeng.a.1.sql +++ b/br/tests/lightning_local_backend/data/cpeng.a.1.sql @@ -1 +1 @@ -insert into a values (1); +insert into a values ('0000001'); diff --git a/br/tests/lightning_local_backend/data/cpeng.a.2.sql b/br/tests/lightning_local_backend/data/cpeng.a.2.sql index ccbcb5801f72b..a1b15acdecb11 100644 --- a/br/tests/lightning_local_backend/data/cpeng.a.2.sql +++ b/br/tests/lightning_local_backend/data/cpeng.a.2.sql @@ -1 +1 @@ -insert into a values (2); +insert into a values ('0000002'); diff --git a/br/tests/lightning_local_backend/data/cpeng.a.3.sql b/br/tests/lightning_local_backend/data/cpeng.a.3.sql index effdc8f3eab3c..0a457febecf55 100644 --- a/br/tests/lightning_local_backend/data/cpeng.a.3.sql +++ b/br/tests/lightning_local_backend/data/cpeng.a.3.sql @@ -1 +1 @@ -insert into a values (3),(4); +insert into a values ('0000003'),('0000004'); diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index b1c7440249c27..f2f670d6d3b07 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -34,11 +34,13 @@ grep -Fq 'table(s) [`cpeng`.`a`, `cpeng`.`b`] are not empty' $TEST_DIR/lightning # First, verify that inject with not leader error is fine. -export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=1*return("notleader")' +export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/FailIngestMeta=1*return("notleader");github.com/pingcap/tidb/br/pkg/lightning/backend/local/failToSplit=2*return("")' rm -f "$TEST_DIR/lightning-local.log" run_sql 'DROP DATABASE IF EXISTS cpeng;' -run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" +run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" -L debug + +read -p 123 # Check that everything is correctly imported run_sql 'SELECT count(*), sum(c) FROM cpeng.a' check_contains 'count(*): 4' From 7ea055c16bc002d97a5e397bcaa84ddf8f04e3ce Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 1 Feb 2023 21:54:05 +0800 Subject: [PATCH 2/4] save work Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/local.go | 5 +++-- br/pkg/lightning/backend/local/localhelper.go | 1 - br/tests/lightning_local_backend/run.sh | 2 +- kv/key.go | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index 9f26666f090a8..64939554dd484 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -1385,8 +1385,9 @@ func (local *local) ImportEngine(ctx context.Context, engineUUID uuid.UUID, regi // the table when table is created. needSplit := len(unfinishedRanges) > 1 || lfTotalSize > regionSplitSize || lfLength > regionSplitKeys // split region by given ranges - // TODO: use a failpoint - needSplit = true + failpoint.Inject("failToSplit", func(_ failpoint.Value) { + needSplit = true + }) for i := 0; i < maxRetryTimes; i++ { err = local.SplitAndScatterRegionInBatches(ctx, unfinishedRanges, lf.tableInfo, needSplit, regionSplitSize, maxBatchSplitRanges) if err == nil || common.IsContextCanceledError(err) { diff --git a/br/pkg/lightning/backend/local/localhelper.go b/br/pkg/lightning/backend/local/localhelper.go index 32f500d50a64e..4f1c926d3fc44 100644 --- a/br/pkg/lightning/backend/local/localhelper.go +++ b/br/pkg/lightning/backend/local/localhelper.go @@ -388,7 +388,6 @@ func (local *local) BatchSplitRegions( keys [][]byte, ) (*split.RegionInfo, []*split.RegionInfo, error) { failpoint.Inject("failToSplit", func(_ failpoint.Value) { - log.FromContext(ctx).Warn("failToSplit") failpoint.Return(nil, nil, errors.New("retryable error")) }) region, newRegions, err := local.splitCli.BatchSplitRegionsWithOrigin(ctx, region, keys) diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index f2f670d6d3b07..f4e81ae370209 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -38,7 +38,7 @@ export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/Fai rm -f "$TEST_DIR/lightning-local.log" run_sql 'DROP DATABASE IF EXISTS cpeng;' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" -L debug - +# TODO: check the log contains "split regions.*retry time" read -p 123 # Check that everything is correctly imported diff --git a/kv/key.go b/kv/key.go index d4227cff4b87f..7a2c384e5ea15 100644 --- a/kv/key.go +++ b/kv/key.go @@ -140,6 +140,7 @@ type Handle interface { // IntValue returns the int64 value if IsInt is true, it panics if IsInt returns false. IntValue() int64 // Next returns the minimum handle that is greater than this handle. + // The returned handle is not guaranteed to be able to decode. Next() Handle // Equal returns if the handle equals to another handle, it panics if the types are different. Equal(h Handle) bool @@ -279,6 +280,7 @@ func (ch *CommonHandle) IntValue() int64 { } // Next implements the Handle interface. +// Note that the returned encoded field is not guaranteed to be able to decode. func (ch *CommonHandle) Next() Handle { return &CommonHandle{ encoded: Key(ch.encoded).PrefixNext(), From 409a97dae67c94818196e3ea9cf25d083948ec04 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Wed, 1 Feb 2023 22:56:27 +0800 Subject: [PATCH 3/4] save work Signed-off-by: lance6716 --- br/tests/lightning_local_backend/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index f4e81ae370209..3b5e320e98bd5 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -38,7 +38,7 @@ export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/lightning/backend/local/Fai rm -f "$TEST_DIR/lightning-local.log" run_sql 'DROP DATABASE IF EXISTS cpeng;' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" -L debug -# TODO: check the log contains "split regions.*retry time" +grep -Eq "split regions.*retryable error" "$TEST_DIR/lightning-local.log" read -p 123 # Check that everything is correctly imported From 5c74dedd5c0b0293858dd0072d8e6fbc153c34a5 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 2 Feb 2023 10:47:28 +0800 Subject: [PATCH 4/4] fix bug Signed-off-by: lance6716 --- br/pkg/lightning/backend/local/local.go | 5 +++-- br/pkg/lightning/backend/local/local_test.go | 15 +++++++++++++-- br/tests/lightning_local_backend/run.sh | 1 - 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index 64939554dd484..ad49ec5a6c2b9 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -1859,7 +1859,8 @@ func nextKey(key []byte) []byte { // in tikv <= 4.x, tikv will truncate the row key, so we should fetch the next valid row key // See: https://github.com/tikv/tikv/blob/f7f22f70e1585d7ca38a59ea30e774949160c3e8/components/raftstore/src/coprocessor/split_observer.rs#L36-L41 - if tablecodec.IsRecordKey(key) { + // we only do this for IntHandle, which is checked by length + if tablecodec.IsRecordKey(key) && len(key) == tablecodec.RecordRowKeyLen { tableID, handle, _ := tablecodec.DecodeRecordKey(key) nextHandle := handle.Next() // int handle overflow, use the next table prefix as nextKey @@ -1869,7 +1870,7 @@ func nextKey(key []byte) []byte { return tablecodec.EncodeRowKeyWithHandle(tableID, nextHandle) } - // if key is an index, directly append a 0x00 to the key. + // for index key and CommonHandle, directly append a 0x00 to the key. res := make([]byte, 0, len(key)+1) res = append(res, key...) res = append(res, 0) diff --git a/br/pkg/lightning/backend/local/local_test.go b/br/pkg/lightning/backend/local/local_test.go index b8c3d57df6719..5e7f21bdb3ca9 100644 --- a/br/pkg/lightning/backend/local/local_test.go +++ b/br/pkg/lightning/backend/local/local_test.go @@ -116,10 +116,21 @@ func TestNextKey(t *testing.T) { require.NoError(t, err) nextHdl, err := tidbkv.NewCommonHandle(nextKeyBytes) require.NoError(t, err) - expectNextKey := []byte(tablecodec.EncodeRowKeyWithHandle(1, nextHdl)) - require.Equal(t, expectNextKey, nextKey(key)) + nextValidKey := []byte(tablecodec.EncodeRowKeyWithHandle(1, nextHdl)) + // nextKey may return a key that can't be decoded, but it must not be larger than the valid next key. + require.True(t, bytes.Compare(nextKey(key), nextValidKey) <= 0, "datums: %v", datums) } + // a special case that when len(string datum) % 8 == 7, nextKey twice should not panic. + keyBytes, err := codec.EncodeKey(stmtCtx, nil, types.NewStringDatum("1234567")) + require.NoError(t, err) + h, err := tidbkv.NewCommonHandle(keyBytes) + require.NoError(t, err) + key = tablecodec.EncodeRowKeyWithHandle(1, h) + nextOnce := nextKey(key) + // should not panic + _ = nextKey(nextOnce) + // dIAAAAAAAAD/PV9pgAAAAAD/AAABA4AAAAD/AAAAAQOAAAD/AAAAAAEAAAD8 // a index key with: table: 61, index: 1, int64: 1, int64: 1 a := []byte{116, 128, 0, 0, 0, 0, 0, 0, 255, 61, 95, 105, 128, 0, 0, 0, 0, 255, 0, 0, 1, 3, 128, 0, 0, 0, 255, 0, 0, 0, 1, 3, 128, 0, 0, 255, 0, 0, 0, 0, 1, 0, 0, 0, 252} diff --git a/br/tests/lightning_local_backend/run.sh b/br/tests/lightning_local_backend/run.sh index 3b5e320e98bd5..bbb4f4c1a0fdc 100755 --- a/br/tests/lightning_local_backend/run.sh +++ b/br/tests/lightning_local_backend/run.sh @@ -40,7 +40,6 @@ run_sql 'DROP DATABASE IF EXISTS cpeng;' run_lightning --backend local --enable-checkpoint=1 --log-file "$TEST_DIR/lightning-local.log" --config "tests/$TEST_NAME/config.toml" -L debug grep -Eq "split regions.*retryable error" "$TEST_DIR/lightning-local.log" -read -p 123 # Check that everything is correctly imported run_sql 'SELECT count(*), sum(c) FROM cpeng.a' check_contains 'count(*): 4'