From 90f923e52e426ac5b3f836de690472824fcaa179 Mon Sep 17 00:00:00 2001 From: knull-cn Date: Wed, 20 Apr 2022 14:49:38 +0800 Subject: [PATCH 01/16] Preparation for add index acceleration, handle 'import cycle' for package tidb/ddl. --- br/pkg/conn/util/util.go | 62 ++ br/pkg/lightning/backend/backend.go | 15 + .../backend/kv/{ => kvtest}/session_test.go | 5 +- .../backend/kv/{ => kvtest}/sql2kv_test.go | 162 +++-- br/pkg/lightning/backend/kv/sql2kv.go | 30 +- br/pkg/lightning/backend/local/duplicate.go | 18 +- br/pkg/lightning/backend/local/engine.go | 12 + br/pkg/lightning/backend/local/local.go | 18 +- br/pkg/lightning/backend/local/local_test.go | 63 +- br/pkg/lightning/backend/local/localhelper.go | 2 +- .../backend/local/localhelper_test.go | 87 ++- .../backend/local/mock/local_test.go | 65 ++ br/pkg/lightning/backend/noop/noop.go | 9 + br/pkg/lightning/backend/tidb/tidb.go | 7 + br/pkg/lightning/config/config.go | 68 +- br/pkg/lightning/lightning.go | 2 + br/pkg/mock/backend.go | 27 + br/pkg/restore/split/region.go | 21 + br/pkg/restore/split/split.go | 25 + br/pkg/restore/split/split_client.go | 667 ++++++++++++++++++ br/pkg/utils/utildb/db.go | 32 + br/pkg/utils/utildb/retry.go | 142 ++++ br/pkg/utils/utildb/retry_test.go | 63 ++ go.mod | 3 +- 24 files changed, 1381 insertions(+), 224 deletions(-) create mode 100644 br/pkg/conn/util/util.go rename br/pkg/lightning/backend/kv/{ => kvtest}/session_test.go (82%) rename br/pkg/lightning/backend/kv/{ => kvtest}/sql2kv_test.go (80%) create mode 100644 br/pkg/lightning/backend/local/mock/local_test.go create mode 100644 br/pkg/restore/split/region.go create mode 100644 br/pkg/restore/split/split.go create mode 100644 br/pkg/restore/split/split_client.go create mode 100644 br/pkg/utils/utildb/db.go create mode 100644 br/pkg/utils/utildb/retry.go create mode 100644 br/pkg/utils/utildb/retry_test.go diff --git a/br/pkg/conn/util/util.go b/br/pkg/conn/util/util.go new file mode 100644 index 0000000000000..20df99db280e9 --- /dev/null +++ b/br/pkg/conn/util/util.go @@ -0,0 +1,62 @@ +package util + +import ( + "context" + + "github.com/pingcap/errors" + "github.com/pingcap/kvproto/pkg/metapb" + errors2 "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/version" + pd "github.com/tikv/pd/client" +) + +// GetAllTiKVStores returns all TiKV stores registered to the PD client. The +// stores must not be a tombstone and must never contain a label `engine=tiflash`. +func GetAllTiKVStores( + ctx context.Context, + pdClient pd.Client, + storeBehavior StoreBehavior, +) ([]*metapb.Store, error) { + // get all live stores. + stores, err := pdClient.GetAllStores(ctx, pd.WithExcludeTombstone()) + if err != nil { + return nil, errors.Trace(err) + } + + // filter out all stores which are TiFlash. + j := 0 + for _, store := range stores { + isTiFlash := false + if version.IsTiFlash(store) { + if storeBehavior == SkipTiFlash { + continue + } else if storeBehavior == ErrorOnTiFlash { + return nil, errors.Annotatef(errors2.ErrPDInvalidResponse, + "cannot restore to a cluster with active TiFlash stores (store %d at %s)", store.Id, store.Address) + } + isTiFlash = true + } + if !isTiFlash && storeBehavior == TiFlashOnly { + continue + } + stores[j] = store + j++ + } + return stores[:j], nil +} + +// StoreBehavior is the action to do in GetAllTiKVStores when a non-TiKV +// store (e.g. TiFlash store) is found. 
+type StoreBehavior uint8 + +const ( + // ErrorOnTiFlash causes GetAllTiKVStores to return error when the store is + // found to be a TiFlash node. + ErrorOnTiFlash StoreBehavior = 0 + // SkipTiFlash causes GetAllTiKVStores to skip the store when it is found to + // be a TiFlash node. + SkipTiFlash StoreBehavior = 1 + // TiFlashOnly causes GetAllTiKVStores to skip the store which is not a + // TiFlash node. + TiFlashOnly StoreBehavior = 2 +) diff --git a/br/pkg/lightning/backend/backend.go b/br/pkg/lightning/backend/backend.go index f8b3e79132aa9..5801e23155eab 100644 --- a/br/pkg/lightning/backend/backend.go +++ b/br/pkg/lightning/backend/backend.go @@ -209,6 +209,9 @@ type AbstractBackend interface { // ResolveDuplicateRows resolves duplicated rows by deleting/inserting data // according to the required algorithm. ResolveDuplicateRows(ctx context.Context, tbl table.Table, tableName string, algorithm config.DuplicateResolutionAlgorithm) error + + // TotalMemoryConsume is only used by the local backend to calculate memory consumption. + TotalMemoryConsume() int64 } // Backend is the delivery target for Lightning @@ -280,6 +283,10 @@ func (be Backend) FlushAll(ctx context.Context) error { return be.abstract.FlushAllEngines(ctx) } +func (be Backend) TotalMemoryConsume() int64 { + return be.abstract.TotalMemoryConsume() +} + // CheckDiskQuota verifies if the total engine file size is below the given // quota. If the quota is exceeded, this method returns an array of engines, // which after importing can decrease the total size below quota. @@ -405,6 +412,10 @@ func (engine *OpenedEngine) LocalWriter(ctx context.Context, cfg *LocalWriterCon return &LocalEngineWriter{writer: w, tableName: engine.tableName}, nil } +func (engine *OpenedEngine) TotalMemoryConsume() int64 { + return engine.engine.backend.TotalMemoryConsume() +} + // WriteRows writes a collection of encoded rows into the engine. func (w *LocalEngineWriter) WriteRows(ctx context.Context, columnNames []string, rows kv.Rows) error { return w.writer.AppendRows(ctx, w.tableName, columnNames, rows) @@ -495,3 +506,7 @@ type EngineWriter interface { IsSynced() bool Close(ctx context.Context) (ChunkFlushStatus, error) } + +func (oe *OpenedEngine) GetEngineUUID() uuid.UUID { + return oe.uuid +} diff --git a/br/pkg/lightning/backend/kv/session_test.go b/br/pkg/lightning/backend/kv/kvtest/session_test.go similarity index 82% rename from br/pkg/lightning/backend/kv/session_test.go rename to br/pkg/lightning/backend/kv/kvtest/session_test.go index a37f48c190ed8..a1212b5692266 100644 --- a/br/pkg/lightning/backend/kv/session_test.go +++ b/br/pkg/lightning/backend/kv/kvtest/session_test.go @@ -12,18 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License.
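// Editor's note — an illustrative, hypothetical sketch, not part of this patch:
// how a caller might use the new Backend.TotalMemoryConsume and
// OpenedEngine.GetEngineUUID accessors added above. The surrounding variables
// (be backend.Backend, engine *backend.OpenedEngine, logger log.Logger) are
// assumptions made only for this example.
//
//	logger.Info("import engine status",
//		zap.Stringer("engineUUID", engine.GetEngineUUID()),
//		zap.Int64("memoryConsumedBytes", be.TotalMemoryConsume()))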
-package kv +package kvtest import ( "testing" + "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/parser/mysql" "github.com/stretchr/testify/require" ) func TestSession(t *testing.T) { - session := newSession(&SessionOptions{SQLMode: mysql.ModeNone, Timestamp: 1234567890}, log.L()) + session := kv.NewSession(&kv.SessionOptions{SQLMode: mysql.ModeNone, Timestamp: 1234567890}, log.L()) _, err := session.Txn(true) require.NoError(t, err) } diff --git a/br/pkg/lightning/backend/kv/sql2kv_test.go b/br/pkg/lightning/backend/kv/kvtest/sql2kv_test.go similarity index 80% rename from br/pkg/lightning/backend/kv/sql2kv_test.go rename to br/pkg/lightning/backend/kv/kvtest/sql2kv_test.go index b604942e38756..08a112fe3e921 100644 --- a/br/pkg/lightning/backend/kv/sql2kv_test.go +++ b/br/pkg/lightning/backend/kv/kvtest/sql2kv_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package kv +package kvtest import ( "errors" @@ -20,6 +20,7 @@ import ( "reflect" "testing" + lkv "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" "github.com/pingcap/tidb/br/pkg/lightning/common" "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/verification" @@ -47,7 +48,7 @@ func TestMarshal(t *testing.T) { minNotNull := types.Datum{} minNotNull.SetMinNotNull() encoder := zapcore.NewMapObjectEncoder() - err := encoder.AddArray("test", RowArrayMarshaler{types.NewStringDatum("1"), nullDatum, minNotNull, types.MaxValueDatum()}) + err := encoder.AddArray("test", lkv.RowArrayMarshaler{types.NewStringDatum("1"), nullDatum, minNotNull, types.MaxValueDatum()}) require.NoError(t, err) require.Equal(t, encoder.Fields["test"], []interface{}{ map[string]interface{}{"kind": "string", "val": "1"}, @@ -58,7 +59,7 @@ func TestMarshal(t *testing.T) { invalid := types.Datum{} invalid.SetInterface(1) - err = encoder.AddArray("bad-test", RowArrayMarshaler{minNotNull, invalid}) + err = encoder.AddArray("bad-test", lkv.RowArrayMarshaler{minNotNull, invalid}) require.Regexp(t, "cannot convert.*", err) require.Equal(t, encoder.Fields["bad-test"], []interface{}{ map[string]interface{}{"kind": "min", "val": "-inf"}, @@ -77,7 +78,7 @@ func TestEncode(t *testing.T) { c1 := &model.ColumnInfo{ID: 1, Name: model.NewCIStr("c1"), State: model.StatePublic, Offset: 0, FieldType: *types.NewFieldType(mysql.TypeTiny)} cols := []*model.ColumnInfo{c1} tblInfo := &model.TableInfo{ID: 1, Columns: cols, PKIsHandle: false, State: model.StatePublic} - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) logger := log.Logger{Logger: zap.NewNop()} @@ -86,7 +87,7 @@ func TestEncode(t *testing.T) { } // Strict mode - strictMode, err := NewTableKVEncoder(tbl, &SessionOptions{ + strictMode, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567890, }, nil, logger) @@ -108,17 +109,17 @@ func TestEncode(t *testing.T) { } pairs, err = strictMode.Encode(logger, rowsWithPk2, 2, []int{0, 1}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, Val: []uint8{0x8, 0x2, 0x8, 0x2}, RowID: 2, }, - }}) + 
})) // Mock add record error mockTbl := &mockTable{Table: tbl} - mockMode, err := NewTableKVEncoder(mockTbl, &SessionOptions{ + mockMode, err := lkv.NewTableKVEncoder(mockTbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567891, }, nil, logger) @@ -127,7 +128,7 @@ func TestEncode(t *testing.T) { require.EqualError(t, err, "mock error") // Non-strict mode - noneMode, err := NewTableKVEncoder(tbl, &SessionOptions{ + noneMode, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeNone, Timestamp: 1234567892, SysVars: map[string]string{"tidb_row_format_version": "1"}, @@ -135,22 +136,22 @@ func TestEncode(t *testing.T) { require.NoError(t, err) pairs, err = noneMode.Encode(logger, rows, 1, []int{0, 1}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, Val: []uint8{0x8, 0x2, 0x8, 0xfe, 0x1}, RowID: 1, }, - }}) + })) } func TestDecode(t *testing.T) { c1 := &model.ColumnInfo{ID: 1, Name: model.NewCIStr("c1"), State: model.StatePublic, Offset: 0, FieldType: *types.NewFieldType(mysql.TypeTiny)} cols := []*model.ColumnInfo{c1} tblInfo := &model.TableInfo{ID: 1, Columns: cols, PKIsHandle: false, State: model.StatePublic} - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) - decoder, err := NewTableKVDecoder(tbl, "`test`.`c1`", &SessionOptions{ + decoder, err := lkv.NewTableKVDecoder(tbl, "`test`.`c1`", &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567890, }, log.L()) @@ -171,6 +172,15 @@ func TestDecode(t *testing.T) { }) } +type LocalKvPairs struct { + pairs []common.KvPair +} + +func fromRow(r lkv.Row) (l LocalKvPairs) { + l.pairs = lkv.KvPairsFromRow(r) + return l +} + func TestDecodeIndex(t *testing.T) { logger := log.Logger{Logger: zap.NewNop()} tblInfo := &model.TableInfo{ @@ -194,7 +204,7 @@ func TestDecodeIndex(t *testing.T) { State: model.StatePublic, PKIsHandle: false, } - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) if err != nil { fmt.Printf("error: %v", err.Error()) } @@ -205,16 +215,16 @@ func TestDecodeIndex(t *testing.T) { } // Strict mode - strictMode, err := NewTableKVEncoder(tbl, &SessionOptions{ + strictMode, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567890, }, nil, log.L()) require.NoError(t, err) pairs, err := strictMode.Encode(logger, rows, 1, []int{0, 1, -1}, "1.csv", 123) - data := pairs.(*KvPairs) + data := fromRow(pairs) require.Len(t, data.pairs, 2) - decoder, err := NewTableKVDecoder(tbl, "`test`.``", &SessionOptions{ + decoder, err := lkv.NewTableKVDecoder(tbl, "`test`.``", &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567890, }, log.L()) @@ -235,7 +245,7 @@ func TestEncodeRowFormatV2(t *testing.T) { c1 := &model.ColumnInfo{ID: 1, Name: model.NewCIStr("c1"), State: model.StatePublic, Offset: 0, FieldType: *types.NewFieldType(mysql.TypeTiny)} cols := []*model.ColumnInfo{c1} tblInfo := &model.TableInfo{ID: 1, Columns: cols, PKIsHandle: false, State: model.StatePublic} - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := 
tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) logger := log.Logger{Logger: zap.NewNop()} @@ -243,7 +253,7 @@ func TestEncodeRowFormatV2(t *testing.T) { types.NewIntDatum(10000000), } - noneMode, err := NewTableKVEncoder(tbl, &SessionOptions{ + noneMode, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeNone, Timestamp: 1234567892, SysVars: map[string]string{"tidb_row_format_version": "2"}, @@ -251,7 +261,7 @@ func TestEncodeRowFormatV2(t *testing.T) { require.NoError(t, err) pairs, err := noneMode.Encode(logger, rows, 1, []int{0, 1}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { // the key should be the same as TestEncode() Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, @@ -266,7 +276,7 @@ func TestEncodeRowFormatV2(t *testing.T) { }, RowID: 1, }, - }}) + })) } func TestEncodeTimestamp(t *testing.T) { @@ -283,12 +293,12 @@ func TestEncodeTimestamp(t *testing.T) { } cols := []*model.ColumnInfo{c1} tblInfo := &model.TableInfo{ID: 1, Columns: cols, PKIsHandle: false, State: model.StatePublic} - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) logger := log.Logger{Logger: zap.NewNop()} - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{ + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567893, SysVars: map[string]string{ @@ -299,23 +309,23 @@ func TestEncodeTimestamp(t *testing.T) { require.NoError(t, err) pairs, err := encoder.Encode(logger, nil, 70, []int{-1, 1}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x46}, Val: []uint8{0x8, 0x2, 0x9, 0x80, 0x80, 0x80, 0xf0, 0xfd, 0x8e, 0xf7, 0xc0, 0x19}, RowID: 70, }, - }}) + })) } func TestEncodeDoubleAutoIncrement(t *testing.T) { tblInfo := mockTableInfo(t, "create table t (id double not null auto_increment, unique key `u_id` (`id`));") - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) logger := log.Logger{Logger: zap.NewNop()} - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{ + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, SysVars: map[string]string{ "tidb_row_format_version": "2", @@ -324,7 +334,7 @@ func TestEncodeDoubleAutoIncrement(t *testing.T) { require.NoError(t, err) strDatumForID := types.NewStringDatum("1") - actualDatum, err := encoder.(*tableKVEncoder).getActualDatum(70, 0, &strDatumForID) + actualDatum, err := lkv.GetActualDatum(encoder, 70, 0, &strDatumForID) require.NoError(t, err) require.Equal(t, types.NewFloat64Datum(1.0), actualDatum) @@ -332,7 +342,7 @@ func TestEncodeDoubleAutoIncrement(t *testing.T) { types.NewFloat64Datum(1.0), }, 70, []int{0, -1}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0x80, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x46}, Val: []uint8{0x80, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x8, 0x0, 0xbf, 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, @@ -343,7 +353,7 @@ func TestEncodeDoubleAutoIncrement(t *testing.T) { Val: []uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x46}, RowID: 70, }, - }}, pairsExpect) + }), pairsExpect) pairs, err := encoder.Encode(logger, []types.Datum{ types.NewStringDatum("1"), @@ -351,7 +361,7 @@ func TestEncodeDoubleAutoIncrement(t *testing.T) { require.NoError(t, err) require.Equal(t, pairsExpect, pairs) - require.Equal(t, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(autoid.AutoIncrementType).Base(), int64(70)) + require.Equal(t, tbl.Allocators(lkv.GetEncoderSe(encoder)).Get(autoid.AutoIncrementType).Base(), int64(70)) } func TestEncodeMissingAutoValue(t *testing.T) { @@ -378,10 +388,10 @@ func TestEncodeMissingAutoValue(t *testing.T) { // seems parser can't parse auto_random properly. tblInfo.AutoRandomBits = 3 } - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{ + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, SysVars: map[string]string{ "tidb_row_format_version": "2", @@ -389,17 +399,17 @@ func TestEncodeMissingAutoValue(t *testing.T) { }, nil, log.L()) require.NoError(t, err) - realRowID := encoder.(*tableKVEncoder).autoIDFn(rowID) + realRowID := lkv.GetEncoderAutoIDFn(encoder, rowID) var nullDatum types.Datum nullDatum.SetNull() expectIDDatum := types.NewIntDatum(realRowID) - actualIDDatum, err := encoder.(*tableKVEncoder).getActualDatum(rowID, 0, nil) + actualIDDatum, err := lkv.GetActualDatum(encoder, rowID, 0, nil) require.NoError(t, err) require.Equal(t, expectIDDatum, actualIDDatum) - actualIDDatum, err = encoder.(*tableKVEncoder).getActualDatum(rowID, 0, &nullDatum) + actualIDDatum, err = lkv.GetActualDatum(encoder, rowID, 0, &nullDatum) require.NoError(t, err) require.Equal(t, expectIDDatum, actualIDDatum) @@ -414,13 +424,13 @@ func TestEncodeMissingAutoValue(t *testing.T) { }, rowID, []int{0}, "1.csv", 1234) require.NoError(t, err) require.Equalf(t, pairsExpect, pairs, "test table info: %+v", testTblInfo) - require.Equalf(t, rowID, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(testTblInfo.AllocType).Base(), "test table info: %+v", testTblInfo) + require.Equalf(t, rowID, tbl.Allocators(lkv.GetEncoderSe(encoder)).Get(testTblInfo.AllocType).Base(), "test table info: %+v", testTblInfo) // test insert a row without specifying the auto_xxxx column pairs, err = encoder.Encode(logger, []types.Datum{}, rowID, []int{0}, "1.csv", 1234) require.NoError(t, err) require.Equalf(t, pairsExpect, pairs, "test table info: %+v", testTblInfo) - require.Equalf(t, rowID, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(testTblInfo.AllocType).Base(), "test table info: %+v", testTblInfo) + require.Equalf(t, rowID, tbl.Allocators(lkv.GetEncoderSe(encoder)).Get(testTblInfo.AllocType).Base(), "test table info: %+v", testTblInfo) } } @@ -440,9 +450,9 @@ func TestDefaultAutoRandoms(t *testing.T) { tblInfo := mockTableInfo(t, "create table t (id bigint unsigned NOT NULL auto_random primary key clustered, a varchar(100));") // seems parser can't parse auto_random properly. 
tblInfo.AutoRandomBits = 5 - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{ + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567893, SysVars: map[string]string{"tidb_row_format_version": "2"}, @@ -452,32 +462,32 @@ func TestDefaultAutoRandoms(t *testing.T) { logger := log.Logger{Logger: zap.NewNop()} pairs, err := encoder.Encode(logger, []types.Datum{types.NewStringDatum("")}, 70, []int{-1, 0}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x46}, Val: []uint8{0x80, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0}, RowID: 70, }, - }}) - require.Equal(t, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(autoid.AutoRandomType).Base(), int64(70)) + })) + require.Equal(t, tbl.Allocators(lkv.GetSession4test(encoder)).Get(autoid.AutoRandomType).Base(), int64(70)) pairs, err = encoder.Encode(logger, []types.Datum{types.NewStringDatum("")}, 71, []int{-1, 0}, "1.csv", 1234) require.NoError(t, err) - require.Equal(t, pairs, &KvPairs{pairs: []common.KvPair{ + require.Equal(t, pairs, lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []uint8{0x74, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x72, 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x47}, Val: []uint8{0x80, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0}, RowID: 71, }, - }}) - require.Equal(t, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(autoid.AutoRandomType).Base(), int64(71)) + })) + require.Equal(t, tbl.Allocators(lkv.GetSession4test(encoder)).Get(autoid.AutoRandomType).Base(), int64(71)) } func TestShardRowId(t *testing.T) { tblInfo := mockTableInfo(t, "create table t (s varchar(16)) shard_row_id_bits = 3;") - tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tblInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tblInfo) require.NoError(t, err) - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{ + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{ SQLMode: mysql.ModeStrictAllTables, Timestamp: 1234567893, SysVars: map[string]string{"tidb_row_format_version": "2"}, @@ -489,7 +499,7 @@ func TestShardRowId(t *testing.T) { for i := int64(1); i <= 32; i++ { pairs, err := encoder.Encode(logger, []types.Datum{types.NewStringDatum(fmt.Sprintf("%d", i))}, i, []int{0, -1}, "1.csv", i*32) require.NoError(t, err) - kvs := pairs.(*KvPairs) + kvs := fromRow(pairs) require.Len(t, kvs.pairs, 1) _, h, err := tablecodec.DecodeRecordKey(kvs.pairs[0].Key) require.NoError(t, err) @@ -498,7 +508,7 @@ func TestShardRowId(t *testing.T) { keyMap[rowID>>60] = struct{}{} } require.Len(t, keyMap, 8) - require.Equal(t, tbl.Allocators(encoder.(*tableKVEncoder).se).Get(autoid.RowIDAllocType).Base(), int64(32)) + require.Equal(t, tbl.Allocators(lkv.GetSession4test(encoder)).Get(autoid.RowIDAllocType).Base(), int64(32)) } func TestSplitIntoChunks(t *testing.T) { @@ -521,35 +531,35 @@ func TestSplitIntoChunks(t *testing.T) { }, } - splitBy10 := MakeRowsFromKvPairs(pairs).SplitIntoChunks(10) - require.Equal(t, splitBy10, []Rows{ - MakeRowsFromKvPairs(pairs[0:2]), - MakeRowsFromKvPairs(pairs[2:3]), - MakeRowsFromKvPairs(pairs[3:4]), + splitBy10 := 
lkv.MakeRowsFromKvPairs(pairs).SplitIntoChunks(10) + require.Equal(t, splitBy10, []lkv.Rows{ + lkv.MakeRowsFromKvPairs(pairs[0:2]), + lkv.MakeRowsFromKvPairs(pairs[2:3]), + lkv.MakeRowsFromKvPairs(pairs[3:4]), }) - splitBy12 := MakeRowsFromKvPairs(pairs).SplitIntoChunks(12) - require.Equal(t, splitBy12, []Rows{ - MakeRowsFromKvPairs(pairs[0:2]), - MakeRowsFromKvPairs(pairs[2:4]), + splitBy12 := lkv.MakeRowsFromKvPairs(pairs).SplitIntoChunks(12) + require.Equal(t, splitBy12, []lkv.Rows{ + lkv.MakeRowsFromKvPairs(pairs[0:2]), + lkv.MakeRowsFromKvPairs(pairs[2:4]), }) - splitBy1000 := MakeRowsFromKvPairs(pairs).SplitIntoChunks(1000) - require.Equal(t, splitBy1000, []Rows{ - MakeRowsFromKvPairs(pairs[0:4]), + splitBy1000 := lkv.MakeRowsFromKvPairs(pairs).SplitIntoChunks(1000) + require.Equal(t, splitBy1000, []lkv.Rows{ + lkv.MakeRowsFromKvPairs(pairs[0:4]), }) - splitBy1 := MakeRowsFromKvPairs(pairs).SplitIntoChunks(1) - require.Equal(t, splitBy1, []Rows{ - MakeRowsFromKvPairs(pairs[0:1]), - MakeRowsFromKvPairs(pairs[1:2]), - MakeRowsFromKvPairs(pairs[2:3]), - MakeRowsFromKvPairs(pairs[3:4]), + splitBy1 := lkv.MakeRowsFromKvPairs(pairs).SplitIntoChunks(1) + require.Equal(t, splitBy1, []lkv.Rows{ + lkv.MakeRowsFromKvPairs(pairs[0:1]), + lkv.MakeRowsFromKvPairs(pairs[1:2]), + lkv.MakeRowsFromKvPairs(pairs[2:3]), + lkv.MakeRowsFromKvPairs(pairs[3:4]), }) } func TestClassifyAndAppend(t *testing.T) { - kvs := MakeRowFromKvPairs([]common.KvPair{ + kvs := lkv.MakeRowFromKvPairs([]common.KvPair{ { Key: []byte("txxxxxxxx_ryyyyyyyy"), Val: []byte("value1"), @@ -564,14 +574,14 @@ func TestClassifyAndAppend(t *testing.T) { }, }) - data := MakeRowsFromKvPairs(nil) - indices := MakeRowsFromKvPairs(nil) + data := lkv.MakeRowsFromKvPairs(nil) + indices := lkv.MakeRowsFromKvPairs(nil) dataChecksum := verification.MakeKVChecksum(0, 0, 0) indexChecksum := verification.MakeKVChecksum(0, 0, 0) kvs.ClassifyAndAppend(&data, &dataChecksum, &indices, &indexChecksum) - require.Equal(t, data, MakeRowsFromKvPairs([]common.KvPair{ + require.Equal(t, data, lkv.MakeRowsFromKvPairs([]common.KvPair{ { Key: []byte("txxxxxxxx_ryyyyyyyy"), Val: []byte("value1"), @@ -581,7 +591,7 @@ func TestClassifyAndAppend(t *testing.T) { Val: []byte("value2"), }, })) - require.Equal(t, indices, MakeRowsFromKvPairs([]common.KvPair{ + require.Equal(t, indices, lkv.MakeRowsFromKvPairs([]common.KvPair{ { Key: []byte("txxxxxxxx_izzzzzzzz"), Val: []byte("index1"), @@ -594,7 +604,7 @@ func TestClassifyAndAppend(t *testing.T) { type benchSQL2KVSuite struct { row []types.Datum colPerm []int - encoder Encoder + encoder lkv.Encoder logger log.Logger } @@ -634,9 +644,9 @@ func SetUpTest(b *testing.B) *benchSQL2KVSuite { tableInfo.State = model.StatePublic // Construct the corresponding KV encoder. 
- tbl, err := tables.TableFromMeta(NewPanickingAllocators(0), tableInfo) + tbl, err := tables.TableFromMeta(lkv.NewPanickingAllocators(0), tableInfo) require.NoError(b, err) - encoder, err := NewTableKVEncoder(tbl, &SessionOptions{SysVars: map[string]string{"tidb_row_format_version": "2"}}, nil, log.L()) + encoder, err := lkv.NewTableKVEncoder(tbl, &lkv.SessionOptions{SysVars: map[string]string{"tidb_row_format_version": "2"}}, nil, log.L()) require.NoError(b, err) logger := log.Logger{Logger: zap.NewNop()} diff --git a/br/pkg/lightning/backend/kv/sql2kv.go b/br/pkg/lightning/backend/kv/sql2kv.go index 66be51a19ec5e..d8d4f2940c527 100644 --- a/br/pkg/lightning/backend/kv/sql2kv.go +++ b/br/pkg/lightning/backend/kv/sql2kv.go @@ -33,8 +33,7 @@ import ( "github.com/pingcap/tidb/meta/autoid" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" //nolint: goimports - // Import tidb/planner/core to initialize expression.RewriteAstExpr - _ "github.com/pingcap/tidb/planner/core" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/table/tables" @@ -65,6 +64,10 @@ type tableKVEncoder struct { metrics *metric.Metrics } +func GetSession4test(encoder Encoder) sessionctx.Context { + return encoder.(*tableKVEncoder).se +} + func NewTableKVEncoder( tbl table.Table, options *SessionOptions, @@ -322,6 +325,14 @@ func KvPairsFromRows(rows Rows) []common.KvPair { return rows.(*KvPairs).pairs } +// KvPairsFromRow converts a Rows instance constructed from MakeRowsFromKvPairs +// back into a slice of KvPair. This method panics if the Rows is not +// constructed in such way. +// nolint:golint // kv.KvPairsFromRow sounds good. +func KvPairsFromRow(rows Row) []common.KvPair { + return rows.(*KvPairs).pairs +} + func evaluateGeneratedColumns(se *session, record []types.Datum, cols []*table.Column, genCols []genCol) (err error, errCol *model.ColumnInfo) { mutRow := chunk.MutRowFromDatums(record) for _, gc := range genCols { @@ -449,6 +460,21 @@ func isPKCol(colInfo *model.ColumnInfo) bool { return mysql.HasPriKeyFlag(colInfo.GetFlag()) } +// GetEncoderAutoIDFn return Auto increment id. +func GetEncoderAutoIDFn(encoder Encoder, id int64) int64 { + return encoder.(*tableKVEncoder).autoIDFn(id) +} + +// GetEncoderSe return session. +func GetEncoderSe(encoder Encoder) *session { + return encoder.(*tableKVEncoder).se +} + +// GetActualDatum export getActualDatum function. 
+func GetActualDatum(encoder Encoder, rowID int64, colIndex int, inputDatum *types.Datum) (types.Datum, error) { + return encoder.(*tableKVEncoder).getActualDatum(rowID, colIndex, inputDatum) +} + func (kvcodec *tableKVEncoder) getActualDatum(rowID int64, colIndex int, inputDatum *types.Datum) (types.Datum, error) { var ( value types.Datum diff --git a/br/pkg/lightning/backend/local/duplicate.go b/br/pkg/lightning/backend/local/duplicate.go index b44a6c680f670..6e56f5dae04c3 100644 --- a/br/pkg/lightning/backend/local/duplicate.go +++ b/br/pkg/lightning/backend/local/duplicate.go @@ -33,7 +33,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/errormanager" "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/logutil" - "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/distsql" tidbkv "github.com/pingcap/tidb/kv" @@ -299,7 +299,7 @@ type RemoteDupKVStream struct { func getDupDetectClient( ctx context.Context, - region *restore.RegionInfo, + region *split.RegionInfo, keyRange tidbkv.KeyRange, importClientFactory ImportClientFactory, ) (import_sstpb.ImportSST_DuplicateDetectClient, error) { @@ -331,7 +331,7 @@ func getDupDetectClient( // NewRemoteDupKVStream creates a new RemoteDupKVStream. func NewRemoteDupKVStream( ctx context.Context, - region *restore.RegionInfo, + region *split.RegionInfo, keyRange tidbkv.KeyRange, importClientFactory ImportClientFactory, ) (*RemoteDupKVStream, error) { @@ -393,7 +393,7 @@ func (s *RemoteDupKVStream) Close() error { type DuplicateManager struct { tbl table.Table tableName string - splitCli restore.SplitClient + splitCli split.SplitClient tikvCli *tikv.KVStore errorMgr *errormanager.ErrorManager decoder *kv.TableKVDecoder @@ -406,7 +406,7 @@ type DuplicateManager struct { func NewDuplicateManager( tbl table.Table, tableName string, - splitCli restore.SplitClient, + splitCli split.SplitClient, tikvCli *tikv.KVStore, errMgr *errormanager.ErrorManager, sessOpts *kv.SessionOptions, @@ -661,14 +661,14 @@ func (m *DuplicateManager) CollectDuplicateRowsFromDupDB(ctx context.Context, du func (m *DuplicateManager) splitKeyRangeByRegions( ctx context.Context, keyRange tidbkv.KeyRange, -) ([]*restore.RegionInfo, []tidbkv.KeyRange, error) { +) ([]*split.RegionInfo, []tidbkv.KeyRange, error) { rawStartKey := codec.EncodeBytes(nil, keyRange.StartKey) rawEndKey := codec.EncodeBytes(nil, keyRange.EndKey) - allRegions, err := restore.PaginateScanRegion(ctx, m.splitCli, rawStartKey, rawEndKey, 1024) + allRegions, err := split.PaginateScanRegion(ctx, m.splitCli, rawStartKey, rawEndKey, 1024) if err != nil { return nil, nil, errors.Trace(err) } - regions := make([]*restore.RegionInfo, 0, len(allRegions)) + regions := make([]*split.RegionInfo, 0, len(allRegions)) keyRanges := make([]tidbkv.KeyRange, 0, len(allRegions)) for _, region := range allRegions { startKey := keyRange.StartKey @@ -711,7 +711,7 @@ func (m *DuplicateManager) processRemoteDupTaskOnce( remainKeyRanges *pendingKeyRanges, ) (madeProgress bool, err error) { //nolint: prealloc - var regions []*restore.RegionInfo + var regions []*split.RegionInfo //nolint: prealloc var keyRanges []tidbkv.KeyRange diff --git a/br/pkg/lightning/backend/local/engine.go b/br/pkg/lightning/backend/local/engine.go index 90254f3332fd0..56ecff75587f5 100644 --- a/br/pkg/lightning/backend/local/engine.go +++ b/br/pkg/lightning/backend/local/engine.go @@ -235,6 +235,18 @@ func (e *Engine) unlock() { 
e.mutex.Unlock() } +func (e *Engine) TotalMemorySize() int64 { + var memSize int64 = 0 + e.localWriters.Range(func(k, v interface{}) bool { + w := k.(*Writer) + if w.kvBuffer != nil { + memSize += w.kvBuffer.TotalSize() + } + return true + }) + return memSize +} + type rangeOffsets struct { Size uint64 Keys uint64 diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index d5d9e9bea7b5c..d6efa398246bd 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -49,7 +49,7 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/pdutil" - split "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/infoschema" @@ -355,6 +355,18 @@ func NewLocalBackend( return backend.MakeBackend(local), nil } +func (local *local) TotalMemoryConsume() int64 { + var memConsume int64 = 0 + local.engines.Range(func(k, v interface{}) bool { + e := v.(*Engine) + if e != nil { + memConsume += e.TotalMemorySize() + } + return true + }) + return memConsume +} + func (local *local) checkMultiIngestSupport(ctx context.Context) error { stores, err := local.pdCtl.GetPDClient().GetAllStores(ctx, pd.WithExcludeTombstone()) if err != nil { @@ -1698,6 +1710,10 @@ func (t tblNames) String() string { return b.String() } +func CheckTiFlashVersion4test(ctx context.Context, g glue.Glue, checkCtx *backend.CheckCtx, tidbVersion semver.Version) error { + return checkTiFlashVersion(ctx, g, checkCtx, tidbVersion) +} + // check TiFlash replicas. // local backend doesn't support TiFlash before tidb v4.0.5 func checkTiFlashVersion(ctx context.Context, g glue.Glue, checkCtx *backend.CheckCtx, tidbVersion semver.Version) error { diff --git a/br/pkg/lightning/backend/local/local_test.go b/br/pkg/lightning/backend/local/local_test.go index 13112051e59d6..7fb59cf29de12 100644 --- a/br/pkg/lightning/backend/local/local_test.go +++ b/br/pkg/lightning/backend/local/local_test.go @@ -29,9 +29,7 @@ import ( "testing" "github.com/cockroachdb/pebble" - "github.com/coreos/go-semver/semver" "github.com/docker/go-units" - "github.com/golang/mock/gomock" "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -42,11 +40,9 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" "github.com/pingcap/tidb/br/pkg/lightning/common" "github.com/pingcap/tidb/br/pkg/lightning/log" - "github.com/pingcap/tidb/br/pkg/lightning/mydump" "github.com/pingcap/tidb/br/pkg/membuf" - "github.com/pingcap/tidb/br/pkg/mock" "github.com/pingcap/tidb/br/pkg/pdutil" - "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" tidbkv "github.com/pingcap/tidb/kv" @@ -424,11 +420,11 @@ func TestLocalWriterWithIngestUnsort(t *testing.T) { } type mockSplitClient struct { - restore.SplitClient + split.SplitClient } -func (c *mockSplitClient) GetRegion(ctx context.Context, key []byte) (*restore.RegionInfo, error) { - return &restore.RegionInfo{ +func (c *mockSplitClient) GetRegion(ctx context.Context, key []byte) (*split.RegionInfo, error) { + return &split.RegionInfo{ Leader: &metapb.Peer{Id: 1}, Region: &metapb.Region{ Id: 1, @@ -451,7 +447,7 @@ func TestIsIngestRetryable(t *testing.T) { }, } ctx := context.Background() - region := 
&restore.RegionInfo{ + region := &split.RegionInfo{ Leader: &metapb.Peer{Id: 1}, Region: &metapb.Region{ Id: 1, @@ -626,55 +622,6 @@ func TestLocalIngestLoop(t *testing.T) { require.Equal(t, atomic.LoadInt32(&maxMetaSeq), f.finishedMetaSeq.Load()) } -func TestCheckRequirementsTiFlash(t *testing.T) { - controller := gomock.NewController(t) - defer controller.Finish() - glue := mock.NewMockGlue(controller) - exec := mock.NewMockSQLExecutor(controller) - ctx := context.Background() - - dbMetas := []*mydump.MDDatabaseMeta{ - { - Name: "test", - Tables: []*mydump.MDTableMeta{ - { - DB: "test", - Name: "t1", - DataFiles: []mydump.FileInfo{{}}, - }, - { - DB: "test", - Name: "tbl", - DataFiles: []mydump.FileInfo{{}}, - }, - }, - }, - { - Name: "test1", - Tables: []*mydump.MDTableMeta{ - { - DB: "test1", - Name: "t", - DataFiles: []mydump.FileInfo{{}}, - }, - { - DB: "test1", - Name: "tbl", - DataFiles: []mydump.FileInfo{{}}, - }, - }, - }, - } - checkCtx := &backend.CheckCtx{DBMetas: dbMetas} - - glue.EXPECT().GetSQLExecutor().Return(exec) - exec.EXPECT().QueryStringsWithLog(ctx, tiFlashReplicaQuery, gomock.Any(), gomock.Any()). - Return([][]string{{"db", "tbl"}, {"test", "t1"}, {"test1", "tbl"}}, nil) - - err := checkTiFlashVersion(ctx, glue, checkCtx, *semver.New("4.0.2")) - require.Regexp(t, "^lightning local backend doesn't support TiFlash in this TiDB version. conflict tables: \\[`test`.`t1`, `test1`.`tbl`\\]", err.Error()) -} - func makeRanges(input []string) []Range { ranges := make([]Range, 0, len(input)/2) for i := 0; i < len(input)-1; i += 2 { diff --git a/br/pkg/lightning/backend/local/localhelper.go b/br/pkg/lightning/backend/local/localhelper.go index ae736eaaabd37..169a0cc5f885f 100644 --- a/br/pkg/lightning/backend/local/localhelper.go +++ b/br/pkg/lightning/backend/local/localhelper.go @@ -34,7 +34,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/common" "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/logutil" - split "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/mathutil" "go.uber.org/multierr" diff --git a/br/pkg/lightning/backend/local/localhelper_test.go b/br/pkg/lightning/backend/local/localhelper_test.go index 8d3d367443ac8..fcdc49078a2b4 100644 --- a/br/pkg/lightning/backend/local/localhelper_test.go +++ b/br/pkg/lightning/backend/local/localhelper_test.go @@ -30,7 +30,7 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/tidb/br/pkg/lightning/glue" "github.com/pingcap/tidb/br/pkg/lightning/log" - "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" @@ -44,14 +44,13 @@ import ( func init() { // Reduce the time cost for test cases. 
- restore.ScanRegionAttemptTimes = 2 splitRetryTimes = 2 } type testClient struct { mu sync.RWMutex stores map[uint64]*metapb.Store - regions map[uint64]*restore.RegionInfo + regions map[uint64]*split.RegionInfo regionsInfo *pdtypes.RegionTree // For now it's only used in ScanRegions nextRegionID uint64 splitCount atomic.Int32 @@ -60,7 +59,7 @@ type testClient struct { func newTestClient( stores map[uint64]*metapb.Store, - regions map[uint64]*restore.RegionInfo, + regions map[uint64]*split.RegionInfo, nextRegionID uint64, hook clientHook, ) *testClient { @@ -78,11 +77,11 @@ func newTestClient( } // ScatterRegions scatters regions in a batch. -func (c *testClient) ScatterRegions(ctx context.Context, regionInfo []*restore.RegionInfo) error { +func (c *testClient) ScatterRegions(ctx context.Context, regionInfo []*split.RegionInfo) error { return nil } -func (c *testClient) GetAllRegions() map[uint64]*restore.RegionInfo { +func (c *testClient) GetAllRegions() map[uint64]*split.RegionInfo { c.mu.RLock() defer c.mu.RUnlock() return c.regions @@ -98,7 +97,7 @@ func (c *testClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Stor return store, nil } -func (c *testClient) GetRegion(ctx context.Context, key []byte) (*restore.RegionInfo, error) { +func (c *testClient) GetRegion(ctx context.Context, key []byte) (*split.RegionInfo, error) { c.mu.RLock() defer c.mu.RUnlock() for _, region := range c.regions { @@ -110,7 +109,7 @@ func (c *testClient) GetRegion(ctx context.Context, key []byte) (*restore.Region return nil, errors.Errorf("region not found: key=%s", string(key)) } -func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*restore.RegionInfo, error) { +func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*split.RegionInfo, error) { c.mu.RLock() defer c.mu.RUnlock() region, ok := c.regions[regionID] @@ -122,12 +121,12 @@ func (c *testClient) GetRegionByID(ctx context.Context, regionID uint64) (*resto func (c *testClient) SplitRegion( ctx context.Context, - regionInfo *restore.RegionInfo, + regionInfo *split.RegionInfo, key []byte, -) (*restore.RegionInfo, error) { +) (*split.RegionInfo, error) { c.mu.Lock() defer c.mu.Unlock() - var target *restore.RegionInfo + var target *split.RegionInfo splitKey := codec.EncodeBytes([]byte{}, key) for _, region := range c.regions { if bytes.Compare(splitKey, region.Region.StartKey) >= 0 && @@ -138,7 +137,7 @@ func (c *testClient) SplitRegion( if target == nil { return nil, errors.Errorf("region not found: key=%s", string(key)) } - newRegion := &restore.RegionInfo{ + newRegion := &split.RegionInfo{ Region: &metapb.Region{ Peers: target.Region.Peers, Id: c.nextRegionID, @@ -161,8 +160,8 @@ func (c *testClient) SplitRegion( } func (c *testClient) BatchSplitRegionsWithOrigin( - ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte, -) (*restore.RegionInfo, []*restore.RegionInfo, error) { + ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte, +) (*split.RegionInfo, []*split.RegionInfo, error) { c.mu.Lock() defer c.mu.Unlock() c.splitCount.Inc() @@ -180,7 +179,7 @@ func (c *testClient) BatchSplitRegionsWithOrigin( default: } - newRegions := make([]*restore.RegionInfo, 0) + newRegions := make([]*split.RegionInfo, 0) target, ok := c.regions[regionInfo.Region.Id] if !ok { return nil, nil, errors.New("region not found") @@ -203,7 +202,7 @@ func (c *testClient) BatchSplitRegionsWithOrigin( if bytes.Compare(key, startKey) <= 0 || bytes.Compare(key, target.Region.EndKey) >= 0 { continue } - 
newRegion := &restore.RegionInfo{ + newRegion := &split.RegionInfo{ Region: &metapb.Region{ Peers: target.Region.Peers, Id: c.nextRegionID, @@ -236,13 +235,13 @@ func (c *testClient) BatchSplitRegionsWithOrigin( } func (c *testClient) BatchSplitRegions( - ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte, -) ([]*restore.RegionInfo, error) { + ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte, +) ([]*split.RegionInfo, error) { _, newRegions, err := c.BatchSplitRegionsWithOrigin(ctx, regionInfo, keys) return newRegions, err } -func (c *testClient) ScatterRegion(ctx context.Context, regionInfo *restore.RegionInfo) error { +func (c *testClient) ScatterRegion(ctx context.Context, regionInfo *split.RegionInfo) error { return nil } @@ -252,15 +251,15 @@ func (c *testClient) GetOperator(ctx context.Context, regionID uint64) (*pdpb.Ge }, nil } -func (c *testClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*restore.RegionInfo, error) { +func (c *testClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*split.RegionInfo, error) { if c.hook != nil { key, endKey, limit = c.hook.BeforeScanRegions(ctx, key, endKey, limit) } infos := c.regionsInfo.ScanRange(key, endKey, limit) - regions := make([]*restore.RegionInfo, 0, len(infos)) + regions := make([]*split.RegionInfo, 0, len(infos)) for _, info := range infos { - regions = append(regions, &restore.RegionInfo{ + regions = append(regions, &split.RegionInfo{ Region: info.Meta, Leader: info.Leader, }) @@ -289,7 +288,7 @@ func (c *testClient) SetStoresLabel(ctx context.Context, stores []uint64, labelK return nil } -func cloneRegion(region *restore.RegionInfo) *restore.RegionInfo { +func cloneRegion(region *split.RegionInfo) *split.RegionInfo { r := &metapb.Region{} if region.Region != nil { b, _ := region.Region.Marshal() @@ -301,7 +300,7 @@ func cloneRegion(region *restore.RegionInfo) *restore.RegionInfo { b, _ := region.Region.Marshal() _ = l.Unmarshal(b) } - return &restore.RegionInfo{Region: r, Leader: l} + return &split.RegionInfo{Region: r, Leader: l} } // For keys ["", "aay", "bba", "bbh", "cca", ""], the key ranges of @@ -312,7 +311,7 @@ func initTestClient(keys [][]byte, hook clientHook) *testClient { Id: 1, StoreId: 1, } - regions := make(map[uint64]*restore.RegionInfo) + regions := make(map[uint64]*split.RegionInfo) for i := uint64(1); i < uint64(len(keys)); i++ { startKey := keys[i-1] if len(startKey) != 0 { @@ -322,7 +321,7 @@ func initTestClient(keys [][]byte, hook clientHook) *testClient { if len(endKey) != 0 { endKey = codec.EncodeBytes([]byte{}, endKey) } - regions[i] = &restore.RegionInfo{ + regions[i] = &split.RegionInfo{ Region: &metapb.Region{ Id: i, Peers: peers, @@ -339,7 +338,7 @@ func initTestClient(keys [][]byte, hook clientHook) *testClient { return newTestClient(stores, regions, uint64(len(keys)), hook) } -func checkRegionRanges(t *testing.T, regions []*restore.RegionInfo, keys [][]byte) { +func checkRegionRanges(t *testing.T, regions []*split.RegionInfo, keys [][]byte) { for i, r := range regions { _, regionStart, _ := codec.DecodeBytes(r.Region.StartKey, []byte{}) _, regionEnd, _ := codec.DecodeBytes(r.Region.EndKey, []byte{}) @@ -349,21 +348,21 @@ func checkRegionRanges(t *testing.T, regions []*restore.RegionInfo, keys [][]byt } type clientHook interface { - BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) - AfterSplitRegion(context.Context, *restore.RegionInfo, [][]byte, 
[]*restore.RegionInfo, error) ([]*restore.RegionInfo, error) + BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) + AfterSplitRegion(context.Context, *split.RegionInfo, [][]byte, []*split.RegionInfo, error) ([]*split.RegionInfo, error) BeforeScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]byte, []byte, int) - AfterScanRegions([]*restore.RegionInfo, error) ([]*restore.RegionInfo, error) + AfterScanRegions([]*split.RegionInfo, error) ([]*split.RegionInfo, error) } type noopHook struct{} -func (h *noopHook) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { +func (h *noopHook) BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) { delayTime := rand.Int31n(10) + 1 time.Sleep(time.Duration(delayTime) * time.Millisecond) return regionInfo, keys } -func (h *noopHook) AfterSplitRegion(c context.Context, r *restore.RegionInfo, keys [][]byte, res []*restore.RegionInfo, err error) ([]*restore.RegionInfo, error) { +func (h *noopHook) AfterSplitRegion(c context.Context, r *split.RegionInfo, keys [][]byte, res []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { return res, err } @@ -371,7 +370,7 @@ func (h *noopHook) BeforeScanRegions(ctx context.Context, key, endKey []byte, li return key, endKey, limit } -func (h *noopHook) AfterScanRegions(res []*restore.RegionInfo, err error) ([]*restore.RegionInfo, error) { +func (h *noopHook) AfterScanRegions(res []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { return res, err } @@ -425,7 +424,7 @@ func doTestBatchSplitRegionByRanges(ctx context.Context, t *testing.T, hook clie // current region ranges: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) rangeStart := codec.EncodeBytes([]byte{}, []byte("b")) rangeEnd := codec.EncodeBytes([]byte{}, []byte("c")) - regions, err := restore.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) + regions, err := split.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) require.NoError(t, err) // regions is: [aay, bba), [bba, bbh), [bbh, cca) checkRegionRanges(t, regions, [][]byte{[]byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca")}) @@ -451,7 +450,7 @@ func doTestBatchSplitRegionByRanges(ctx context.Context, t *testing.T, hook clie splitHook.check(t, client) // check split ranges - regions, err = restore.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) + regions, err = split.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) require.NoError(t, err) result := [][]byte{ []byte("b"), []byte("ba"), []byte("bb"), []byte("bba"), []byte("bbh"), []byte("bc"), @@ -505,7 +504,7 @@ type scanRegionEmptyHook struct { cnt int } -func (h *scanRegionEmptyHook) AfterScanRegions(res []*restore.RegionInfo, err error) ([]*restore.RegionInfo, error) { +func (h *scanRegionEmptyHook) AfterScanRegions(res []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { h.cnt++ // skip the first call if h.cnt == 1 { @@ -522,7 +521,7 @@ type splitRegionEpochNotMatchHook struct { noopHook } -func (h *splitRegionEpochNotMatchHook) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { +func (h *splitRegionEpochNotMatchHook) BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) { regionInfo, keys = h.noopHook.BeforeSplitRegion(ctx, regionInfo, keys) regionInfo = 
cloneRegion(regionInfo) // decrease the region epoch, so split region will fail @@ -540,7 +539,7 @@ type splitRegionEpochNotMatchHookRandom struct { cnt atomic.Int32 } -func (h *splitRegionEpochNotMatchHookRandom) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { +func (h *splitRegionEpochNotMatchHookRandom) BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) { regionInfo, keys = h.noopHook.BeforeSplitRegion(ctx, regionInfo, keys) if h.cnt.Inc() != 0 { return regionInfo, keys @@ -561,7 +560,7 @@ type splitRegionNoValidKeyHook struct { errorCnt atomic.Int32 } -func (h *splitRegionNoValidKeyHook) BeforeSplitRegion(ctx context.Context, regionInfo *restore.RegionInfo, keys [][]byte) (*restore.RegionInfo, [][]byte) { +func (h *splitRegionNoValidKeyHook) BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) { regionInfo, keys = h.noopHook.BeforeSplitRegion(ctx, regionInfo, keys) if h.errorCnt.Inc() <= h.returnErrTimes { // clean keys to trigger "no valid keys" error @@ -607,7 +606,7 @@ func TestSplitAndScatterRegionInBatches(t *testing.T) { rangeStart := codec.EncodeBytes([]byte{}, []byte("a")) rangeEnd := codec.EncodeBytes([]byte{}, []byte("b")) - regions, err := restore.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) + regions, err := split.PaginateScanRegion(ctx, client, rangeStart, rangeEnd, 5) require.NoError(t, err) result := [][]byte{[]byte("a"), []byte("a00"), []byte("a01"), []byte("a02"), []byte("a03"), []byte("a04"), []byte("a05"), []byte("a06"), []byte("a07"), []byte("a08"), []byte("a09"), []byte("a10"), []byte("a11"), @@ -622,7 +621,7 @@ type reportAfterSplitHook struct { ch chan<- struct{} } -func (h *reportAfterSplitHook) AfterSplitRegion(ctx context.Context, region *restore.RegionInfo, keys [][]byte, resultRegions []*restore.RegionInfo, err error) ([]*restore.RegionInfo, error) { +func (h *reportAfterSplitHook) AfterSplitRegion(ctx context.Context, region *split.RegionInfo, keys [][]byte, resultRegions []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { h.ch <- struct{}{} return resultRegions, err } @@ -705,7 +704,7 @@ func doTestBatchSplitByRangesWithClusteredIndex(t *testing.T, hook clientHook) { startKey := codec.EncodeBytes([]byte{}, rangeKeys[0]) endKey := codec.EncodeBytes([]byte{}, rangeKeys[len(rangeKeys)-1]) // check split ranges - regions, err := restore.PaginateScanRegion(ctx, client, startKey, endKey, 5) + regions, err := split.PaginateScanRegion(ctx, client, startKey, endKey, 5) require.NoError(t, err) require.Equal(t, len(ranges)+1, len(regions)) @@ -733,14 +732,14 @@ func TestNeedSplit(t *testing.T) { keys := []int64{10, 100, 500, 1000, 999999, -1} start := tablecodec.EncodeRowKeyWithHandle(tableID, kv.IntHandle(0)) regionStart := codec.EncodeBytes([]byte{}, start) - regions := make([]*restore.RegionInfo, 0) + regions := make([]*split.RegionInfo, 0) for _, end := range keys { var regionEndKey []byte if end >= 0 { endKey := tablecodec.EncodeRowKeyWithHandle(tableID, kv.IntHandle(end)) regionEndKey = codec.EncodeBytes([]byte{}, endKey) } - region := &restore.RegionInfo{ + region := &split.RegionInfo{ Region: &metapb.Region{ Id: 1, Peers: peers, diff --git a/br/pkg/lightning/backend/local/mock/local_test.go b/br/pkg/lightning/backend/local/mock/local_test.go new file mode 100644 index 0000000000000..e615573a08040 --- /dev/null +++ 
b/br/pkg/lightning/backend/local/mock/local_test.go @@ -0,0 +1,65 @@ +package mock + +import ( + "context" + "testing" + + "github.com/coreos/go-semver/semver" + "github.com/golang/mock/gomock" + "github.com/pingcap/tidb/br/pkg/lightning/backend" + "github.com/pingcap/tidb/br/pkg/lightning/backend/local" + "github.com/pingcap/tidb/br/pkg/lightning/mydump" + "github.com/pingcap/tidb/br/pkg/mock" + "github.com/stretchr/testify/require" +) + +var tiFlashReplicaQuery = "SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.TIFLASH_REPLICA WHERE REPLICA_COUNT > 0;" + +func TestCheckRequirementsTiFlash(t *testing.T) { + controller := gomock.NewController(t) + defer controller.Finish() + glue := mock.NewMockGlue(controller) + exec := mock.NewMockSQLExecutor(controller) + ctx := context.Background() + + dbMetas := []*mydump.MDDatabaseMeta{ + { + Name: "test", + Tables: []*mydump.MDTableMeta{ + { + DB: "test", + Name: "t1", + DataFiles: []mydump.FileInfo{{}}, + }, + { + DB: "test", + Name: "tbl", + DataFiles: []mydump.FileInfo{{}}, + }, + }, + }, + { + Name: "test1", + Tables: []*mydump.MDTableMeta{ + { + DB: "test1", + Name: "t", + DataFiles: []mydump.FileInfo{{}}, + }, + { + DB: "test1", + Name: "tbl", + DataFiles: []mydump.FileInfo{{}}, + }, + }, + }, + } + checkCtx := &backend.CheckCtx{DBMetas: dbMetas} + + glue.EXPECT().GetSQLExecutor().Return(exec) + exec.EXPECT().QueryStringsWithLog(ctx, tiFlashReplicaQuery, gomock.Any(), gomock.Any()). + Return([][]string{{"db", "tbl"}, {"test", "t1"}, {"test1", "tbl"}}, nil) + + err := local.CheckTiFlashVersion4test(ctx, glue, checkCtx, *semver.New("4.0.2")) + require.Regexp(t, "^lightning local backend doesn't support TiFlash in this TiDB version. conflict tables: \\[`test`.`t1`, `test1`.`tbl`\\]", err.Error()) +} diff --git a/br/pkg/lightning/backend/noop/noop.go b/br/pkg/lightning/backend/noop/noop.go index 93332e41b7074..a191fdff01193 100644 --- a/br/pkg/lightning/backend/noop/noop.go +++ b/br/pkg/lightning/backend/noop/noop.go @@ -21,6 +21,7 @@ import ( "github.com/google/uuid" "github.com/pingcap/tidb/br/pkg/lightning/backend" "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" + "github.com/pingcap/tidb/br/pkg/lightning/common" "github.com/pingcap/tidb/br/pkg/lightning/config" "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/verification" @@ -155,6 +156,10 @@ func (b noopBackend) ResolveDuplicateRows(ctx context.Context, tbl table.Table, return nil } +func (b noopBackend) TotalMemoryConsume() int64 { + return 0 +} + type noopEncoder struct{} // Close the encoder. 
@@ -181,6 +186,10 @@ func (w Writer) AppendRows(context.Context, string, []string, kv.Rows) error { return nil } +func (w Writer) AppendRow(context.Context, string, []string, []common.KvPair) error { + return nil +} + func (w Writer) IsSynced() bool { return true } diff --git a/br/pkg/lightning/backend/tidb/tidb.go b/br/pkg/lightning/backend/tidb/tidb.go index 1a9d100d39bd5..901c7419f91c6 100644 --- a/br/pkg/lightning/backend/tidb/tidb.go +++ b/br/pkg/lightning/backend/tidb/tidb.go @@ -466,6 +466,10 @@ rowLoop: return nil } +func (be *tidbBackend) TotalMemoryConsume() int64 { + return 0 +} + type stmtTask struct { rows tidbRows stmt string @@ -723,6 +727,9 @@ func (w *Writer) AppendRows(ctx context.Context, tableName string, columnNames [ return w.be.WriteRows(ctx, tableName, columnNames, rows) } +func (w *Writer) AppendRow(ctx context.Context, tableName string, columnNames []string, kvs []common.KvPair) error { + return nil +} func (w *Writer) IsSynced() bool { return true } diff --git a/br/pkg/lightning/config/config.go b/br/pkg/lightning/config/config.go index 0066895568550..f88e7693d6664 100644 --- a/br/pkg/lightning/config/config.go +++ b/br/pkg/lightning/config/config.go @@ -861,8 +861,39 @@ func (cfg *Config) Adjust(ctx context.Context) error { zap.ByteString("invalid-char-replacement", []byte(cfg.Mydumper.DataInvalidCharReplace))) } + mustHaveInternalConnections, err := cfg.AdjustCommon() + if err != nil { + return err + } + + // mydumper.filter and black-white-list cannot co-exist. + if cfg.HasLegacyBlackWhiteList() { + log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`") + if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) { + return common.ErrInvalidConfig.GenWithStack("`mydumper.filter` and `black-white-list` cannot be simultaneously defined") + } + } + + for _, rule := range cfg.Routes { + if !cfg.Mydumper.CaseSensitive { + rule.ToLower() + } + if err := rule.Valid(); err != nil { + return common.ErrInvalidConfig.Wrap(err).GenWithStack("file route rule is invalid") + } + } + + if err := cfg.CheckAndAdjustTiDBPort(ctx, mustHaveInternalConnections); err != nil { + return err + } + cfg.AdjustMydumper() + cfg.AdjustCheckPoint() + return cfg.CheckAndAdjustFilePath() +} + +func (cfg *Config) AdjustCommon() (bool, error) { if cfg.TikvImporter.Backend == "" { - return common.ErrInvalidConfig.GenWithStack("tikv-importer.backend must not be empty!") + return false, common.ErrInvalidConfig.GenWithStack("tikv-importer.backend must not be empty!") } cfg.TikvImporter.Backend = strings.ToLower(cfg.TikvImporter.Backend) mustHaveInternalConnections := true @@ -881,7 +912,7 @@ func (cfg *Config) Adjust(ctx context.Context) error { } cfg.DefaultVarsForImporterAndLocalBackend() default: - return common.ErrInvalidConfig.GenWithStack("unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend) + return mustHaveInternalConnections, common.ErrInvalidConfig.GenWithStack("unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend) } // TODO calculate these from the machine's free memory. 
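// Editor's note — an illustrative, hypothetical sketch, not part of this patch:
// the Adjust/AdjustCommon split above is meant to let an embedding caller (for
// example the planned add-index path) validate only the backend and SQL-mode
// related settings without the dump-file specific checks done in Adjust. The
// caller below, including the chosen backend value, is an assumption.
//
//	cfg := config.NewConfig()
//	cfg.TikvImporter.Backend = config.BackendTiDB
//	if _, err := cfg.AdjustCommon(); err != nil {
//		return errors.Trace(err)
//	}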
@@ -894,7 +925,7 @@ func (cfg *Config) Adjust(ctx context.Context) error { if cfg.TikvImporter.Backend == BackendLocal { if err := cfg.CheckAndAdjustForLocalBackend(); err != nil { - return err + return mustHaveInternalConnections, err } } else { cfg.TikvImporter.DuplicateResolution = DupeResAlgNone @@ -905,7 +936,7 @@ func (cfg *Config) Adjust(ctx context.Context) error { switch cfg.TikvImporter.OnDuplicate { case ReplaceOnDup, IgnoreOnDup, ErrorOnDup: default: - return common.ErrInvalidConfig.GenWithStack( + return mustHaveInternalConnections, common.ErrInvalidConfig.GenWithStack( "unsupported `tikv-importer.on-duplicate` (%s)", cfg.TikvImporter.OnDuplicate) } } @@ -913,36 +944,13 @@ func (cfg *Config) Adjust(ctx context.Context) error { var err error cfg.TiDB.SQLMode, err = mysql.GetSQLMode(cfg.TiDB.StrSQLMode) if err != nil { - return common.ErrInvalidConfig.Wrap(err).GenWithStack("`mydumper.tidb.sql_mode` must be a valid SQL_MODE") + return mustHaveInternalConnections, common.ErrInvalidConfig.Wrap(err).GenWithStack("`mydumper.tidb.sql_mode` must be a valid SQL_MODE") } if err := cfg.CheckAndAdjustSecurity(); err != nil { - return err - } - - // mydumper.filter and black-white-list cannot co-exist. - if cfg.HasLegacyBlackWhiteList() { - log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`") - if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) { - return common.ErrInvalidConfig.GenWithStack("`mydumper.filter` and `black-white-list` cannot be simultaneously defined") - } + return mustHaveInternalConnections, err } - - for _, rule := range cfg.Routes { - if !cfg.Mydumper.CaseSensitive { - rule.ToLower() - } - if err := rule.Valid(); err != nil { - return common.ErrInvalidConfig.Wrap(err).GenWithStack("file route rule is invalid") - } - } - - if err := cfg.CheckAndAdjustTiDBPort(ctx, mustHaveInternalConnections); err != nil { - return err - } - cfg.AdjustMydumper() - cfg.AdjustCheckPoint() - return cfg.CheckAndAdjustFilePath() + return mustHaveInternalConnections, err } func (cfg *Config) CheckAndAdjustForLocalBackend() error { diff --git a/br/pkg/lightning/lightning.go b/br/pkg/lightning/lightning.go index 0b48b5b4e3e81..dca45948be263 100644 --- a/br/pkg/lightning/lightning.go +++ b/br/pkg/lightning/lightning.go @@ -51,6 +51,8 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version/build" + _ "github.com/pingcap/tidb/expression" // get rid of `import cycle`: just init expression.RewriteAstExpr,and called at package `backend.kv` + _ "github.com/pingcap/tidb/planner/core" "github.com/pingcap/tidb/util/promutil" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" diff --git a/br/pkg/mock/backend.go b/br/pkg/mock/backend.go index 04896d4a8efd1..6027f1dfa7a95 100644 --- a/br/pkg/mock/backend.go +++ b/br/pkg/mock/backend.go @@ -347,6 +347,20 @@ func (mr *MockEngineWriterMockRecorder) AppendRows(arg0, arg1, arg2, arg3 interf return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AppendRows", reflect.TypeOf((*MockEngineWriter)(nil).AppendRows), arg0, arg1, arg2, arg3) } +// AppendRow mocks base method. +func (m *MockEngineWriter) AppendRow(arg0 context.Context, arg1 string, arg2 []string, arg3 kv.Rows) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AppendRow", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// AppendRow indicates an expected call of AppendRows. 
+func (mr *MockEngineWriterMockRecorder) AppendRow(arg0, arg1, arg2, arg3 interface{}) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AppendRow", reflect.TypeOf((*MockEngineWriter)(nil).AppendRow), arg0, arg1, arg2, arg3)
+}
+
 // Close mocks base method.
 func (m *MockEngineWriter) Close(arg0 context.Context) (backend.ChunkFlushStatus, error) {
 	m.ctrl.T.Helper()
@@ -375,3 +389,16 @@ func (mr *MockEngineWriterMockRecorder) IsSynced() *gomock.Call {
 	mr.mock.ctrl.T.Helper()
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsSynced", reflect.TypeOf((*MockEngineWriter)(nil).IsSynced))
 }
+
+func (m *MockBackend) TotalMemoryConsume() int64 {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "TotalMemoryConsume")
+	ret0, _ := ret[0].(int64)
+	return ret0
+}
+
+// TotalMemoryConsume indicates an expected call of TotalMemoryConsume.
+func (mr *MockBackendMockRecorder) TotalMemoryConsume() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TotalMemoryConsume", reflect.TypeOf((*MockBackend)(nil).TotalMemoryConsume))
+}
diff --git a/br/pkg/restore/split/region.go b/br/pkg/restore/split/region.go
new file mode 100644
index 0000000000000..2d8a72f75e072
--- /dev/null
+++ b/br/pkg/restore/split/region.go
@@ -0,0 +1,21 @@
+package split
+
+import (
+	"bytes"
+
+	"github.com/pingcap/kvproto/pkg/metapb"
+)
+
+// RegionInfo includes a region and the leader of the region.
+type RegionInfo struct {
+	Region *metapb.Region
+	Leader *metapb.Peer
+}
+
+// ContainsInterior returns whether the region contains the given key, and also
+// that the key does not fall on the boundary (start key) of the region.
+func (region *RegionInfo) ContainsInterior(key []byte) bool {
+	return bytes.Compare(key, region.Region.GetStartKey()) > 0 &&
+		(len(region.Region.GetEndKey()) == 0 ||
+			bytes.Compare(key, region.Region.GetEndKey()) < 0)
+}
diff --git a/br/pkg/restore/split/split.go b/br/pkg/restore/split/split.go
new file mode 100644
index 0000000000000..487f72c52c611
--- /dev/null
+++ b/br/pkg/restore/split/split.go
@@ -0,0 +1,25 @@
+package split
+
+import "time"
+
+// Constants for split retry machinery.
+const ( + SplitRetryTimes = 32 + SplitRetryInterval = 50 * time.Millisecond + SplitMaxRetryInterval = time.Second + + SplitCheckMaxRetryTimes = 64 + SplitCheckInterval = 8 * time.Millisecond + SplitMaxCheckInterval = time.Second + + ScatterWaitMaxRetryTimes = 64 + ScatterWaitInterval = 50 * time.Millisecond + ScatterMaxWaitInterval = time.Second + ScatterWaitUpperInterval = 180 * time.Second + + ScanRegionPaginationLimit = 128 + + RejectStoreCheckRetryTimes = 64 + RejectStoreCheckInterval = 100 * time.Millisecond + RejectStoreMaxCheckInterval = 2 * time.Second +) diff --git a/br/pkg/restore/split/split_client.go b/br/pkg/restore/split/split_client.go new file mode 100644 index 0000000000000..4b452c8d56df3 --- /dev/null +++ b/br/pkg/restore/split/split_client.go @@ -0,0 +1,667 @@ +package split + +import ( + "bytes" + "context" + "crypto/tls" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "path" + "strconv" + "strings" + "sync" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/errorpb" + "github.com/pingcap/kvproto/pkg/kvrpcpb" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/kvproto/pkg/tikvpb" + "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/conn/util" + errors2 "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/httputil" + "github.com/pingcap/tidb/br/pkg/logutil" + "github.com/pingcap/tidb/br/pkg/redact" + "github.com/pingcap/tidb/br/pkg/utils/utildb" + "github.com/pingcap/tidb/store/pdtypes" + pd "github.com/tikv/pd/client" + "go.uber.org/multierr" + "go.uber.org/zap" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" +) + +// SplitClient is an external client used by RegionSplitter. +type SplitClient interface { + // GetStore gets a store by a store id. + GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) + // GetRegion gets a region which includes a specified key. + GetRegion(ctx context.Context, key []byte) (*RegionInfo, error) + // GetRegionByID gets a region by a region id. + GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) + // SplitRegion splits a region from a key, if key is not included in the region, it will return nil. + // note: the key should not be encoded + SplitRegion(ctx context.Context, regionInfo *RegionInfo, key []byte) (*RegionInfo, error) + // BatchSplitRegions splits a region from a batch of keys. + // note: the keys should not be encoded + BatchSplitRegions(ctx context.Context, regionInfo *RegionInfo, keys [][]byte) ([]*RegionInfo, error) + // BatchSplitRegionsWithOrigin splits a region from a batch of keys and return the original region and split new regions + BatchSplitRegionsWithOrigin(ctx context.Context, regionInfo *RegionInfo, keys [][]byte) (*RegionInfo, []*RegionInfo, error) + // ScatterRegion scatters a specified region. + ScatterRegion(ctx context.Context, regionInfo *RegionInfo) error + // GetOperator gets the status of operator of the specified region. + GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) + // ScanRegion gets a list of regions, starts from the region that contains key. + // Limit limits the maximum number of regions returned. + ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*RegionInfo, error) + // GetPlacementRule loads a placement rule from PD. 
+ GetPlacementRule(ctx context.Context, groupID, ruleID string) (pdtypes.Rule, error) + // SetPlacementRule insert or update a placement rule to PD. + SetPlacementRule(ctx context.Context, rule pdtypes.Rule) error + // DeletePlacementRule removes a placement rule from PD. + DeletePlacementRule(ctx context.Context, groupID, ruleID string) error + // SetStoreLabel add or update specified label of stores. If labelValue + // is empty, it clears the label. + SetStoresLabel(ctx context.Context, stores []uint64, labelKey, labelValue string) error +} + +func checkRegionConsistency(startKey, endKey []byte, regions []*RegionInfo) error { + // current pd can't guarantee the consistency of returned regions + if len(regions) == 0 { + return errors.Annotatef(errors2.ErrPDBatchScanRegion, "scan region return empty result, startKey: %s, endkey: %s", + redact.Key(startKey), redact.Key(endKey)) + } + + if bytes.Compare(regions[0].Region.StartKey, startKey) > 0 { + return errors.Annotatef(errors2.ErrPDBatchScanRegion, "first region's startKey > startKey, startKey: %s, regionStartKey: %s", + redact.Key(startKey), redact.Key(regions[0].Region.StartKey)) + } else if len(regions[len(regions)-1].Region.EndKey) != 0 && bytes.Compare(regions[len(regions)-1].Region.EndKey, endKey) < 0 { + return errors.Annotatef(errors2.ErrPDBatchScanRegion, "last region's endKey < startKey, startKey: %s, regionStartKey: %s", + redact.Key(endKey), redact.Key(regions[len(regions)-1].Region.EndKey)) + } + + cur := regions[0] + for _, r := range regions[1:] { + if !bytes.Equal(cur.Region.EndKey, r.Region.StartKey) { + return errors.Annotatef(errors2.ErrPDBatchScanRegion, "region endKey not equal to next region startKey, endKey: %s, startKey: %s", + redact.Key(cur.Region.EndKey), redact.Key(r.Region.StartKey)) + } + cur = r + } + + return nil +} + +// PaginateScanRegion scan regions with a limit pagination and +// return all regions at once. +// It reduces max gRPC message size. +func PaginateScanRegion( + ctx context.Context, client SplitClient, startKey, endKey []byte, limit int, +) ([]*RegionInfo, error) { + if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 { + return nil, errors.Annotatef(errors2.ErrRestoreInvalidRange, "startKey >= endKey, startKey: %s, endkey: %s", + hex.EncodeToString(startKey), hex.EncodeToString(endKey)) + } + + var regions []*RegionInfo + err := utildb.WithRetry(ctx, func() error { + regions = []*RegionInfo{} + scanStartKey := startKey + for { + batch, err := client.ScanRegions(ctx, scanStartKey, endKey, limit) + if err != nil { + return errors.Trace(err) + } + regions = append(regions, batch...) + if len(batch) < limit { + // No more region + break + } + scanStartKey = batch[len(batch)-1].Region.GetEndKey() + if len(scanStartKey) == 0 || + (len(endKey) > 0 && bytes.Compare(scanStartKey, endKey) >= 0) { + // All key space have scanned + break + } + } + if err := checkRegionConsistency(startKey, endKey, regions); err != nil { + log.Warn("failed to scan region, retrying", logutil.ShortError(err)) + return err + } + return nil + }, newScanRegionBackoffer()) + + return regions, err +} + +type scanRegionBackoffer struct { + attempt int +} + +func newScanRegionBackoffer() utildb.Backoffer { + return &scanRegionBackoffer{ + attempt: 3, + } +} + +// NextBackoff returns a duration to wait before retrying again +func (b *scanRegionBackoffer) NextBackoff(err error) time.Duration { + if errors2.ErrPDBatchScanRegion.Equal(err) { + // 500ms * 3 could be enough for splitting remain regions in the hole. 
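+		// In other words, only ErrPDBatchScanRegion is treated as retryable here: it is
+		// retried at most three times (attempt starts at 3 in newScanRegionBackoffer)
+		// with a fixed 500ms pause, while any other error zeroes the attempt counter
+		// below so that WithRetry gives up immediately.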
+ b.attempt-- + return 500 * time.Millisecond + } + b.attempt = 0 + return 0 +} + +// Attempt returns the remain attempt times +func (b *scanRegionBackoffer) Attempt() int { + return b.attempt +} + +const ( + splitRegionMaxRetryTime = 4 +) + +// pdClient is a wrapper of pd client, can be used by RegionSplitter. +type pdClient struct { + mu sync.Mutex + client pd.Client + tlsConf *tls.Config + storeCache map[uint64]*metapb.Store + isRawKv bool + // FIXME when config changed during the lifetime of pdClient, + // this may mislead the scatter. + needScatterVal bool + needScatterInit sync.Once +} + +// NewSplitClient returns a client used by RegionSplitter. +func NewSplitClient(client pd.Client, tlsConf *tls.Config, isRawKv bool) SplitClient { + cli := &pdClient{ + client: client, + tlsConf: tlsConf, + storeCache: make(map[uint64]*metapb.Store), + isRawKv: isRawKv, + } + return cli +} + +func (c *pdClient) needScatter(ctx context.Context) bool { + c.needScatterInit.Do(func() { + var err error + c.needScatterVal, err = c.checkNeedScatter(ctx) + if err != nil { + log.Warn("failed to check whether need to scatter, use permissive strategy: always scatter", logutil.ShortError(err)) + c.needScatterVal = true + } + if !c.needScatterVal { + log.Info("skipping scatter because the replica number isn't less than store count.") + } + }) + return c.needScatterVal +} + +func (c *pdClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) { + c.mu.Lock() + defer c.mu.Unlock() + store, ok := c.storeCache[storeID] + if ok { + return store, nil + } + store, err := c.client.GetStore(ctx, storeID) + if err != nil { + return nil, errors.Trace(err) + } + c.storeCache[storeID] = store + return store, nil +} + +func (c *pdClient) GetRegion(ctx context.Context, key []byte) (*RegionInfo, error) { + region, err := c.client.GetRegion(ctx, key) + if err != nil { + return nil, errors.Trace(err) + } + if region == nil { + return nil, nil + } + return &RegionInfo{ + Region: region.Meta, + Leader: region.Leader, + }, nil +} + +func (c *pdClient) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) { + region, err := c.client.GetRegionByID(ctx, regionID) + if err != nil { + return nil, errors.Trace(err) + } + if region == nil { + return nil, nil + } + return &RegionInfo{ + Region: region.Meta, + Leader: region.Leader, + }, nil +} + +func (c *pdClient) SplitRegion(ctx context.Context, regionInfo *RegionInfo, key []byte) (*RegionInfo, error) { + var peer *metapb.Peer + if regionInfo.Leader != nil { + peer = regionInfo.Leader + } else { + if len(regionInfo.Region.Peers) == 0 { + return nil, errors.Annotate(errors2.ErrRestoreNoPeer, "region does not have peer") + } + peer = regionInfo.Region.Peers[0] + } + storeID := peer.GetStoreId() + store, err := c.GetStore(ctx, storeID) + if err != nil { + return nil, errors.Trace(err) + } + conn, err := grpc.Dial(store.GetAddress(), grpc.WithInsecure()) + if err != nil { + return nil, errors.Trace(err) + } + defer conn.Close() + + client := tikvpb.NewTikvClient(conn) + resp, err := client.SplitRegion(ctx, &kvrpcpb.SplitRegionRequest{ + Context: &kvrpcpb.Context{ + RegionId: regionInfo.Region.Id, + RegionEpoch: regionInfo.Region.RegionEpoch, + Peer: peer, + }, + SplitKey: key, + }) + if err != nil { + return nil, errors.Trace(err) + } + if resp.RegionError != nil { + log.Error("fail to split region", + logutil.Region(regionInfo.Region), + logutil.Key("key", key), + zap.Stringer("regionErr", resp.RegionError)) + return nil, 
errors.Annotatef(errors2.ErrRestoreSplitFailed, "err=%v", resp.RegionError) + } + + // BUG: Left is deprecated, it may be nil even if split is succeed! + // Assume the new region is the left one. + newRegion := resp.GetLeft() + if newRegion == nil { + regions := resp.GetRegions() + for _, r := range regions { + if bytes.Equal(r.GetStartKey(), regionInfo.Region.GetStartKey()) { + newRegion = r + break + } + } + } + if newRegion == nil { + return nil, errors.Annotate(errors2.ErrRestoreSplitFailed, "new region is nil") + } + var leader *metapb.Peer + // Assume the leaders will be at the same store. + if regionInfo.Leader != nil { + for _, p := range newRegion.GetPeers() { + if p.GetStoreId() == regionInfo.Leader.GetStoreId() { + leader = p + break + } + } + } + return &RegionInfo{ + Region: newRegion, + Leader: leader, + }, nil +} + +func splitRegionWithFailpoint( + ctx context.Context, + regionInfo *RegionInfo, + peer *metapb.Peer, + client tikvpb.TikvClient, + keys [][]byte, +) (*kvrpcpb.SplitRegionResponse, error) { + failpoint.Inject("not-leader-error", func(injectNewLeader failpoint.Value) { + log.Debug("failpoint not-leader-error injected.") + resp := &kvrpcpb.SplitRegionResponse{ + RegionError: &errorpb.Error{ + NotLeader: &errorpb.NotLeader{ + RegionId: regionInfo.Region.Id, + }, + }, + } + if injectNewLeader.(bool) { + resp.RegionError.NotLeader.Leader = regionInfo.Leader + } + failpoint.Return(resp, nil) + }) + failpoint.Inject("somewhat-retryable-error", func() { + log.Debug("failpoint somewhat-retryable-error injected.") + failpoint.Return(&kvrpcpb.SplitRegionResponse{ + RegionError: &errorpb.Error{ + ServerIsBusy: &errorpb.ServerIsBusy{}, + }, + }, nil) + }) + return client.SplitRegion(ctx, &kvrpcpb.SplitRegionRequest{ + Context: &kvrpcpb.Context{ + RegionId: regionInfo.Region.Id, + RegionEpoch: regionInfo.Region.RegionEpoch, + Peer: peer, + }, + SplitKeys: keys, + }) +} + +func (c *pdClient) sendSplitRegionRequest( + ctx context.Context, regionInfo *RegionInfo, keys [][]byte, +) (*kvrpcpb.SplitRegionResponse, error) { + var splitErrors error + for i := 0; i < splitRegionMaxRetryTime; i++ { + var peer *metapb.Peer + // scanRegions may return empty Leader in https://github.com/tikv/pd/blob/v4.0.8/server/grpc_service.go#L524 + // so wee also need check Leader.Id != 0 + if regionInfo.Leader != nil && regionInfo.Leader.Id != 0 { + peer = regionInfo.Leader + } else { + if len(regionInfo.Region.Peers) == 0 { + return nil, multierr.Append(splitErrors, + errors.Annotatef(errors2.ErrRestoreNoPeer, "region[%d] doesn't have any peer", regionInfo.Region.GetId())) + } + peer = regionInfo.Region.Peers[0] + } + storeID := peer.GetStoreId() + store, err := c.GetStore(ctx, storeID) + if err != nil { + return nil, multierr.Append(splitErrors, err) + } + opt := grpc.WithInsecure() + if c.tlsConf != nil { + opt = grpc.WithTransportCredentials(credentials.NewTLS(c.tlsConf)) + } + conn, err := grpc.Dial(store.GetAddress(), opt) + if err != nil { + return nil, multierr.Append(splitErrors, err) + } + defer conn.Close() + client := tikvpb.NewTikvClient(conn) + resp, err := splitRegionWithFailpoint(ctx, regionInfo, peer, client, keys) + if err != nil { + return nil, multierr.Append(splitErrors, err) + } + if resp.RegionError != nil { + log.Warn("fail to split region", + logutil.Region(regionInfo.Region), + zap.Stringer("regionErr", resp.RegionError)) + splitErrors = multierr.Append(splitErrors, + errors.Annotatef(errors2.ErrRestoreSplitFailed, "split region failed: err=%v", resp.RegionError)) + if nl := 
resp.RegionError.NotLeader; nl != nil { + if leader := nl.GetLeader(); leader != nil { + regionInfo.Leader = leader + } else { + newRegionInfo, findLeaderErr := c.GetRegionByID(ctx, nl.RegionId) + if findLeaderErr != nil { + return nil, multierr.Append(splitErrors, findLeaderErr) + } + if !CheckRegionEpoch(newRegionInfo, regionInfo) { + return nil, multierr.Append(splitErrors, errors2.ErrKVEpochNotMatch) + } + log.Info("find new leader", zap.Uint64("new leader", newRegionInfo.Leader.Id)) + regionInfo = newRegionInfo + } + log.Info("split region meet not leader error, retrying", + zap.Int("retry times", i), + zap.Uint64("regionID", regionInfo.Region.Id), + zap.Any("new leader", regionInfo.Leader), + ) + continue + } + // TODO: we don't handle RegionNotMatch and RegionNotFound here, + // because I think we don't have enough information to retry. + // But maybe we can handle them here by some information the error itself provides. + if resp.RegionError.ServerIsBusy != nil || + resp.RegionError.StaleCommand != nil { + log.Warn("a error occurs on split region", + zap.Int("retry times", i), + zap.Uint64("regionID", regionInfo.Region.Id), + zap.String("error", resp.RegionError.Message), + zap.Any("error verbose", resp.RegionError), + ) + continue + } + return nil, errors.Trace(splitErrors) + } + return resp, nil + } + return nil, errors.Trace(splitErrors) +} + +func (c *pdClient) BatchSplitRegionsWithOrigin( + ctx context.Context, regionInfo *RegionInfo, keys [][]byte, +) (*RegionInfo, []*RegionInfo, error) { + resp, err := c.sendSplitRegionRequest(ctx, regionInfo, keys) + if err != nil { + return nil, nil, errors.Trace(err) + } + + regions := resp.GetRegions() + newRegionInfos := make([]*RegionInfo, 0, len(regions)) + var originRegion *RegionInfo + for _, region := range regions { + var leader *metapb.Peer + + // Assume the leaders will be at the same store. 
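+		// That is, for every region returned by the split, pick the peer that lives on
+		// the original leader's store and record it as the new leader; if no such peer
+		// exists, Leader stays nil and callers have to tolerate that.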
+ if regionInfo.Leader != nil { + for _, p := range region.GetPeers() { + if p.GetStoreId() == regionInfo.Leader.GetStoreId() { + leader = p + break + } + } + } + // original region + if region.GetId() == regionInfo.Region.GetId() { + originRegion = &RegionInfo{ + Region: region, + Leader: leader, + } + continue + } + newRegionInfos = append(newRegionInfos, &RegionInfo{ + Region: region, + Leader: leader, + }) + } + return originRegion, newRegionInfos, nil +} + +func (c *pdClient) BatchSplitRegions( + ctx context.Context, regionInfo *RegionInfo, keys [][]byte, +) ([]*RegionInfo, error) { + _, newRegions, err := c.BatchSplitRegionsWithOrigin(ctx, regionInfo, keys) + return newRegions, err +} + +func (c *pdClient) getStoreCount(ctx context.Context) (int, error) { + stores, err := util.GetAllTiKVStores(ctx, c.client, util.SkipTiFlash) + if err != nil { + return 0, err + } + return len(stores), err +} + +func (c *pdClient) getMaxReplica(ctx context.Context) (int, error) { + api := c.getPDAPIAddr() + configAPI := api + "/pd/api/v1/config" + req, err := http.NewRequestWithContext(ctx, "GET", configAPI, nil) + if err != nil { + return 0, errors.Trace(err) + } + res, err := httputil.NewClient(c.tlsConf).Do(req) + if err != nil { + return 0, errors.Trace(err) + } + var conf pdtypes.ReplicationConfig + if err := json.NewDecoder(res.Body).Decode(&conf); err != nil { + return 0, errors.Trace(err) + } + return int(conf.MaxReplicas), nil +} + +func (c *pdClient) checkNeedScatter(ctx context.Context) (bool, error) { + storeCount, err := c.getStoreCount(ctx) + if err != nil { + return false, err + } + maxReplica, err := c.getMaxReplica(ctx) + if err != nil { + return false, err + } + log.Info("checking whether need to scatter", zap.Int("store", storeCount), zap.Int("max-replica", maxReplica)) + // Skipping scatter may lead to leader unbalanced, + // currently, we skip scatter only when: + // 1. max-replica > store-count (Probably a misconfigured or playground cluster.) + // 2. store-count == 1 (No meaning for scattering.) + // We can still omit scatter when `max-replica == store-count`, if we create a BalanceLeader operator here, + // however, there isn't evidence for transform leader is much faster than scattering empty regions. 
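+	// Illustrative outcomes of the check below: store-count=3, max-replica=3 => scatter;
+	// store-count=2, max-replica=3 => skip; store-count=1 => always skip.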
+ return storeCount >= maxReplica && storeCount > 1, nil +} + +func (c *pdClient) ScatterRegion(ctx context.Context, regionInfo *RegionInfo) error { + if !c.needScatter(ctx) { + return nil + } + return c.client.ScatterRegion(ctx, regionInfo.Region.GetId()) +} + +func (c *pdClient) GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) { + return c.client.GetOperator(ctx, regionID) +} + +func (c *pdClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*RegionInfo, error) { + regions, err := c.client.ScanRegions(ctx, key, endKey, limit) + if err != nil { + return nil, errors.Trace(err) + } + regionInfos := make([]*RegionInfo, 0, len(regions)) + for _, region := range regions { + regionInfos = append(regionInfos, &RegionInfo{ + Region: region.Meta, + Leader: region.Leader, + }) + } + return regionInfos, nil +} + +func (c *pdClient) GetPlacementRule(ctx context.Context, groupID, ruleID string) (pdtypes.Rule, error) { + var rule pdtypes.Rule + addr := c.getPDAPIAddr() + if addr == "" { + return rule, errors.Annotate(errors2.ErrRestoreSplitFailed, "failed to add stores labels: no leader") + } + req, err := http.NewRequestWithContext(ctx, "GET", addr+path.Join("/pd/api/v1/config/rule", groupID, ruleID), nil) + if err != nil { + return rule, errors.Trace(err) + } + res, err := httputil.NewClient(c.tlsConf).Do(req) + if err != nil { + return rule, errors.Trace(err) + } + b, err := io.ReadAll(res.Body) + if err != nil { + return rule, errors.Trace(err) + } + res.Body.Close() + err = json.Unmarshal(b, &rule) + if err != nil { + return rule, errors.Trace(err) + } + return rule, nil +} + +func (c *pdClient) SetPlacementRule(ctx context.Context, rule pdtypes.Rule) error { + addr := c.getPDAPIAddr() + if addr == "" { + return errors.Annotate(errors2.ErrPDLeaderNotFound, "failed to add stores labels") + } + m, _ := json.Marshal(rule) + req, err := http.NewRequestWithContext(ctx, "POST", addr+path.Join("/pd/api/v1/config/rule"), bytes.NewReader(m)) + if err != nil { + return errors.Trace(err) + } + res, err := httputil.NewClient(c.tlsConf).Do(req) + if err != nil { + return errors.Trace(err) + } + return errors.Trace(res.Body.Close()) +} + +func (c *pdClient) DeletePlacementRule(ctx context.Context, groupID, ruleID string) error { + addr := c.getPDAPIAddr() + if addr == "" { + return errors.Annotate(errors2.ErrPDLeaderNotFound, "failed to add stores labels") + } + req, err := http.NewRequestWithContext(ctx, "DELETE", addr+path.Join("/pd/api/v1/config/rule", groupID, ruleID), nil) + if err != nil { + return errors.Trace(err) + } + res, err := httputil.NewClient(c.tlsConf).Do(req) + if err != nil { + return errors.Trace(err) + } + return errors.Trace(res.Body.Close()) +} + +func (c *pdClient) SetStoresLabel( + ctx context.Context, stores []uint64, labelKey, labelValue string, +) error { + b := []byte(fmt.Sprintf(`{"%s": "%s"}`, labelKey, labelValue)) + addr := c.getPDAPIAddr() + if addr == "" { + return errors.Annotate(errors2.ErrPDLeaderNotFound, "failed to add stores labels") + } + httpCli := httputil.NewClient(c.tlsConf) + for _, id := range stores { + req, err := http.NewRequestWithContext( + ctx, "POST", + addr+path.Join("/pd/api/v1/store", strconv.FormatUint(id, 10), "label"), + bytes.NewReader(b), + ) + if err != nil { + return errors.Trace(err) + } + res, err := httpCli.Do(req) + if err != nil { + return errors.Trace(err) + } + err = res.Body.Close() + if err != nil { + return errors.Trace(err) + } + } + return nil +} + +func (c *pdClient) 
getPDAPIAddr() string { + addr := c.client.GetLeaderAddr() + if addr != "" && !strings.HasPrefix(addr, "http") { + addr = "http://" + addr + } + return strings.TrimRight(addr, "/") +} + +func CheckRegionEpoch(newInfo, oldInfo *RegionInfo) bool { + return newInfo.Region.GetId() == oldInfo.Region.GetId() && + newInfo.Region.GetRegionEpoch().GetVersion() == oldInfo.Region.GetRegionEpoch().GetVersion() && + newInfo.Region.GetRegionEpoch().GetConfVer() == oldInfo.Region.GetRegionEpoch().GetConfVer() +} diff --git a/br/pkg/utils/utildb/db.go b/br/pkg/utils/utildb/db.go new file mode 100644 index 0000000000000..537cda6b04dbf --- /dev/null +++ b/br/pkg/utils/utildb/db.go @@ -0,0 +1,32 @@ +// Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0. + +package utildb + +import ( + "context" + "database/sql" +) + +var ( + // check sql.DB and sql.Conn implement QueryExecutor and DBExecutor + _ DBExecutor = &sql.DB{} + _ DBExecutor = &sql.Conn{} +) + +// QueryExecutor is a interface for exec query +type QueryExecutor interface { + QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) + QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row +} + +// StmtExecutor define both query and exec methods +type StmtExecutor interface { + QueryExecutor + ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) +} + +// DBExecutor is a interface for statements and txn +type DBExecutor interface { + StmtExecutor + BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) +} diff --git a/br/pkg/utils/utildb/retry.go b/br/pkg/utils/utildb/retry.go new file mode 100644 index 0000000000000..2d0ac668aa2d7 --- /dev/null +++ b/br/pkg/utils/utildb/retry.go @@ -0,0 +1,142 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + +package utildb + +import ( + "context" + "database/sql" + stderrors "errors" + "io" + "net" + "reflect" + "regexp" + "strings" + "time" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + tmysql "github.com/pingcap/tidb/errno" + tidbkv "github.com/pingcap/tidb/kv" + "go.uber.org/multierr" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +var retryableServerError = []string{ + "server closed", + "connection refused", + "connection reset by peer", + "channel closed", + "error trying to connect", + "connection closed before message completed", + "body write aborted", + "error during dispatch", + "put object timeout", +} + +// RetryableFunc presents a retryable operation. +type RetryableFunc func() error + +// Backoffer implements a backoff policy for retrying operations. +type Backoffer interface { + // NextBackoff returns a duration to wait before retrying again + NextBackoff(err error) time.Duration + // Attempt returns the remain attempt times + Attempt() int +} + +// WithRetry retries a given operation with a backoff policy. +// +// Returns nil if `retryableFunc` succeeded at least once. Otherwise, returns a +// multierr containing all errors encountered. 
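+//
+// A usage sketch (fetchSomething and fixedBackoffer are illustrative names, not
+// part of this package):
+//
+//	err := WithRetry(ctx, func() error {
+//		return fetchSomething(ctx) // any transient operation
+//	}, fixedBackoffer{attempts: 3, wait: 500 * time.Millisecond})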
+func WithRetry( + ctx context.Context, + retryableFunc RetryableFunc, + backoffer Backoffer, +) error { + var allErrors error + for backoffer.Attempt() > 0 { + err := retryableFunc() + if err != nil { + allErrors = multierr.Append(allErrors, err) + select { + case <-ctx.Done(): + return allErrors // nolint:wrapcheck + case <-time.After(backoffer.NextBackoff(err)): + } + } else { + return nil + } + } + return allErrors // nolint:wrapcheck +} + +// MessageIsRetryableStorageError checks whether the message returning from TiKV is retryable ExternalStorageError. +func MessageIsRetryableStorageError(msg string) bool { + msgLower := strings.ToLower(msg) + // UNSAFE! TODO: Add a error type for retryable connection error. + for _, errStr := range retryableServerError { + if strings.Contains(msgLower, errStr) { + return true + } + } + return false +} + +// sqlmock uses fmt.Errorf to produce expectation failures, which will cause +// unnecessary retry if not specially handled >:( +var stdFatalErrorsRegexp = regexp.MustCompile( + `^call to (?s:.*) was not expected|arguments do not match:|could not match actual sql|mock non-retryable error`, +) +var stdErrorType = reflect.TypeOf(stderrors.New("")) + +// IsRetryableError returns whether the error is transient (e.g. network +// connection dropped) or irrecoverable (e.g. user pressing Ctrl+C). This +// function returns `false` (irrecoverable) if `err == nil`. +// +// If the error is a multierr, returns true only if all suberrors are retryable. +func IsRetryableError(err error) bool { + for _, singleError := range errors.Errors(err) { + if !isSingleRetryableError(singleError) { + return false + } + } + return true +} + +func isSingleRetryableError(err error) bool { + err = errors.Cause(err) + + switch err { + case nil, context.Canceled, context.DeadlineExceeded, io.EOF, sql.ErrNoRows: + return false + } + if tidbkv.ErrKeyExists.Equal(err) || strings.Contains(err.Error(), "1062") { + return false + } + + switch nerr := err.(type) { + case net.Error: + return nerr.Timeout() + case *mysql.MySQLError: + switch nerr.Number { + // ErrLockDeadlock can retry to commit while meet deadlock + case tmysql.ErrUnknown, tmysql.ErrLockDeadlock, tmysql.ErrWriteConflictInTiDB, tmysql.ErrPDServerTimeout, tmysql.ErrTiKVServerTimeout, tmysql.ErrTiKVServerBusy, tmysql.ErrResolveLockTimeout, tmysql.ErrRegionUnavailable: + return true + default: + return false + } + default: + switch status.Code(err) { + case codes.DeadlineExceeded, codes.NotFound, codes.AlreadyExists, codes.PermissionDenied, codes.ResourceExhausted, codes.Aborted, codes.OutOfRange, codes.Unavailable, codes.DataLoss: + return true + case codes.Unknown: + if reflect.TypeOf(err) == stdErrorType { + return !stdFatalErrorsRegexp.MatchString(err.Error()) + } + return true + default: + return false + } + } +} diff --git a/br/pkg/utils/utildb/retry_test.go b/br/pkg/utils/utildb/retry_test.go new file mode 100644 index 0000000000000..2523d804d5f26 --- /dev/null +++ b/br/pkg/utils/utildb/retry_test.go @@ -0,0 +1,63 @@ +package utildb + +import ( + "context" + "fmt" + "io" + "net" + + "github.com/go-sql-driver/mysql" + . 
"github.com/pingcap/check" + "github.com/pingcap/errors" + "go.uber.org/multierr" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + tmysql "github.com/pingcap/tidb/errno" +) + +type utilSuite struct{} + +var _ = Suite(&utilSuite{}) + +func (s *utilSuite) TestIsRetryableError(c *C) { + c.Assert(IsRetryableError(context.Canceled), IsFalse) + c.Assert(IsRetryableError(context.DeadlineExceeded), IsFalse) + c.Assert(IsRetryableError(io.EOF), IsFalse) + c.Assert(IsRetryableError(&net.AddrError{}), IsFalse) + c.Assert(IsRetryableError(&net.DNSError{}), IsFalse) + c.Assert(IsRetryableError(&net.DNSError{IsTimeout: true}), IsTrue) + + // MySQL Errors + c.Assert(IsRetryableError(&mysql.MySQLError{}), IsFalse) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrUnknown}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrLockDeadlock}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrPDServerTimeout}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrTiKVServerTimeout}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrTiKVServerBusy}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrResolveLockTimeout}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrRegionUnavailable}), IsTrue) + c.Assert(IsRetryableError(&mysql.MySQLError{Number: tmysql.ErrWriteConflictInTiDB}), IsTrue) + + // gRPC Errors + c.Assert(IsRetryableError(status.Error(codes.Canceled, "")), IsFalse) + c.Assert(IsRetryableError(status.Error(codes.Unknown, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.DeadlineExceeded, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.NotFound, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.AlreadyExists, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.PermissionDenied, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.ResourceExhausted, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.Aborted, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.OutOfRange, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.Unavailable, "")), IsTrue) + c.Assert(IsRetryableError(status.Error(codes.DataLoss, "")), IsTrue) + + // sqlmock errors + c.Assert(IsRetryableError(fmt.Errorf("call to database Close was not expected")), IsFalse) + c.Assert(IsRetryableError(errors.New("call to database Close was not expected")), IsTrue) + + // multierr + c.Assert(IsRetryableError(multierr.Combine(context.Canceled, context.Canceled)), IsFalse) + c.Assert(IsRetryableError(multierr.Combine(&net.DNSError{IsTimeout: true}, &net.DNSError{IsTimeout: true})), IsTrue) + c.Assert(IsRetryableError(multierr.Combine(context.Canceled, &net.DNSError{IsTimeout: true})), IsFalse) +} diff --git a/go.mod b/go.mod index 2d67c63904fc3..1cc1687fb4af4 100644 --- a/go.mod +++ b/go.mod @@ -108,6 +108,7 @@ require ( github.com/kisielk/errcheck v1.6.1 github.com/kyoh86/exportloopref v0.1.8 github.com/nishanths/predeclared v0.2.2 + github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 github.com/tdakkota/asciicheck v0.1.1 honnef.co/go/tools v0.3.1 ) @@ -181,7 +182,7 @@ require ( github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4 // indirect - github.com/pkg/errors v0.9.1 // indirect + github.com/pkg/errors v0.9.1 github.com/pmezard/go-difflib v1.0.0 // 
indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/procfs v0.7.3 // indirect From 652111a676007949d7f0092214df880fd82390d0 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Mon, 4 Jul 2022 12:55:53 +0800 Subject: [PATCH 02/16] Add index acceleration: Add sysvars and config parameter. --- config/config.go | 5 ++ sessionctx/variable/sysvar.go | 13 +++++ sessionctx/variable/sysvar_test.go | 76 ++++++++++++++++++++++++++++++ sessionctx/variable/tidb_vars.go | 10 ++++ 4 files changed, 104 insertions(+) diff --git a/config/config.go b/config/config.go index 1070e6847f2ba..4b99ff71abf33 100644 --- a/config/config.go +++ b/config/config.go @@ -84,6 +84,8 @@ const ( DefExpensiveQueryTimeThreshold = 60 // DefMemoryUsageAlarmRatio is the threshold triggering an alarm which the memory usage of tidb-server instance exceeds. DefMemoryUsageAlarmRatio = 0.8 + // DefLightningSortPath is the default sort dir for add index lightning solution + DefLightningSortPath = "/tmp/tidb" ) // Valid config maps @@ -271,6 +273,8 @@ type Config struct { CheckMb4ValueInUTF8 AtomicBool `toml:"check-mb4-value-in-utf8" json:"check-mb4-value-in-utf8"` EnableCollectExecutionInfo bool `toml:"enable-collect-execution-info" json:"enable-collect-execution-info"` Plugin Plugin `toml:"plugin" json:"plugin"` + // LightningSortPath used to specific the lighting DDL local sort path. + LightningSortPath string `toml:"lightning-sort-path" json:"lightning-sort-path"` } // UpdateTempStoragePath is to update the `TempStoragePath` if port/statusPort was changed @@ -821,6 +825,7 @@ var defaultConf = Config{ TiDBEdition: "", VersionComment: "", TiDBReleaseVersion: "", + LightningSortPath: DefLightningSortPath, Log: Log{ Level: "info", Format: "text", diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 4af4de4e12837..12832ecf48617 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1673,6 +1673,19 @@ var defaultSysVars = []*SysVar{ metrics.ToggleSimplifiedMode(TiDBOptOn(s)) return nil }}, + {Scope: ScopeGlobal, Name: TiDBFastDDL, Value: BoolToOnOff(DefTiDBFastDDL), Type: TypeBool, GetGlobal: func(sv *SessionVars) (string, error) { + return BoolToOnOff(FastDDL.Load()), nil + }, SetGlobal: func(s *SessionVars, val string) error { + FastDDL.Store(TiDBOptOn(val)) + return nil + }}, + // This system var is set disk quota for lightning sort dir, from 100 GB to 1PB. + {Scope: ScopeGlobal, Name: TiDBDiskQuota, Value: strconv.Itoa(DefTiDBDiskQuota), Type: TypeInt, MinValue: DefTiDBDiskQuota, MaxValue: 1024 * 1024 * DefTiDBDiskQuota / 100, GetGlobal: func(sv *SessionVars) (string, error) { + return strconv.FormatInt(DiskQuota.Load(), 10), nil + }, SetGlobal: func(s *SessionVars, val string) error { + DiskQuota.Store(TidbOptInt64(val, DefTiDBDiskQuota)) + return nil + }}, } // FeedbackProbability points to the FeedbackProbability in statistics package. 
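The bounds on tidb_disk_quota above are easy to misread: MinValue is the 100 GB default itself, and the MaxValue expression `1024 * 1024 * DefTiDBDiskQuota / 100` works out to exactly 1 PiB, which is also the cap the test below exercises. A small standalone check of that arithmetic (constant names are local to this sketch):

package main

import "fmt"

func main() {
	const gb = int64(1) << 30
	const defTiDBDiskQuota = 100 * gb // default and MinValue: 100 GiB

	maxQuota := 1024 * 1024 * defTiDBDiskQuota / 100 // the MaxValue expression above

	fmt.Println("min =", defTiDBDiskQuota, "bytes (100 GiB)")
	fmt.Println("max =", maxQuota, "bytes; equals 1 PiB:", maxQuota == int64(1)<<50)
}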
diff --git a/sessionctx/variable/sysvar_test.go b/sessionctx/variable/sysvar_test.go index 6d94cb81e8ac0..e1efe44b22897 100644 --- a/sessionctx/variable/sysvar_test.go +++ b/sessionctx/variable/sysvar_test.go @@ -1075,3 +1075,79 @@ func TestTiDBCommitterConcurrency(t *testing.T) { require.Equal(t, val, fmt.Sprintf("%d", expected)) require.NoError(t, err) } + +func TestSetTIDBFastDDL(t *testing.T) { + vars := NewSessionVars() + mock := NewMockGlobalAccessor4Tests() + mock.SessionVars = vars + vars.GlobalVarsAccessor = mock + fastDDL := GetSysVar(TiDBFastDDL) + + // Default off + require.Equal(t, fastDDL.Value, Off) + + // Set to On + err := mock.SetGlobalSysVar(TiDBFastDDL, On) + require.NoError(t, err) + val, err1 := mock.GetGlobalSysVar(TiDBFastDDL) + require.NoError(t, err1) + require.Equal(t, On, val) + + // Set to off + err = mock.SetGlobalSysVar(TiDBFastDDL, Off) + require.NoError(t, err) + val, err1 = mock.GetGlobalSysVar(TiDBFastDDL) + require.NoError(t, err1) + require.Equal(t, Off, val) +} + +func TestSetTIDBDiskQuota(t *testing.T) { + vars := NewSessionVars() + mock := NewMockGlobalAccessor4Tests() + mock.SessionVars = vars + vars.GlobalVarsAccessor = mock + diskQuota := GetSysVar(TiDBDiskQuota) + var ( + gb int64 = 1024 * 1024 * 1024 + pb int64 = 1024 * 1024 * 1024 * 1024 * 1024 + err error + val string + ) + // Default 100 GB + require.Equal(t, diskQuota.Value, strconv.FormatInt(100*gb, 10)) + + // MinValue is 100 GB, set to 50 Gb is not allowed + err = mock.SetGlobalSysVar(TiDBDiskQuota, strconv.FormatInt(50*gb, 10)) + require.NoError(t, err) + val, err = mock.GetGlobalSysVar(TiDBDiskQuota) + require.NoError(t, err) + require.Equal(t, strconv.FormatInt(100*gb, 10), val) + + // Set to 100 GB + err = mock.SetGlobalSysVar(TiDBDiskQuota, strconv.FormatInt(100*gb, 10)) + require.NoError(t, err) + val, err = mock.GetGlobalSysVar(TiDBDiskQuota) + require.NoError(t, err) + require.Equal(t, strconv.FormatInt(100*gb, 10), val) + + // Set to 200 GB + err = mock.SetGlobalSysVar(TiDBDiskQuota, strconv.FormatInt(200*gb, 10)) + require.NoError(t, err) + val, err = mock.GetGlobalSysVar(TiDBDiskQuota) + require.NoError(t, err) + require.Equal(t, strconv.FormatInt(200*gb, 10), val) + + // Set to 1 Pb + err = mock.SetGlobalSysVar(TiDBDiskQuota, strconv.FormatInt(pb, 10)) + require.NoError(t, err) + val, err = mock.GetGlobalSysVar(TiDBDiskQuota) + require.NoError(t, err) + require.Equal(t, strconv.FormatInt(pb, 10), val) + + // MaxValue is 1 PB, set to 2 Pb is not allowed + err = mock.SetGlobalSysVar(TiDBDiskQuota, strconv.FormatInt(2*pb, 10)) + require.NoError(t, err) + val, err = mock.GetGlobalSysVar(TiDBDiskQuota) + require.NoError(t, err) + require.Equal(t, strconv.FormatInt(pb, 10), val) +} diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 2e55dfdb2353d..337ca024b0160 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -741,6 +741,10 @@ const ( // TiDBMaxAutoAnalyzeTime is the max time that auto analyze can run. If auto analyze runs longer than the value, it // will be killed. 0 indicates that there is no time limit. TiDBMaxAutoAnalyzeTime = "tidb_max_auto_analyze_time" + // TiDBFastDDL indicates whether use lighting to help acceleate adding index stmt. + TiDBFastDDL = "tidb_fast_ddl" + // TiDBDiskQuota used to set disk quota for lightning add index. + TiDBDiskQuota = "tidb_disk_quota" // TiDBEnableConcurrentDDL indicates whether to enable the new DDL framework. 
TiDBEnableConcurrentDDL = "tidb_enable_concurrent_ddl" ) @@ -949,6 +953,8 @@ const ( DefTiFlashFineGrainedShuffleStreamCount = -1 DefStreamCountWhenMaxThreadsNotSet = 8 DefTiFlashFineGrainedShuffleBatchSize = 8192 + DefTiDBFastDDL = false + DefTiDBDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB ) // Process global variables. @@ -998,6 +1004,10 @@ var ( PreparedPlanCacheMemoryGuardRatio = atomic.NewFloat64(DefTiDBPrepPlanCacheMemoryGuardRatio) EnableConcurrentDDL = atomic.NewBool(DefTiDBEnableConcurrentDDL) EnableNoopVariables = atomic.NewBool(DefTiDBEnableNoopVariables) + // TiDBFastDDL indicates whether to use lightning to enhance DDL reorg performance. + FastDDL = atomic.NewBool(false) + // Temporary Variable for set dist quota for lightning add index, int type, GB as unit + DiskQuota = atomic.NewInt64(DefTiDBDiskQuota) ) var ( From 4461cadef224fe69d8f677f726408f2e4e86b3b2 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Thu, 7 Jul 2022 12:45:32 +0800 Subject: [PATCH 03/16] Add framework for add index acceleration --- ddl/ddl.go | 4 + ddl/lightning/backend.go | 215 +++++++++++++++ ddl/lightning/backend_test.go | 46 ++++ ddl/lightning/engine.go | 265 +++++++++++++++++++ ddl/lightning/engine_mgr.go | 50 ++++ ddl/lightning/env.go | 240 +++++++++++++++++ ddl/lightning/env_test.go | 125 +++++++++ ddl/lightning/message.go | 63 +++++ ddl/lightning/res_mgr.go | 477 ++++++++++++++++++++++++++++++++++ 9 files changed, 1485 insertions(+) create mode 100644 ddl/lightning/backend.go create mode 100644 ddl/lightning/backend_test.go create mode 100644 ddl/lightning/engine.go create mode 100644 ddl/lightning/engine_mgr.go create mode 100644 ddl/lightning/env.go create mode 100644 ddl/lightning/env_test.go create mode 100644 ddl/lightning/message.go create mode 100644 ddl/lightning/res_mgr.go diff --git a/ddl/ddl.go b/ddl/ddl.go index 7ea6969353547..b1884605d4a37 100644 --- a/ddl/ddl.go +++ b/ddl/ddl.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/config" + lit "github.com/pingcap/tidb/ddl/lightning" "github.com/pingcap/tidb/ddl/util" "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/kv" @@ -532,6 +533,9 @@ func (d *ddl) Start(ctxPool *pools.ResourcePool) error { // Start some background routine to manage TiFlash replica. d.wg.Run(d.PollTiFlashRoutine) + // Init Lighting Global environment. Once met error then the + lit.InitGolbalLightningBackendEnv() + return nil } diff --git a/ddl/lightning/backend.go b/ddl/lightning/backend.go new file mode 100644 index 0000000000000..d46f7c6e9d803 --- /dev/null +++ b/ddl/lightning/backend.go @@ -0,0 +1,215 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lightning + +import ( + "context" + "database/sql" + "path/filepath" + "strconv" + + "github.com/pingcap/tidb/br/pkg/lightning/backend" + "github.com/pingcap/tidb/br/pkg/lightning/backend/local" + "github.com/pingcap/tidb/br/pkg/lightning/checkpoints" + "github.com/pingcap/tidb/br/pkg/lightning/config" + "github.com/pingcap/tidb/br/pkg/lightning/glue" + "github.com/pingcap/tidb/br/pkg/lightning/log" + tidbconf "github.com/pingcap/tidb/config" + "github.com/pingcap/tidb/parser" + "github.com/pingcap/tidb/parser/model" + "go.uber.org/zap" +) + +// BackendContext store a backend info for add index reorg task. +type BackendContext struct { + Key string // Currently, backend key used ddl job id string + Backend *backend.Backend + Ctx context.Context + cfg *config.Config + EngineCache map[string]*engineInfo + sysVars map[string]string + enabled bool + needRestore bool +} + +func newBackendContext(ctx context.Context, key string, be *backend.Backend, cfg *config.Config, vars map[string]string) *BackendContext { + return &BackendContext{ + Key: key, + Backend: be, + Ctx: ctx, + cfg: cfg, + EngineCache: make(map[string]*engineInfo, 10), + sysVars: vars, + } +} + +func generateLightningConfig(ctx context.Context, unique bool, bcKey string) (*config.Config, error) { + cfg := config.NewConfig() + cfg.TikvImporter.Backend = config.BackendLocal + // Each backend will build an single dir in linghtning dir. + cfg.TikvImporter.SortedKVDir = filepath.Join(GlobalEnv.SortPath, bcKey) + // Should not output err, after go through cfg.adjust function. + _, err := cfg.AdjustCommon() + if err != nil { + log.L().Warn(LitWarnConfigError, zap.Error(err)) + return nil, err + } + adjustImportMemory(cfg) + cfg.Checkpoint.Enable = true + if unique { + cfg.TikvImporter.DuplicateResolution = config.DupeResAlgRecord + } else { + cfg.TikvImporter.DuplicateResolution = config.DupeResAlgNone + } + cfg.TiDB.PdAddr = GlobalEnv.PdAddr + cfg.TiDB.Host = "127.0.0.1" + cfg.TiDB.StatusPort = int(GlobalEnv.Status) + // Set TLS related information + cfg.Security.CAPath = tidbconf.GetGlobalConfig().Security.ClusterSSLCA + cfg.Security.CertPath = tidbconf.GetGlobalConfig().Security.ClusterSSLCert + cfg.Security.KeyPath = tidbconf.GetGlobalConfig().Security.ClusterSSLKey + + return cfg, err +} + +func createLocalBackend(ctx context.Context, cfg *config.Config, glue glue.Glue) (backend.Backend, error) { + tls, err := cfg.ToTLS() + if err != nil { + log.L().Error(LitErrCreateBackendFail, zap.Error(err)) + return backend.Backend{}, err + } + + return local.NewLocalBackend(ctx, tls, cfg, glue, int(GlobalEnv.limit), nil) +} + +// CloseBackend close one backend for one add index task. +func CloseBackend(bcKey string) { + log.L().Info(LitInfoCloseBackend, zap.String("backend key", bcKey)) + GlobalEnv.LitMemRoot.DeleteBackendContext(bcKey) +} + +// GenBackendContextKey generate a backend key from job id for a DDL job. 
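+// For example, DDL job 123 maps to backend key "123"; GenEngineInfoKey in engine.go
+// builds the engine key by appending the index ID to the same prefix
+// (job 123, index 7 => "1237").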
+func GenBackendContextKey(jobID int64) string {
+	return strconv.FormatInt(jobID, 10)
+}
+
+// adjustImportMemory adjusts lightning memory parameters according to the memory root's max limitation.
+func adjustImportMemory(cfg *config.Config) {
+	var scale int64
+	defaultMemSize := int64(cfg.TikvImporter.LocalWriterMemCacheSize) * int64(cfg.TikvImporter.RangeConcurrency)
+	defaultMemSize += 4 * int64(cfg.TikvImporter.EngineMemCacheSize)
+	log.L().Info(LitInfoInitMemSetting,
+		zap.String("LocalWriterMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.LocalWriterMemCacheSize), 10)),
+		zap.String("EngineMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.EngineMemCacheSize), 10)),
+		zap.String("rangeConcurrency:", strconv.Itoa(cfg.TikvImporter.RangeConcurrency)))
+
+	if defaultMemSize > GlobalEnv.LitMemRoot.maxLimit {
+		scale = defaultMemSize / GlobalEnv.LitMemRoot.maxLimit
+	}
+
+	// Scale equal to 1 means there is no need to adjust memory settings for lightning.
+	// 0 means defaultMemSize is less than memory maxLimit for Lightning.
+	if scale == 1 || scale == 0 {
+		return
+	}
+
+	cfg.TikvImporter.LocalWriterMemCacheSize /= config.ByteSize(scale)
+	cfg.TikvImporter.EngineMemCacheSize /= config.ByteSize(scale)
+	// TODO: adjust the range concurrency number to control total concurrency in the future.
+	log.L().Info(LitInfoChgMemSetting,
+		zap.String("LocalWriterMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.LocalWriterMemCacheSize), 10)),
+		zap.String("EngineMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.EngineMemCacheSize), 10)),
+		zap.String("rangeConcurrency:", strconv.Itoa(cfg.TikvImporter.RangeConcurrency)))
+}
+
+type glueLit struct{}
+
+// Implement interface OwnsSQLExecutor
+func (g glueLit) OwnsSQLExecutor() bool {
+	return false
+}
+
+// Implement interface GetSQLExecutor
+func (g glueLit) GetSQLExecutor() glue.SQLExecutor {
+	return nil
+}
+
+// Implement interface GetDB
+func (g glueLit) GetDB() (*sql.DB, error) {
+	return nil, nil
+}
+
+// Implement interface GetParser
+func (g glueLit) GetParser() *parser.Parser {
+	return nil
+}
+
+// Implement interface GetTables
+func (g glueLit) GetTables(context.Context, string) ([]*model.TableInfo, error) {
+	return nil, nil
+}
+
+// Implement interface GetSession
+func (g glueLit) GetSession(context.Context) (checkpoints.Session, error) {
+	return nil, nil
+}
+
+// Implement interface OpenCheckpointsDB
+func (g glueLit) OpenCheckpointsDB(context.Context, *config.Config) (checkpoints.DB, error) {
+	return nil, nil
+}
+
+// Record is used to report some information (key, value) to the host TiDB, such as progress and the current stage.
+func (g glueLit) Record(string, uint64) {
+
+}
+
+// IsEngineLightningBackfill shows whether the lightning backend env is set up.
+func IsEngineLightningBackfill(id int64) bool {
+	bcKey := GenBackendContextKey(id)
+	bc, exist := GlobalEnv.LitMemRoot.getBackendContext(bcKey, false)
+	if !exist {
+		return false
+	}
+	return bc.enabled
+}
+
+// SetEnable sets the backend status.
+func SetEnable(id int64, value bool) {
+	bcKey := GenBackendContextKey(id)
+	bc, exist := GlobalEnv.LitMemRoot.getBackendContext(bcKey, false)
+	if exist {
+		bc.enabled = value
+	}
+}
+
+// NeedRestore shows whether the engine is created.
+func NeedRestore(id int64) bool {
+	bcKey := GenBackendContextKey(id)
+	bc, exist := GlobalEnv.LitMemRoot.getBackendContext(bcKey, false)
+	if !exist {
+		return false
+	}
+	return bc.needRestore
+}
+
+// SetNeedRestore sets the engine status.
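adjustImportMemory above scales Lightning's write buffers down whenever the configured defaults would exceed the memory root's limit: defaultMemSize is LocalWriterMemCacheSize * RangeConcurrency + 4 * EngineMemCacheSize, and both cache sizes are divided by defaultMemSize / maxLimit when that ratio is above one. The standalone sketch below reproduces that arithmetic for the larger quotas in TestAdjustMemory; the 128 MiB / 512 MiB / 16 defaults are inferred from the expected values in that test, not restated from Lightning's source.

package main

import "fmt"

func main() {
	const mb = int64(1) << 20

	// Assumed defaults (inferred from the TestAdjustMemory expectations):
	localWriter, engineCache, rangeConcurrency := 128*mb, 512*mb, int64(16)

	defaultMemSize := localWriter*rangeConcurrency + 4*engineCache // 4 GiB in total

	for _, quota := range []int64{256 * mb, 1024 * mb, 4096 * mb} {
		var scale int64
		if defaultMemSize > quota {
			scale = defaultMemSize / quota
		}
		if scale == 0 || scale == 1 { // nothing to adjust
			fmt.Printf("quota=%4dMiB: keep %dMiB / %dMiB\n", quota/mb, localWriter/mb, engineCache/mb)
			continue
		}
		fmt.Printf("quota=%4dMiB: scale=%d -> %dMiB / %dMiB\n",
			quota/mb, scale, localWriter/scale/mb, engineCache/scale/mb)
	}
}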
+func SetNeedRestore(id int64, value bool) { + bcKey := GenBackendContextKey(id) + bc, exist := GlobalEnv.LitMemRoot.getBackendContext(bcKey, false) + if exist { + bc.needRestore = value + } +} diff --git a/ddl/lightning/backend_test.go b/ddl/lightning/backend_test.go new file mode 100644 index 0000000000000..f66f4c65036ac --- /dev/null +++ b/ddl/lightning/backend_test.go @@ -0,0 +1,46 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lightning + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestAdjustMemory(t *testing.T) { + type TestCase struct { + name string + quota int64 + lsize int64 + ensize int64 + } + tests := []TestCase{ + {"Mem1", 4 * _kb, 256 * _kb, 1 * _mb}, + {"Mem2", 8 * _mb, 256 * _kb, 1 * _mb}, + {"Mem3", 256 * _mb, 8 * _mb, 32 * _mb}, + {"Mem4", 1 * _gb, 32 * _mb, 128 * _mb}, + {"Mem5", 4 * _gb, 128 * _mb, 512 * _mb}, + } + InitGolbalLightningBackendEnv() + for _, test := range tests { + GlobalEnv.LitMemRoot.Reset(test.quota) + cfg, err := generateLightningConfig(context.TODO(), false, "bckey") + require.NoError(t, err) + require.Equal(t, test.lsize, int64(cfg.TikvImporter.LocalWriterMemCacheSize)) + require.Equal(t, test.ensize, int64(cfg.TikvImporter.EngineMemCacheSize)) + } +} diff --git a/ddl/lightning/engine.go b/ddl/lightning/engine.go new file mode 100644 index 0000000000000..03e812758ba7d --- /dev/null +++ b/ddl/lightning/engine.go @@ -0,0 +1,265 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lightning + +import ( + "context" + "strconv" + + "github.com/google/uuid" + "github.com/pingcap/tidb/br/pkg/lightning/backend" + "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" + "github.com/pingcap/tidb/br/pkg/lightning/common" + "github.com/pingcap/tidb/br/pkg/lightning/config" + "github.com/pingcap/tidb/br/pkg/lightning/log" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/table" + "github.com/pkg/errors" + "go.uber.org/zap" +) + +var ( + compactMem int64 = 1 * _gb + compactConcurr int = 4 +) + +// One engine for one index reorg task, each task will create new writer under the +// Opened Engine. Note engineInfo is not thread safe. 
+type engineInfo struct { + id int32 + key string + + backCtx *BackendContext + openedEngine *backend.OpenedEngine + uuid uuid.UUID + cfg *backend.EngineConfig + tableName string + WriterCount int + writerCache map[string]*backend.LocalEngineWriter +} + +// NewEngineInfo create a new EngineInfo struct. +func NewEngineInfo( + id int32, key string, cfg *backend.EngineConfig, bCtx *BackendContext, + en *backend.OpenedEngine, tblName string, uuid uuid.UUID, wCnt int) *engineInfo { + ei := engineInfo{ + id: id, + key: key, + cfg: cfg, + backCtx: bCtx, + openedEngine: en, + uuid: uuid, + tableName: tblName, + WriterCount: wCnt, + writerCache: make(map[string]*backend.LocalEngineWriter, wCnt), + } + return &ei +} + +// GenEngineInfoKey generate one engine key with jobID and indexID. +func GenEngineInfoKey(jobID int64, indexID int64) string { + return strconv.FormatInt(jobID, 10) + strconv.FormatInt(indexID, 10) +} + +// CreateEngine will create a engine to do backfill task for one add index reorg task. +func CreateEngine( + ctx context.Context, + job *model.Job, + backendKey string, + engineKey string, + indexID int32, + wCnt int) (err error) { + var cfg backend.EngineConfig + cfg.Local = &backend.LocalEngineConfig{ + Compact: true, + CompactThreshold: compactMem, + CompactConcurrency: compactConcurr, + } + // Get a created backend to create engine under it. + bc := GlobalEnv.LitMemRoot.backendCache[backendKey] + be := bc.Backend + + // Opne one engine under an exist backend + en, err := be.OpenEngine(ctx, &cfg, job.TableName, indexID) + if err != nil { + errMsg := LitErrCreateEngineFail + err.Error() + log.L().Error(errMsg) + return errors.New(errMsg) + } + uuid := en.GetEngineUUID() + ei := NewEngineInfo(indexID, engineKey, &cfg, bc, en, job.TableName, uuid, wCnt) + GlobalEnv.LitMemRoot.EngineMgr.StoreEngineInfo(engineKey, ei) + bc.EngineCache[engineKey] = ei + return nil +} + +// FinishIndexOp will finished local index preparation job and ingest index sst file into TiKV. +func FinishIndexOp(ctx context.Context, engineInfoKey string, tbl table.Table, unique bool) (err error) { + var errMsg string + var keyMsg string + ei, exist := GlobalEnv.LitMemRoot.EngineMgr.LoadEngineInfo(engineInfoKey) + if !exist { + return errors.New(LitErrGetEngineFail) + } + defer func() { + GlobalEnv.LitMemRoot.EngineMgr.ReleaseEngine(engineInfoKey) + }() + + keyMsg = "backend key:" + ei.backCtx.Key + "Engine key:" + ei.key + // Close engine and finish local tasks of lightning. + log.L().Info(LitInfoCloseEngine, zap.String("backend key", ei.backCtx.Key), zap.String("Engine key", ei.key)) + indexEngine := ei.openedEngine + closeEngine, err1 := indexEngine.Close(ei.backCtx.Ctx, ei.cfg) + if err1 != nil { + errMsg = LitErrCloseEngineErr + keyMsg + log.L().Error(errMsg) + return errors.New(errMsg) + } + + // Reset disk quota before ingest, if user changed it. + GlobalEnv.checkAndResetQuota() + + // Ingest data to TiKV + log.L().Info(LitInfoStartImport, zap.String("backend key", ei.backCtx.Key), + zap.String("Engine key", ei.key), + zap.String("Split Region Size", strconv.FormatInt(int64(config.SplitRegionSize), 10))) + err = closeEngine.Import(ctx, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + if err != nil { + errMsg = LitErrIngestDataErr + keyMsg + log.L().Error(errMsg) + return errors.New(errMsg) + } + + // Clean up the engine local workspace. 
+ err = closeEngine.Cleanup(ctx) + if err != nil { + errMsg = LitErrCloseEngineErr + keyMsg + log.L().Error(errMsg) + return errors.New(errMsg) + } + + // Check Remote duplicate value for index + if unique { + hasDupe, err := ei.backCtx.Backend.CollectRemoteDuplicateRows(ctx, tbl, ei.tableName, &kv.SessionOptions{ + SQLMode: mysql.ModeStrictAllTables, + SysVars: ei.backCtx.sysVars, + }) + if hasDupe { + errMsg = LitErrRemoteDupExistErr + keyMsg + log.L().Error(errMsg) + return errors.New(errMsg) + } else if err != nil { + errMsg = LitErrRemoteDupCheckrr + keyMsg + log.L().Error(errMsg) + return errors.New(errMsg) + } + } + return nil +} + +// FlushEngine flush an lightning engine memory data into local disk. +func FlushEngine(engineKey string, ei *engineInfo) error { + err := ei.openedEngine.Flush(ei.backCtx.Ctx) + if err != nil { + log.L().Error(LitErrFlushEngineErr, zap.String("Engine key:", engineKey)) + return err + } + return nil +} + +// UnsafeImportEngineData check if disk consumption is over disk quota, if yes then ingest temp file into TiKV +func UnsafeImportEngineData(jobID int64, indexID int64) error { + engineKey := GenEngineInfoKey(jobID, indexID) + ei, exist := GlobalEnv.LitMemRoot.EngineMgr.LoadEngineInfo(engineKey) + if !exist { + log.L().Error(LitErrGetEngineFail, zap.String("Engine key:", engineKey)) + return errors.New(LitErrGetEngineFail) + } + + totalStorageUsed, totalStorageAvail := GlobalEnv.LitMemRoot.DiskStat() + GlobalEnv.checkAndResetQuota() + if GlobalEnv.NeedImportEngineData(totalStorageUsed, totalStorageAvail) { + // ToDo it should be changed according checkpoint solution. + // Flush wirter cached data into local disk for engine first. + err := FlushEngine(engineKey, ei) + if err != nil { + return err + } + log.L().Info(LitInfoUnsafeImport, zap.String("Engine key:", engineKey), zap.String("Current total available disk:", strconv.FormatUint(totalStorageAvail, 10))) + err = ei.backCtx.Backend.UnsafeImportAndReset(ei.backCtx.Ctx, ei.uuid, int64(config.SplitRegionSize)*int64(config.MaxSplitRegionSizeRatio), int64(config.SplitRegionKeys)) + if err != nil { + log.L().Error(LitErrIngestDataErr, zap.String("Engine key:", engineKey), + zap.String("import partial file failed, current disk storage remains", strconv.FormatUint(totalStorageAvail, 10))) + return err + } + } + return nil +} + +// WorkerContext used keep one lightning local writer for one backfill worker. +type WorkerContext struct { + eInfo *engineInfo + lWrite *backend.LocalEngineWriter +} + +// InitWorkerContext will get worker local writer from engine info writer cache first, if exist. +// If local wirter not exist, then create new one and store it into engine info writer cache. +// note: operate ei.writeCache map is not thread safe please make sure there is sync mechaism to +// make sure the safe. +func (wCtx *WorkerContext) InitWorkerContext(engineKey string, workerid int) (err error) { + wCtxKey := engineKey + strconv.Itoa(workerid) + ei, exist := GlobalEnv.LitMemRoot.EngineMgr.enginePool[engineKey] + if !exist { + return errors.New(LitErrGetEngineFail) + } + wCtx.eInfo = ei + + // Fisrt get local writer from engine cache. + wCtx.lWrite, exist = ei.writerCache[wCtxKey] + // If not exist then build one + if !exist { + wCtx.lWrite, err = ei.openedEngine.LocalWriter(ei.backCtx.Ctx, &backend.LocalWriterConfig{}) + if err != nil { + return err + } + // Cache the lwriter, here we do not lock, because this is done under mem root alloc + // process it own the lock already while alloc object. 
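+	// The cache key is engineKey plus the worker id (wCtxKey above), so each backfill
+	// worker looks up and reuses its own LocalEngineWriter instead of opening a new one.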
+ ei.writerCache[wCtxKey] = wCtx.lWrite + } + return nil +} + +// WriteRow Write one row into local writer buffer. +func (wCtx *WorkerContext) WriteRow(key, idxVal []byte) error { + var kvs []common.KvPair = make([]common.KvPair, 1) + kvs[0].Key = key + kvs[0].Val = idxVal + row := kv.MakeRowsFromKvPairs(kvs) + return wCtx.lWrite.WriteRows(wCtx.eInfo.backCtx.Ctx, nil, row) +} + +// CanRestoreReorgTask only when backend and Engine still be cached, then the task could be restore, +// otherwise return false to let reorg task restart. +func CanRestoreReorgTask(jobID int64, indexID int64) bool { + engineInfoKey := GenEngineInfoKey(jobID, indexID) + bcKey := GenBackendContextKey(jobID) + _, enExist := GlobalEnv.LitMemRoot.EngineMgr.LoadEngineInfo(engineInfoKey) + _, bcExist := GlobalEnv.LitMemRoot.getBackendContext(bcKey, true) + if enExist && bcExist { + return true + } + return false +} diff --git a/ddl/lightning/engine_mgr.go b/ddl/lightning/engine_mgr.go new file mode 100644 index 0000000000000..28cda46ad14db --- /dev/null +++ b/ddl/lightning/engine_mgr.go @@ -0,0 +1,50 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lightning + +import ( + "github.com/pingcap/tidb/br/pkg/lightning/log" + "go.uber.org/zap" +) + +// EngineManager is a manager of engine. +type EngineManager struct { + enginePool map[string]*engineInfo +} + +func (em *EngineManager) init() { + em.enginePool = make(map[string]*engineInfo, 10) +} + +// StoreEngineInfo store one engineInfo into Manager. +func (em *EngineManager) StoreEngineInfo(key string, ei *engineInfo) { + em.enginePool[key] = ei +} + +// LoadEngineInfo load one engineInfo from Manager. +func (em *EngineManager) LoadEngineInfo(key string) (*engineInfo, bool) { + ei, exist := em.enginePool[key] + if !exist { + log.L().Error(LitErrGetEngineFail, zap.String("Engine_Manager:", "Not found")) + return nil, exist + } + return ei, exist +} + +// ReleaseEngine delete an engineInfo from Mangager. +func (em *EngineManager) ReleaseEngine(key string) { + log.L().Info(LitInfoEngineDelete, zap.String("Engine info key:", key)) + delete(em.enginePool, key) +} diff --git a/ddl/lightning/env.go b/ddl/lightning/env.go new file mode 100644 index 0000000000000..b14d4aa533b66 --- /dev/null +++ b/ddl/lightning/env.go @@ -0,0 +1,240 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lightning + +import ( + "errors" + "os" + "path/filepath" + "strconv" + "syscall" + + lcom "github.com/pingcap/tidb/br/pkg/lightning/common" + "github.com/pingcap/tidb/br/pkg/lightning/log" + "github.com/pingcap/tidb/config" + "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/util/logutil" + "go.uber.org/zap" +) + +const ( + _kb = 1024 + _mb = 1024 * _kb + _gb = 1024 * _mb + _tb = 1024 * _gb + _pb = 1024 * _tb + flushSize = 8 * _mb + importThreadhold float32 = 0.15 +) + +// ClusterInfo store cluster info struct +type ClusterInfo struct { + PdAddr string + // TidbHost string - 127.0.0.1 + Port uint + Status uint +} + +// Env store lightning global environment. +type Env struct { + limit int64 + ClusterInfo + SortPath string + LitMemRoot MemoryRoot + diskQuota int64 + IsInited bool +} + +var ( + // GlobalEnv global lightning environment var. + GlobalEnv Env + maxMemLimit uint64 = 1 * _gb +) + +func init() { + GlobalEnv.limit = 1024 // Init a default value 1024 for limit. + var rLimit syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) + if err != nil { + logutil.BgLogger().Warn(LitErrGetSysLimitErr, zap.String("OS error:", err.Error()), zap.String("Default: ", "1024.")) + } else { + GlobalEnv.limit = int64(rLimit.Cur) + } + GlobalEnv.IsInited = false + GlobalEnv.diskQuota = variable.DiskQuota.Load() + +} + +// InitGolbalLightningBackendEnv initialize Lightning execution environment. +func InitGolbalLightningBackendEnv() { + var ( + bufferSize uint64 + err error + diskQuota int64 + ) + log.SetAppLogger(logutil.BgLogger()) + GlobalEnv.IsInited = false + + cfg := config.GetGlobalConfig() + GlobalEnv.Port = cfg.Port + GlobalEnv.Status = cfg.Status.StatusPort + GlobalEnv.PdAddr = cfg.Path + + // Set Memory usage limitation to 1 GB + sbz := variable.GetSysVar("sort_buffer_size") + bufferSize, err = strconv.ParseUint(sbz.Value, 10, 64) + // If get bufferSize err, then maxMemLimtation is 128 MB + // Otherwise, the ddl maxMemLimitation is 1 GB + if err == nil { + maxMemLimit = bufferSize * 4 * _kb + log.L().Info(LitInfoSetMemLimit, + zap.String("Memory limitation set to:", strconv.FormatUint(maxMemLimit, 10))) + } else { + log.L().Info(LitWarnGenMemLimit, + zap.Error(err), + zap.String("will use default memory limitation:", strconv.FormatUint(maxMemLimit, 10))) + } + GlobalEnv.LitMemRoot.init(int64(maxMemLimit)) + // If Generated sortPath failed, lightning will initial failed. 
+ // also if the disk quota is not a proper value + GlobalEnv.SortPath, err = genLightningDataDir(cfg.LightningSortPath, cfg.Port) + if err != nil { + log.L().Warn(LitWarnEnvInitFail, + zap.String("Sort Path Error:", err.Error()), + zap.String("Lightning is initialized:", strconv.FormatBool(GlobalEnv.IsInited))) + return + } + + diskQuota, err = GlobalEnv.parseDiskQuota(variable.DiskQuota.Load()) + if err != nil { + log.L().Warn(LitWarnEnvInitFail, + zap.String("Sort Path disk quota:", err.Error()), + zap.String("Lightning is initialized:", strconv.FormatBool(GlobalEnv.IsInited)), + zap.String("Return disk quota:", strconv.FormatInt(diskQuota, 10))) + return + } + + GlobalEnv.IsInited = true + log.L().Info(LitInfoEnvInitSucc, + zap.String("Current memory usage:", strconv.FormatInt(GlobalEnv.LitMemRoot.currUsage, 10)), + zap.String("Memory limitation set to:", strconv.FormatUint(maxMemLimit, 10)), + zap.String("Sort Path disk quota:", strconv.FormatInt(GlobalEnv.diskQuota, 10)), + zap.String("Max open file number:", strconv.FormatInt(GlobalEnv.limit, 10)), + zap.String("Lightning is initialized:", strconv.FormatBool(GlobalEnv.IsInited))) +} + +// parseDiskQuota init dist quota for lightning execution environment. it will +// return 0 on err occurs, the quota value when there is no err. +func (l *Env) parseDiskQuota(val int64) (int64, error) { + sz, err := lcom.GetStorageSize(l.SortPath) + if err != nil { + log.L().Error(LitErrGetStorageQuota, + zap.String("Os error:", err.Error()), + zap.String("default disk quota", strconv.FormatInt(l.diskQuota, 10))) + return 0, err + } + + // If the disk quato is less than 100 GB, then disable lightning + if sz.Available < uint64(GlobalEnv.diskQuota) { + log.L().Error(LitErrDiskQuotaLess, + zap.String("disk quota", strconv.FormatInt(int64(sz.Available), 10))) + return 0, errors.New(LitErrDiskQuotaLess) + } + + // The Dist quota should be 100 GB to 1 PB + if val > int64(sz.Available) { + l.diskQuota = int64(sz.Available) + } else { + l.diskQuota = val + } + + return l.diskQuota, nil +} + +// Generate lightning local store dir in TiDB datadir. +// it will append -port to be tmp_ddl surfix. +func genLightningDataDir(sortPath string, port uint) (string, error) { + sortPathSurfix := "/tmp_ddl-" + strconv.Itoa(int(port)) + sortPath = filepath.Join(sortPath, sortPathSurfix) + defaultPath := filepath.Join("/tmp/tidb", sortPathSurfix) + shouldCreate := true + if info, err := os.Stat(sortPath); err != nil { + if !os.IsNotExist(err) { + log.L().Error(LitErrStatDirFail, zap.String("Sort path:", sortPath), + zap.String("Error:", err.Error())) + return defaultPath, err + } + } else if info.IsDir() { + // Currently remove all dir to clean garbage data. + // Todo when do checkpoint should change follow logic. + err := os.RemoveAll(sortPath) + if err != nil { + log.L().Error(LitErrDeleteDirFail, zap.String("Sort path:", sortPath), + zap.String("Error:", err.Error())) + } + } + + if shouldCreate { + err := os.MkdirAll(sortPath, 0o700) + if err != nil { + err := os.MkdirAll(defaultPath, 0o700) + if err != nil { + log.L().Error(LitErrCreateDirFail, zap.String("Sort path:", sortPath), + zap.String("Error:", err.Error())) + return defaultPath, err + } + return defaultPath, nil + } + } + log.L().Info(LitInfoSortDir, zap.String("data path:", sortPath)) + return sortPath, nil +} + +// NeedImportEngineData check whether need import data into TiKV, because disk available space is not enough. 
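+// With importThreadhold at 0.15 this means: for a 100 GB disk quota the check fires
+// once used storage reaches 85 GB, or once the available space on the sort path drops
+// to 15 GB or below. UnsafeImportEngineData (engine.go) relies on exactly this check,
+// roughly:
+//
+//	used, avail := GlobalEnv.LitMemRoot.DiskStat()
+//	if GlobalEnv.NeedImportEngineData(used, avail) {
+//		// flush the local writers and ingest the partially sorted data into TiKV
+//	}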
+func (l *Env) NeedImportEngineData(usedStorage, availDisk uint64) bool { + // If Lihgting used 85% of diskQuota or there is less than 15% diskQuota left, then should ingest data to TiKV. + if usedStorage >= uint64((1-importThreadhold)*float32(l.diskQuota)) { + log.L().Info(LitInfoDiskMaxLimit, zap.String("Disk used", strconv.FormatUint(usedStorage, 10))) + return true + } + if availDisk <= uint64(importThreadhold*float32(l.diskQuota)) { + log.L().Info(LitWarnDiskShortage, zap.String("Disk available", strconv.FormatUint(availDisk, 10))) + return true + } + return false +} + +// checkAndResetQuota check whether sysvar disk quota is set to a smaller value and adjust according. +func (l *Env) checkAndResetQuota() { + var newQuota int64 = variable.DiskQuota.Load() + if newQuota == l.diskQuota { + return + } + + sz, err := lcom.GetStorageSize(l.SortPath) + // 1, When storage has enough volumn and also there at least 10% available space. + // 2, Set a small quota than before. + if (err != nil && sz.Capacity > uint64(newQuota) && sz.Available > uint64(newQuota/10)) || l.diskQuota >= newQuota { + log.L().Info(LitInfoDiskQuotaChg, zap.String("Sort Path disk quota change from", strconv.FormatInt(GlobalEnv.diskQuota, 10)), + zap.String("To:", strconv.FormatInt(newQuota, 10))) + l.diskQuota = newQuota + } +} + +// SetMinQuota set disk Quota to a low value to let unit test pass. +// Only used for test. +func (l *Env) SetMinQuota() { + l.diskQuota = 50 * _mb +} diff --git a/ddl/lightning/env_test.go b/ddl/lightning/env_test.go new file mode 100644 index 0000000000000..9a46505a6b298 --- /dev/null +++ b/ddl/lightning/env_test.go @@ -0,0 +1,125 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lightning + +import ( + "testing" + + "github.com/pingcap/tidb/sessionctx/variable" + "github.com/stretchr/testify/require" +) + +func TestGenSortPath(t *testing.T) { + type TestCase struct { + name string + inputPath string + outputPath string + } + tests := []TestCase{ + {"path1", "/tmp/", "/tmp/tmp_ddl-4000"}, + {"path2", "/datalit/tidb/data", "/tmp/tidb/tmp_ddl-4000"}, + {"path3", "127.0.0.1", "127.0.0.1/tmp_ddl-4000"}, + {"path4", "~/data1/", "~/data1/tmp_ddl-4000"}, + {"path5", "../data1/", "../data1/tmp_ddl-4000"}, + {"path6", "/datalit/tidb/data/", "/tmp/tidb/tmp_ddl-4000"}, + {"path7", "", "/tmp/tidb/tmp_ddl-4000"}, + {"path8", "/lightning", "/tmp/tidb/tmp_ddl-4000"}, + } + for _, test := range tests { + result, err := genLightningDataDir(test.inputPath, 4000) + if err == nil { + require.Equal(t, test.outputPath, result) + } else { + require.Error(t, err) + } + } +} + +func TestSetDiskQuota(t *testing.T) { + var minQuota int64 = 1 * _gb + type TestCase struct { + name string + sortPath string + inputQuota int64 + outputQuota int64 + } + tests := []TestCase{ + + {"quota1", "/tmp/", 10 * _gb, 100 * _gb}, + {"quota2", "/data/tidb/data", 100 * _gb, 100 * _gb}, + {"quota3", "127.0.0.1", 1000 * _gb, 1000 * _gb}, + {"quota4", "~/data1/", 512 * _gb, 512 * _gb}, + {"quota5", "../data1/", 10000 * _gb, 10000 * _gb}, + {"quota6", "/data/tidb/data/", 100000 * _gb, 100000 * _gb}, + {"quota7", "", 10000 * _gb, 10000 * _gb}, + {"quota8", "/lightning", 10000000 * _gb, 10000 * _gb}, + } + GlobalEnv.diskQuota = minQuota + for _, test := range tests { + result, _ := genLightningDataDir(test.sortPath, 4000) + GlobalEnv.SortPath = result + diskQuota, err := GlobalEnv.parseDiskQuota(test.inputQuota) + if err != nil { + require.Greater(t, minQuota, diskQuota) + } + + require.GreaterOrEqual(t, test.inputQuota, GlobalEnv.diskQuota) + } +} + +func TestAdjustDiskQuota(t *testing.T) { + type TestCase struct { + name string + sortPath string + minQuota int64 + maxQuota int64 + resetVal int64 + outputQuota int64 + } + tests := []TestCase{ + {"quota1", "/tmp/", 1 * _gb, 10 * _gb, 30 * _gb, 10 * _gb}, + {"quota2", "/data/tidb/data", 1 * _gb, 100 * _gb, 50 * _gb, 100 * _gb}, + {"quota3", "127.0.0.1", 10 * _gb, 201 * _gb, 100 * _gb, 201 * _gb}, + {"quota4", "~/data1/", 100 * _gb, 1 * _tb, 2 * _tb, 102 * _gb}, + {"quota5", "../data1/", 100 * _gb, 100 * _tb, 1 * _tb, 103 * _gb}, + {"quota6", "/data/tidb/data/", 100 * _gb, 1 * _pb, 1 * _tb, 104 * _gb}, + {"quota7", "", 100 * _gb, 2 * _pb, 100 * _gb, 205 * _gb}, + {"quota8", "/lightning", 1000 * _tb, 1 * _pb, 100 * _gb, 106 * _gb}, + } + GlobalEnv.SetMinQuota() + for _, test := range tests { + result, _ := genLightningDataDir(test.sortPath, 4000) + GlobalEnv.SortPath = result + // Set GlobalEnv.diskQuota to 1 GB + GlobalEnv.diskQuota = test.minQuota + quota, err := GlobalEnv.parseDiskQuota(test.maxQuota) + // If err means disk available less than test minQuota + if err != nil { + require.Greater(t, test.minQuota, quota) + } + + require.GreaterOrEqual(t, test.maxQuota, GlobalEnv.diskQuota) + + oldDiskQuota := GlobalEnv.diskQuota + variable.DiskQuota.Store(test.resetVal) + GlobalEnv.checkAndResetQuota() + if test.resetVal <= GlobalEnv.diskQuota { + require.Equal(t, test.resetVal, GlobalEnv.diskQuota) + } else { + require.Equal(t, oldDiskQuota, GlobalEnv.diskQuota) + } + + } +} diff --git a/ddl/lightning/message.go b/ddl/lightning/message.go new file mode 100644 index 0000000000000..0915bd7c61a44 --- /dev/null +++ b/ddl/lightning/message.go @@ -0,0 +1,63 @@ +// 
Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lightning + +// Message const text +const ( + LitErrAllocMemFail string = "Lightning: Allocate memory failed" + LitErrOutMaxMem string = "Lightning: Memory used up for Lightning add index" + LitErrUnknownMemType string = "Lightning: Unknown struct mem required for Lightning add index" + LitErrCreateDirFail string = "Lightning: Create lightning sort path error" + LitErrStatDirFail string = "Lightning: Stat lightning sort path error" + LitErrDeleteDirFail string = "Lightning: Delete lightning sort path error" + LitErrCreateBackendFail string = "Lightning: Build lightning backend failed, will use kernel index reorg method to backfill the index" + LitErrGetBackendFail string = "Lightning: Can not get cached backend" + LitErrCreateEngineFail string = "Lightning: Build lightning engine failed, will use kernel index reorg method to backfill the index" + LitErrCreateContextFail string = "Lightning: Build lightning worker context failed, will use kernel index reorg method to backfill the index" + LitErrGetEngineFail string = "Lightning: Can not get catched engininfo" + LitErrGetStorageQuota string = "Lightning: Get storage quota error" + LitErrGetSysLimitErr string = "Lightning: Get system open file limit error" + LitErrCloseEngineErr string = "Lightning: Close engine error" + LitErrCleanEngineErr string = "Lightning: Clean engine error" + LitErrFlushEngineErr string = "Lightning: Flush engine data err" + LitErrIngestDataErr string = "Lightning: Ingest data into TiKV error" + LitErrRemoteDupCheckrr string = "Lightning: Remote duplicate check error" + LitErrRemoteDupExistErr string = "Lightning: Remote duplicate index key exist" + LitErrDiskQuotaLess string = "Lightning: Specified disk quota is less than 100 GB disable the Lightning" + LitWarnEnvInitFail string = "Lightning: Initialize environment failed" + LitWarnBackendNOTExist string = "Lightning: Backend not exist" + LitWarnConfigError string = "Lightning: Build config for backend failed" + LitWarnGenMemLimit string = "Lightning: Generate memory max limitation" + LitWarnExtentWorker string = "Lightning: Extend worker failed will use worker count number worker to keep doing backfill task" + LitWarnDiskShortage string = "Lightning: Local disk storage shortage" + LitInfoEnvInitSucc string = "Lightning: Init global lightning backend environment finished" + LitInfoSortDir string = "Lightning: The lightning sorted dir" + LitInfoCreateBackend string = "Lightning: Create one backend for an DDL job" + LitInfoCloseBackend string = "Lightning: Close one backend for DDL job" + LitInfoOpenEngine string = "Lightning: Open an engine for index reorg task" + LitInfoCleanUpEngine string = "Lightning: CleanUp one engine for index reorg task" + LitInfoCreateWrite string = "Lightning: Create one local Writer for Index reorg task" + LitInfoCloseEngine string = "Lightning: Flush all writer and get closed engine" + LitInfoDelEngine string = "Lightning: Delete one engine" + 
LitInfoRemoteDuplCheck string = "Lightning: Start remote duplicate checking" + LitInfoStartImport string = "Lightning: Start to import data" + LitInfoSetMemLimit string = "Lightning: Set max memory limitation" + LitInfoChgMemSetting string = "Lightning: Change memory setting for lightning" + LitInfoInitMemSetting string = "Lightning: Initial memory setting for lightning," + LitInfoEngineDelete string = "Lightning: Delete one engine from engine manager cache" + LitInfoUnsafeImport string = "Lightning: Do a partial import data into TiKV" + LitInfoDiskMaxLimit string = "Lightning: Local disk storage usage arrive up limited" + LitInfoDiskQuotaChg string = "Lightning: Local disk storage quota changed" +) diff --git a/ddl/lightning/res_mgr.go b/ddl/lightning/res_mgr.go new file mode 100644 index 0000000000000..59a5df1cc8e55 --- /dev/null +++ b/ddl/lightning/res_mgr.go @@ -0,0 +1,477 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lightning + +import ( + "context" + "errors" + "strconv" + "sync" + "unsafe" + + "github.com/docker/go-units" + "github.com/pingcap/tidb/br/pkg/lightning/backend" + "github.com/pingcap/tidb/br/pkg/lightning/common" + "github.com/pingcap/tidb/br/pkg/lightning/config" + "github.com/pingcap/tidb/br/pkg/lightning/log" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/sessionctx/variable" + "go.uber.org/zap" +) + +type defaultType string + +// Default struct need to be count. +const ( + AllocBackendContext defaultType = "AllocBackendContext" + AllocEngineInfo defaultType = "AllocEngineInfo" + AllocWorkerContext defaultType = "AllocWorkerCONTEXT" + + // Used to mark the object size did not stored in map + allocFailed int64 = 0 +) + +// MemoryRoot traces the memory usage of all light DDL environment. +type MemoryRoot struct { + maxLimit int64 + currUsage int64 + engineUsage int64 + writeBuffer int64 + backendCache map[string]*BackendContext + EngineMgr EngineManager + // This map is use to store all object memory allocated size. + structSize map[string]int64 + mLock sync.Mutex +} + +func (m *MemoryRoot) init(maxMemUsage int64) { + // Set lightning memory quota to 2 times flushSize + if maxMemUsage < flushSize { + m.maxLimit = flushSize + } else { + m.maxLimit = maxMemUsage + } + + m.currUsage = 0 + m.engineUsage = 0 + m.writeBuffer = 0 + + m.backendCache = make(map[string]*BackendContext, 10) + m.EngineMgr.init() + m.structSize = make(map[string]int64, 10) + m.initDefaultStruceMemSize() +} + +// init Caculate memory struct size and save it into map. +func (m *MemoryRoot) initDefaultStruceMemSize() { + var ( + bc BackendContext + ei engineInfo + wCtx WorkerContext + ) + + m.structSize[string(AllocBackendContext)] = int64(unsafe.Sizeof(bc)) + m.structSize[string(AllocEngineInfo)] = int64(unsafe.Sizeof(ei)) + m.structSize[string(AllocWorkerContext)] = int64(unsafe.Sizeof(wCtx)) +} + +// Reset memory quota. 
but not less than flushSize(8 MB) +func (m *MemoryRoot) Reset(maxMemUsage int64) { + m.mLock.Lock() + defer func() { + m.mLock.Unlock() + }() + // Set lightning memory quota to flushSize + if maxMemUsage < flushSize { + m.maxLimit = flushSize + } else { + m.maxLimit = maxMemUsage + } +} + +// checkMemoryUsage check if there is enough memory to allocte struct for lighting execution. +func (m *MemoryRoot) checkMemoryUsage(t defaultType) error { + var requiredMem int64 + switch t { + case AllocBackendContext: + requiredMem = m.structSize[string(AllocBackendContext)] + case AllocEngineInfo: + requiredMem = m.structSize[string(AllocEngineInfo)] + case AllocWorkerContext: + requiredMem = m.structSize[string(AllocWorkerContext)] + default: + return errors.New(LitErrUnknownMemType) + } + + if m.currUsage+requiredMem > m.maxLimit { + return errors.New(LitErrOutMaxMem) + } + return nil +} + +// RegistBackendContext check if exist backend or will create one backend +func (m *MemoryRoot) RegistBackendContext(ctx context.Context, unique bool, key string, sqlMode mysql.SQLMode) error { + var ( + err error + bd backend.Backend + exist bool + cfg *config.Config + ) + m.mLock.Lock() + defer func() { + m.mLock.Unlock() + }() + // Firstly, get backend context from backend cache. + _, exist = m.backendCache[key] + // If bc not exist, build a new backend for reorg task, otherwise reuse exist backend + // to continue the task. + if !exist { + // Firstly, update real time memory usage, check if memory is enough. + m.totalMemoryConsume() + err = m.checkMemoryUsage(AllocBackendContext) + if err != nil { + log.L().Warn(LitErrAllocMemFail, zap.String("backend key", key), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return err + } + cfg, err = generateLightningConfig(ctx, unique, key) + if err != nil { + log.L().Warn(LitErrAllocMemFail, zap.String("backend key", key), + zap.String("Generate config for lightning error:", err.Error())) + return err + } + glue := glueLit{} + bd, err = createLocalBackend(ctx, cfg, glue) + if err != nil { + log.L().Error(LitErrCreateBackendFail, zap.String("backend key", key), + zap.String("Error", err.Error()), zap.Stack("stack trace")) + return err + } + + // Init important variables + sysVars := obtainImportantVariables() + + m.backendCache[key] = newBackendContext(ctx, key, &bd, cfg, sysVars) + + // Count memory usage. + m.currUsage += m.structSize[string(AllocBackendContext)] + log.L().Info(LitInfoCreateBackend, zap.String("backend key", key), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10)), + zap.String("Unique Index:", strconv.FormatBool(unique))) + } + return err +} + +// DeleteBackendContext uniform entry to close backend and release related memory allocated +func (m *MemoryRoot) DeleteBackendContext(bcKey string) { + // Only acquire/release lock here. + m.mLock.Lock() + defer func() { + delete(m.backendCache, bcKey) + m.mLock.Unlock() + }() + // Close backend logic + bc, exist := m.backendCache[bcKey] + if !exist { + log.L().Error(LitErrGetBackendFail, zap.String("backend key", bcKey)) + return + } + + // Close and delete backend by key + _ = m.deleteBackendEngines(bcKey) + bc.Backend.Close() + + // Reclaim memory. 
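+	// Two amounts are handed back here: the backend's dynamically tracked consumption
+	// recorded under bc.Key by totalMemoryConsume, and the fixed BackendContext struct
+	// size that was counted when the backend was registered.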
+ m.currUsage -= m.structSize[bc.Key] + delete(m.structSize, bcKey) + m.currUsage -= m.structSize[string(AllocBackendContext)] + log.L().Info(LitInfoCloseBackend, zap.String("backend key", bcKey), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) +} + +// ClearEngines in exception case, clear intermediate files that lightning engine generated for index. +func (m *MemoryRoot) ClearEngines(jobID int64, indexIDs ...int64) { + for _, indexID := range indexIDs { + eiKey := GenEngineInfoKey(jobID, indexID) + ei, exist := m.EngineMgr.enginePool[eiKey] + if exist { + indexEngine := ei.openedEngine + closedEngine, err := indexEngine.Close(ei.backCtx.Ctx, ei.cfg) + if err != nil { + log.L().Error(LitErrCloseEngineErr, zap.String("Engine key", eiKey)) + } + // Here the local intermediate file will be removed. + err = closedEngine.Cleanup(ei.backCtx.Ctx) + if err != nil { + log.L().Error(LitErrCleanEngineErr, zap.String("Engine key", eiKey)) + } + } + } +} + +// RegistEngineInfo check and allocate one EngineInfo, delete engineInfo are packed into close backend flow +// The worker count means at this time the engine need pre-check memory for workers usage. +func (m *MemoryRoot) RegistEngineInfo(job *model.Job, bcKey string, engineKey string, indexID int32, workerCount int) (int, error) { + var err error = nil + m.mLock.Lock() + defer func() { + m.mLock.Unlock() + }() + bc, exist := m.backendCache[bcKey] + if !exist { + log.L().Warn(LitWarnBackendNOTExist, zap.String("Backend key", bcKey)) + return 0, err + } + + // Caculate lightning concurrecy degree and set memory usage. + // and pre-allocate memory usage for worker + newWorkerCount := m.workerDegree(workerCount, engineKey) + en, exist1 := bc.EngineCache[engineKey] + if !exist1 { + // When return workerCount is 0, means there is no memory available for lightning worker. + if workerCount == int(allocFailed) { + log.L().Warn(LitErrAllocMemFail, zap.String("Backend key", bcKey), + zap.String("Engine key", engineKey), + zap.String("Expected worker count:", strconv.Itoa(workerCount)), + zap.String("Currnt alloc wroker count:", strconv.Itoa(newWorkerCount))) + return 0, errors.New(LitErrCleanEngineErr) + } + // Firstly, update and check the current memory usage + m.totalMemoryConsume() + err = m.checkMemoryUsage(AllocEngineInfo) + if err != nil { + log.L().Warn(LitErrAllocMemFail, zap.String("Backend key", bcKey), + zap.String("Engine key", engineKey), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return 0, err + } + // Create one slice for one backend on one stmt, current we share one engine + err = CreateEngine(bc.Ctx, job, bcKey, engineKey, indexID, workerCount) + if err != nil { + return 0, errors.New(LitErrCreateEngineFail) + } + + // Count memory usage. + m.currUsage += m.structSize[string(AllocEngineInfo)] + m.engineUsage += m.structSize[string(AllocEngineInfo)] + } else { + // If engine exist, then add newWorkerCount. 
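+	// Bump the writer count on the cached engineInfo so that the per-writer memory
+	// (structSize[AllocWorkerContext] * WriterCount) for these extra workers is also
+	// released when the backend is eventually closed.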
+ en.WriterCount += newWorkerCount + } + log.L().Info(LitInfoOpenEngine, zap.String("backend key", bcKey), + zap.String("Engine key", engineKey), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10)), + zap.String("Expected Worker Count", strconv.Itoa(workerCount)), + zap.String("Allocated worker count", strconv.Itoa(newWorkerCount))) + return newWorkerCount, nil +} + +// RegistWorkerContext create one lightning local writer context for one backfill worker. +// Also it will be clean within close backend process. +func (m *MemoryRoot) RegistWorkerContext(engineInfoKey string, id int) (*WorkerContext, error) { + var ( + err error + wCtx *WorkerContext + memRequire int64 = m.structSize[string(AllocWorkerContext)] + ) + m.mLock.Lock() + defer func() { + m.mLock.Unlock() + }() + // First to check the memory usage + m.totalMemoryConsume() + err = m.checkMemoryUsage(AllocWorkerContext) + if err != nil { + log.L().Error(LitErrAllocMemFail, zap.String("Engine key", engineInfoKey), + zap.String("worer Id:", strconv.Itoa(id)), + zap.String("Memory allocate:", strconv.FormatInt(memRequire, 10)), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return nil, err + } + + wCtx = &WorkerContext{} + err = wCtx.InitWorkerContext(engineInfoKey, id) + if err != nil { + log.L().Error(LitErrCreateContextFail, zap.String("Engine key", engineInfoKey), + zap.String("worer Id:", strconv.Itoa(id)), + zap.String("Memory allocate:", strconv.FormatInt(memRequire, 10)), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return nil, err + } + + // Count memory usage. + m.currUsage += memRequire + log.L().Info(LitInfoCreateWrite, zap.String("Engine key", engineInfoKey), + zap.String("worer Id:", strconv.Itoa(id)), + zap.String("Memory allocate:", strconv.FormatInt(memRequire, 10)), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return wCtx, err +} + +// Uniform entry to release Engine info. 
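+// deleteBackendEngines drops every engineInfo cached under this backend from the
+// engine pool and gives back the per-engine and per-writer struct sizes that were
+// counted at registration time. Callers must already hold m.mLock, as
+// DeleteBackendContext does.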
+func (m *MemoryRoot) deleteBackendEngines(bcKey string) error { + var ( + err error + count int + ) + bc, exist := m.getBackendContext(bcKey, true) + if !exist { + log.L().Error(LitErrGetBackendFail, zap.String("backend key", bcKey)) + return err + } + count = 0 + // Delete EngienInfo registered in m.engineManager.engineCache + for _, ei := range bc.EngineCache { + eiKey := ei.key + wCnt := ei.WriterCount + m.currUsage -= m.structSize[eiKey] + delete(m.structSize, eiKey) + delete(m.EngineMgr.enginePool, eiKey) + m.currUsage -= m.structSize[string(AllocWorkerContext)] * int64(wCnt) + count++ + log.L().Info(LitInfoCloseEngine, zap.String("backend key", bcKey), + zap.String("engine id", eiKey), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + } + + bc.EngineCache = make(map[string]*engineInfo, 10) + m.currUsage -= m.structSize[string(AllocEngineInfo)] * int64(count) + m.engineUsage -= m.structSize[string(AllocEngineInfo)] * int64(count) + log.L().Info(LitInfoCloseBackend, zap.String("backend key", bcKey), + zap.String("Current Memory Usage:", strconv.FormatInt(m.currUsage, 10)), + zap.String("Memory limitation:", strconv.FormatInt(m.maxLimit, 10))) + return err +} + +func (m *MemoryRoot) getBackendContext(bcKey string, needLog bool) (*BackendContext, bool) { + bc, exist := m.backendCache[bcKey] + if !exist { + if needLog { + log.L().Warn(LitWarnBackendNOTExist, zap.String("backend key:", bcKey)) + } + return nil, false + } + return bc, exist +} + +// totalMemoryConsume caculate current total memory consumption. +func (m *MemoryRoot) totalMemoryConsume() { + var diffSize int64 = 0 + for _, bc := range m.backendCache { + curSize := bc.Backend.TotalMemoryConsume() + bcSize, exist := m.structSize[bc.Key] + if !exist { + diffSize += curSize + m.structSize[bc.Key] = curSize + } else { + diffSize += curSize - bcSize + m.structSize[bc.Key] += curSize - bcSize + } + m.structSize[bc.Key] = curSize + } + m.currUsage += diffSize +} + +// workerDegree adjust worker count according the available memory. +// return 0 means there is no enough memory for one lightning worker. +func (m *MemoryRoot) workerDegree(workerCnt int, engineKey string) int { + var kvp common.KvPair + size := unsafe.Sizeof(kvp) + // If only one worker's memory init requirement still bigger than mem limitation. + if int64(size*units.MiB)+m.currUsage > m.maxLimit { + return int(allocFailed) + } + + for int64(size*units.MiB*uintptr(workerCnt))+m.currUsage > m.maxLimit && workerCnt > 1 { + workerCnt /= 2 + } + + m.currUsage += int64(size * units.MiB * uintptr(workerCnt)) + _, exist := m.structSize[engineKey] + if !exist { + m.structSize[engineKey] = int64(size * units.MiB * uintptr(workerCnt)) + } else { + m.structSize[engineKey] += int64(size * units.MiB * uintptr(workerCnt)) + } + return workerCnt +} + +// DiskStat check total lightning disk usage and storage availale space. 
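+// It returns the bytes consumed by all cached backends together with the bytes still
+// available on the sort path; if the sort path cannot be statted, the configured disk
+// quota is reported as the available size instead.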
+func (m *MemoryRoot) DiskStat() (uint64, uint64) { + var totalDiskUsed int64 + for _, bc := range m.backendCache { + _, _, bcDiskUsed, _ := bc.Backend.CheckDiskQuota(GlobalEnv.diskQuota) + totalDiskUsed += bcDiskUsed + } + sz, err := common.GetStorageSize(GlobalEnv.SortPath) + if err != nil { + log.L().Error(LitErrGetStorageQuota, + zap.String("OS error:", err.Error()), + zap.String("default disk quota", strconv.FormatInt(GlobalEnv.diskQuota, 10))) + return uint64(totalDiskUsed), uint64(GlobalEnv.diskQuota) + } + return uint64(totalDiskUsed), sz.Available +} + +// defaultImportantVariables is used in ObtainImportantVariables to retrieve the system +// variables from downstream which may affect KV encode result. The values record the default +// values if missing. +var defaultImportantVariables = map[string]string{ + "max_allowed_packet": "67108864", + "div_precision_increment": "4", + "time_zone": "SYSTEM", + "lc_time_names": "en_US", + "default_week_format": "0", + "block_encryption_mode": "aes-128-ecb", + "group_concat_max_len": "1024", +} + +// defaultImportVariablesTiDB is used in ObtainImportantVariables to retrieve the system +// variables from downstream in local/importer backend. The values record the default +// values if missing. +var defaultImportVariablesTiDB = map[string]string{ + "tidb_row_format_version": "1", +} + +func obtainImportantVariables() map[string]string { + // convert result into a map. fill in any missing variables with default values. + result := make(map[string]string, len(defaultImportantVariables)+len(defaultImportVariablesTiDB)) + for key, value := range defaultImportantVariables { + result[key] = value + v := variable.GetSysVar(key) + if v.Value != value { + result[key] = value + } + } + + for key, value := range defaultImportVariablesTiDB { + result[key] = value + v := variable.GetSysVar(key) + if v.Value != value { + result[key] = value + } + } + return result +} From 9087af51a0515b6c4d2b66f5177275ef3890de4a Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Fri, 8 Jul 2022 10:36:37 +0800 Subject: [PATCH 04/16] New backfill flow implement. --- ddl/backfilling.go | 441 +++++++++++++++++++++++- ddl/column.go | 1 - ddl/ddl_api.go | 2 +- ddl/ddl_worker.go | 20 +- ddl/delete_range.go | 30 +- ddl/foreign_key.go | 2 +- ddl/index.go | 201 ++++++++++- ddl/index_lightning.go | 574 +++++++++++++++++++++++++++++++ ddl/index_lightning_test.go | 97 ++++++ ddl/reorg.go | 202 ++++++++++- infoschema/tables.go | 10 + parser/model/model.go | 21 ++ sessionctx/variable/tidb_vars.go | 4 +- table/index.go | 2 + table/table.go | 3 + table/tables/index.go | 147 +++++++- table/tables/mutation_checker.go | 30 +- table/tables/tables.go | 5 + tablecodec/tablecodec.go | 31 ++ 19 files changed, 1774 insertions(+), 49 deletions(-) create mode 100644 ddl/index_lightning.go create mode 100644 ddl/index_lightning_test.go diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 0b4ff4b6d554e..0f600b44abd44 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -153,6 +153,8 @@ type backfillWorker struct { table table.Table closed bool priority int + // Mark if it use new backfill flow. + isNewBF bool } func newBackfillWorker(sessCtx sessionctx.Context, id int, t table.PhysicalTable, reorgInfo *reorgInfo) *backfillWorker { @@ -603,6 +605,24 @@ func (w *worker) writePhysicalTableRecord(t table.PhysicalTable, bfWorkerType ba // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". 
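+	// When lightning backfill is enabled for this job and PiTR is not active, the
+	// configured worker count is further capped by how many lightning workers fit into
+	// the memory quota (prepareLightningEngine below); otherwise the transactional
+	// backfill keeps the configured value unchanged.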
workerCnt := variable.GetDDLReorgWorkerCounter() + // Caculate worker count for lightnint. + // if litWorkerCnt is 0 or err exist, means not good for lightning execution, + // then go back use kernel way to reorg index. + var litWorkerCnt int + if isLightningEnabled(job.ID) && !needRestoreJob(job.ID) { + if !isPiTREnable(w) { + litWorkerCnt, err = prepareLightningEngine(job, indexInfo.ID, int(workerCnt)) + if err == nil && workerCnt >= int32(litWorkerCnt) { + workerCnt = int32(litWorkerCnt) + setNeedRestoreJob(job.ID, true) + } else { + logutil.BgLogger().Error("Lighting Create Engine failed.", zap.Error(err)) + } + } else { + // if enabled PiTR, use txn do backfill with new flow. + logutil.BgLogger().Error("PiTR enabled, disabled lightning backfill.") + } + } backfillWorkers := make([]*backfillWorker, 0, workerCnt) defer func() { closeBackfillWorkers(backfillWorkers) @@ -625,6 +645,17 @@ func (w *worker) writePhysicalTableRecord(t table.PhysicalTable, bfWorkerType ba if len(kvRanges) < int(workerCnt) { workerCnt = int32(len(kvRanges)) } + + if isLightningEnabled(job.ID) && needRestoreJob(job.ID) && workerCnt > int32(litWorkerCnt) { + count, err := prepareLightningEngine(job, indexInfo.ID, int(workerCnt-int32(litWorkerCnt))) + if err != nil { + errMsg := "Lightning engine lost, maybe cause data consistent problem, rollback job." + logutil.BgLogger().Error(errMsg, zap.String("Job ID:", strconv.FormatInt(job.ID, 10))) + return errors.New(errMsg) + } + workerCnt = int32(litWorkerCnt + count) + } + // Enlarge the worker size. for i := len(backfillWorkers); i < int(workerCnt); i++ { sessCtx := newContext(reorgInfo.d.store) @@ -648,10 +679,25 @@ func (w *worker) writePhysicalTableRecord(t table.PhysicalTable, bfWorkerType ba switch bfWorkerType { case typeAddIndexWorker: - idxWorker := newAddIndexWorker(sessCtx, w, i, t, indexInfo, decodeColMap, reorgInfo, jc) - idxWorker.priority = job.Priority - backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) - go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) + // Firstly, check and try lightning path + if isLightningEnabled(job.ID) && needRestoreJob(job.ID) { + idxWorker, err := newAddIndexWorkerLit(sessCtx, w, i, t, indexInfo, decodeColMap, reorgInfo, jc, job.ID) + if err == nil { + idxWorker.priority = job.Priority + backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) + go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) + } + } else { + var newBackFlow bool = false + if isLightningEnabled(job.ID) && !needRestoreJob(job.ID) { + newBackFlow = true + } + idxWorker := newAddIndexWorker(sessCtx, w, i, t, indexInfo, decodeColMap, reorgInfo, jc, newBackFlow) + idxWorker.priority = job.Priority + backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker) + idxWorker.isNewBF = newBackFlow + go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker, job) + } case typeUpdateColumnWorker: // Setting InCreateOrAlterStmt tells the difference between SELECT casting and ALTER COLUMN casting. sessCtx.GetSessionVars().StmtCtx.InCreateOrAlterStmt = true @@ -699,6 +745,14 @@ func (w *worker) writePhysicalTableRecord(t table.PhysicalTable, bfWorkerType ba zap.Int("regionCnt", len(kvRanges)), zap.String("startHandle", tryDecodeToHandleString(startKey)), zap.String("endHandle", tryDecodeToHandleString(endKey))) + + // Disk quota checking and import data into TiKV if needed. + // Do lightning flush data to make checkpoint. 
+ if isLightningEnabled(job.ID) && needRestoreJob(job.ID) { + if importPartialDataToTiKV(job.ID, indexInfo.ID) != nil { + return errors.Trace(err) + } + } remains, err := w.sendRangeTaskToWorkers(t, backfillWorkers, reorgInfo, &totalAddedCount, kvRanges) if err != nil { return errors.Trace(err) @@ -715,6 +769,48 @@ func (w *worker) writePhysicalTableRecord(t table.PhysicalTable, bfWorkerType ba // recordIterFunc is used for low-level record iteration. type recordIterFunc func(h kv.Handle, rowKey kv.Key, rawRecord []byte) (more bool, err error) +// indexIterFunc is used for low-level record iteration. +type indexIterFunc func(rowKey kv.Key, rawRecord []byte) (more bool, err error) + +func iterateSnapshotIndexes(ctx *JobContext, store kv.Storage, priority int, t table.Table, version uint64, + startKey kv.Key, endKey kv.Key, fn indexIterFunc) error { + ver := kv.Version{Ver: version} + snap := store.GetSnapshot(ver) + snap.SetOption(kv.Priority, priority) + snap.SetOption(kv.RequestSourceInternal, true) + snap.SetOption(kv.RequestSourceType, ctx.ddlJobSourceType()) + if tagger := ctx.getResourceGroupTaggerForTopSQL(); tagger != nil { + snap.SetOption(kv.ResourceGroupTagger, tagger) + } + + it, err := snap.Iter(startKey, endKey) + if err != nil { + return errors.Trace(err) + } + defer it.Close() + + for it.Valid() { + if !it.Key().HasPrefix(t.IndexPrefix()) { + break + } + + more, err := fn(it.Key(), it.Value()) + if !more || err != nil { + return errors.Trace(err) + } + + err = kv.NextUntil(it, util.RowKeyPrefixFilter(it.Key())) + if err != nil { + if kv.ErrNotExist.Equal(err) { + break + } + return errors.Trace(err) + } + } + + return nil +} + func iterateSnapshotRows(ctx *JobContext, store kv.Storage, priority int, t table.Table, version uint64, startKey kv.Key, endKey kv.Key, fn recordIterFunc) error { var firstKey kv.Key @@ -798,3 +894,340 @@ func getRangeEndKey(ctx *JobContext, store kv.Storage, priority int, t table.Tab return it.Key(), nil } + +func getIndexRangeEndKey(ctx *JobContext, store kv.Storage, priority int, t table.Table, startKey, endKey kv.Key) (kv.Key, error) { + snap := store.GetSnapshot(kv.MaxVersion) + snap.SetOption(kv.Priority, priority) + if tagger := ctx.getResourceGroupTaggerForTopSQL(); tagger != nil { + snap.SetOption(kv.ResourceGroupTagger, tagger) + } + snap.SetOption(kv.RequestSourceInternal, true) + snap.SetOption(kv.RequestSourceType, ctx.ddlJobSourceType()) + it, err := snap.IterReverse(endKey.Next()) + if err != nil { + return nil, errors.Trace(err) + } + defer it.Close() + + if !it.Valid() || !it.Key().HasPrefix(t.IndexPrefix()) { + return startKey, nil + } + if it.Key().Cmp(startKey) < 0 { + return startKey, nil + } + + return it.Key(), nil +} + +func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfillWorkerType, indexInfo *model.IndexInfo, oldColInfo, colInfo *model.ColumnInfo, reorgInfo *reorgInfo) error { + job := reorgInfo.Job + totalAddedCount := job.GetRowCount() + + startKey, endKey := reorgInfo.StartKey, reorgInfo.EndKey + + if err := w.isReorgRunnable(reorgInfo.Job); err != nil { + return errors.Trace(err) + } + if startKey == nil && endKey == nil { + return nil + } + + failpoint.Inject("MockCaseWhenParseFailure", func(val failpoint.Value) { + if val.(bool) { + failpoint.Return(errors.New("job.ErrCount:" + strconv.Itoa(int(job.ErrorCount)) + ", mock unknown type: ast.whenClause.")) + } + }) + + // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". 
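+	// The merge workers started below scan the temporary index key range and merge those
+	// delta index changes into the final index, so that writes which happened while the
+	// lightning import was running are not lost.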
+ workerCnt := variable.GetDDLReorgWorkerCounter() + + mergeWorkers := make([]*backfillWorker, 0, workerCnt) + defer func() { + closeBackfillWorkers(mergeWorkers) + }() + + for { + kvRanges, err := splitTableRanges(t, reorgInfo.d.store, startKey, endKey) + if err != nil { + return errors.Trace(err) + } + + // For dynamic adjust backfill worker number. + if err := loadDDLReorgVars(w); err != nil { + logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) + } + workerCnt = variable.GetDDLReorgWorkerCounter() + // If only have 1 range, we can only start 1 worker. + if len(kvRanges) < int(workerCnt) { + workerCnt = int32(len(kvRanges)) + } + + // Enlarge the worker size. + for i := len(mergeWorkers); i < int(workerCnt); i++ { + sessCtx := newContext(reorgInfo.d.store) + sessCtx.GetSessionVars().StmtCtx.IsDDLJobInQueue = true + // Simulate the sql mode environment in the worker sessionCtx. + sqlMode := reorgInfo.ReorgMeta.SQLMode + sessCtx.GetSessionVars().SQLMode = sqlMode + if err := setSessCtxLocation(sessCtx, reorgInfo); err != nil { + return errors.Trace(err) + } + + sessCtx.GetSessionVars().StmtCtx.BadNullAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.TruncateAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.OverflowAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.AllowInvalidDate = sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() + + idxWorker := newTempIndexWorker(sessCtx, w, i, t, indexInfo, reorgInfo) + idxWorker.priority = job.Priority + mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) + go idxWorker.backfillWorker.runMerge(reorgInfo.d, idxWorker, job) + } + // Shrink the worker size. 
+ if len(mergeWorkers) > int(workerCnt) { + workers := mergeWorkers[workerCnt:] + mergeWorkers = mergeWorkers[:workerCnt] + closeBackfillWorkers(workers) + } + + failpoint.Inject("checkMergeWorkerNum", func(val failpoint.Value) { + if val.(bool) { + num := int(atomic.LoadInt32(&TestCheckWorkerNumber)) + if num != 0 { + if num > len(kvRanges) { + if len(mergeWorkers) != len(kvRanges) { + failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) + } + } else if num != len(mergeWorkers) { + failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) + } + var wg sync.WaitGroup + wg.Add(1) + TestCheckWorkerNumCh <- &wg + wg.Wait() + } + } + }) + + logutil.BgLogger().Info("[ddl] start merge workers to merge delta index changes", + zap.Int("workerCnt", len(mergeWorkers)), + zap.Int("regionCnt", len(kvRanges)), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("endHandle", tryDecodeToHandleString(endKey))) + + remains, err := w.sendRangeTaskToMergeWorkers(t, mergeWorkers, reorgInfo, &totalAddedCount, kvRanges, t.GetPhysicalID()) + if err != nil { + return errors.Trace(err) + } + if len(remains) == 0 { + break + } + startKey = remains[0].StartKey + if err != nil { + return errors.Trace(err) + } + } + return nil +} + +func (w *backfillWorker) runMerge(d *ddlCtx, bf backfiller, job *model.Job) { + logutil.BgLogger().Info("[ddl] merge worker start", zap.Int("workerID", w.id)) + defer func() { + w.resultCh <- &backfillResult{err: dbterror.ErrReorgPanic} + }() + defer util.Recover(metrics.LabelDDL, "backfillWorker.run", nil, false) + for { + task, more := <-w.taskCh + if !more { + break + } + d.setDDLLabelForTopSQL(job) + + logutil.BgLogger().Debug("[ddl] merge worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) + failpoint.Inject("mockMergeRunErr", func() { + if w.id == 0 { + result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} + w.resultCh <- result + failpoint.Continue() + } + }) + + failpoint.Inject("mockHighLoadForMergeIndex", func() { + sqlPrefixes := []string{"alter"} + topsql.MockHighCPULoad(job.Query, sqlPrefixes, 5) + }) + + failpoint.Inject("mockMergeSlow", func() { + time.Sleep(30 * time.Millisecond) + }) + + // Dynamic change batch size. + w.batchCnt = int(variable.GetDDLReorgBatchSize()) + result := w.handleMergeTask(d, task, bf) + w.resultCh <- result + } + logutil.BgLogger().Info("[ddl] merge worker exit", zap.Int("workerID", w.id)) +} + +// handleMergeTask backfills range [task.startHandle, task.endHandle) handle's index to table. +func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { + handleRange := *task + result := &backfillResult{ + err: nil, + addedCount: 0, + nextKey: handleRange.startKey, + } + lastLogCount := 0 + lastLogTime := time.Now() + startTime := lastLogTime + rc := d.getReorgCtx(w.reorgInfo.Job) + + for { + // Give job chance to be canceled, if we not check it here, + // if there is panic in bf.BackfillDataInTxn we will never cancel the job. + // Because reorgRecordTask may run a long time, + // we should check whether this ddl job is still runnable. 
+ err := d.isReorgRunnable(w.reorgInfo.Job) + if err != nil { + result.err = err + return result + } + + taskCtx, err := bf.BackfillDataInTxn(handleRange) + if err != nil { + result.err = err + return result + } + + mergeBackfillCtxToResult(&taskCtx, result) + + // Although `handleRange` is for data in one region, but back fill worker still split it into many + // small reorg batch size slices and reorg them in many different kv txn. + // If a task failed, it may contained some committed small kv txn which has already finished the + // small range reorganization. + // In the next round of reorganization, the target handle range may overlap with last committed + // small ranges. This will cause the `redo` action in reorganization. + // So for added count and warnings collection, it is recommended to collect the statistics in every + // successfully committed small ranges rather than fetching it in the total result. + rc.increaseRowCount(int64(taskCtx.addedCount)) + rc.mergeWarnings(taskCtx.warnings, taskCtx.warningsCount) + + if num := result.scanCount - lastLogCount; num >= 30000 { + lastLogCount = result.scanCount + logutil.BgLogger().Info("[ddl] backfill worker back fill index", + zap.Int("workerID", w.id), + zap.Int("addedCount", result.addedCount), + zap.Int("scanCount", result.scanCount), + zap.String("nextHandle", tryDecodeToHandleString(taskCtx.nextKey)), + zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) + lastLogTime = time.Now() + } + + handleRange.startKey = taskCtx.nextKey + if taskCtx.done { + break + } + } + logutil.BgLogger().Info("[ddl] merge worker finish task", zap.Int("workerID", w.id), + zap.String("task", task.String()), + zap.Int("addedCount", result.addedCount), + zap.Int("scanCount", result.scanCount), + zap.String("nextHandle", tryDecodeToHandleString(result.nextKey)), + zap.String("takeTime", time.Since(startTime).String())) + return result +} + +// sendRangeTaskToWorkers sends tasks to workers, and returns remaining kvRanges that is not handled. +func (w *worker) sendRangeTaskToMergeWorkers(t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, + totalAddedCount *int64, kvRanges []kv.KeyRange, phyicID int64) ([]kv.KeyRange, error) { + batchTasks := make([]*reorgBackfillTask, 0, len(workers)) + physicalTableID := phyicID + + // Build reorg tasks. + for _, keyRange := range kvRanges { + endKey := keyRange.EndKey + endK, err := getIndexRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, t, keyRange.StartKey, endKey) + if err != nil { + logutil.BgLogger().Info("[ddl] send range task to workers, get reverse key failed", zap.Error(err)) + } else { + logutil.BgLogger().Info("[ddl] send range task to workers, change end key", + zap.String("end key", tryDecodeToHandleString(endKey)), zap.String("current end key", tryDecodeToHandleString(endK))) + endKey = endK + } + + task := &reorgBackfillTask{ + physicalTableID: physicalTableID, + startKey: keyRange.StartKey, + endKey: endKey} + batchTasks = append(batchTasks, task) + + if len(batchTasks) >= len(workers) { + break + } + } + + if len(batchTasks) == 0 { + return nil, nil + } + + // Wait tasks finish. + err := w.handleMergeTasks(reorgInfo, totalAddedCount, workers, batchTasks) + if err != nil { + return nil, errors.Trace(err) + } + + if len(batchTasks) < len(kvRanges) { + // There are kvRanges not handled. 
+ remains := kvRanges[len(batchTasks):] + return remains, nil + } + + return nil, nil +} + +// handleReorgTasks sends tasks to workers, and waits for all the running workers to return results, +// there are taskCnt running workers. +func (w *worker) handleMergeTasks(reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { + for i, task := range batchTasks { + workers[i].taskCh <- task + } + + startKey := batchTasks[0].startKey + taskCnt := len(batchTasks) + startTime := time.Now() + nextKey, taskAddedCount, err := w.waitTaskResults(workers, taskCnt, totalAddedCount, startKey) + elapsedTime := time.Since(startTime) + if err == nil { + err = w.isReorgRunnable(reorgInfo.Job) + } + + if err != nil { + err := reorgInfo.UpdateReorgMeta(nextKey) + metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds()) + logutil.BgLogger().Warn("[ddl] merge worker handle batch tasks failed", + zap.ByteString("elementType", reorgInfo.currElement.TypeKey), + zap.Int64("elementID", reorgInfo.currElement.ID), + zap.Int64("totalAddedCount", *totalAddedCount), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("nextHandle", tryDecodeToHandleString(nextKey)), + zap.Int64("batchAddedCount", taskAddedCount), + zap.String("taskFailedError", err.Error()), + zap.String("takeTime", elapsedTime.String()), + zap.NamedError("updateHandleError", err)) + return errors.Trace(err) + } + // nextHandle will be updated periodically in runReorgJob, so no need to update it here. + w.getReorgCtx(reorgInfo.Job).setNextKey(nextKey) + logutil.BgLogger().Info("[ddl] Merge workers successfully processed batch", + zap.ByteString("elementType", reorgInfo.currElement.TypeKey), + zap.Int64("elementID", reorgInfo.currElement.ID), + zap.Int64("totalAddedCount", *totalAddedCount), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("nextHandle", tryDecodeToHandleString(nextKey)), + zap.Int64("batchAddedCount", taskAddedCount), + zap.String("takeTime", elapsedTime.String())) + return nil +} diff --git a/ddl/column.go b/ddl/column.go index 1ac52bc902242..9bf3411b25f2d 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -561,7 +561,6 @@ func (w *worker) onModifyColumn(d *ddlCtx, t *meta.Meta, job *model.Job) (ver in if err = changingCol.SetOriginDefaultValue(originDefVal); err != nil { return ver, errors.Trace(err) } - initAndAddColumnToTable(tblInfo, changingCol) indexesToChange := findRelatedIndexesToChange(tblInfo, oldCol.Name) for _, info := range indexesToChange { diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index c0a1a9fb2e48b..77bab3d5e2e88 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -1903,7 +1903,7 @@ func buildTableInfo( // Use btree as default index type. 
 			idxInfo.Tp = model.IndexTypeBtree
 		}
-		idxInfo.ID = allocateIndexID(tbInfo)
+		idxInfo.ID = allocateindexID(tbInfo)
 		tbInfo.Indices = append(tbInfo.Indices, idxInfo)
 	}
 
diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go
index 830579fab895f..b852aca8604b7 100644
--- a/ddl/ddl_worker.go
+++ b/ddl/ddl_worker.go
@@ -452,11 +452,27 @@ func jobNeedGC(job *model.Job) bool {
 	}
 	switch job.Type {
 	case model.ActionAddIndex, model.ActionAddPrimaryKey:
+		var needGC bool = false
 		if job.State != model.JobStateRollbackDone {
-			break
+			// When the index was added through the lightning backfill, job.Args carries the temp index ID to clean up.
+			var indexID int64 = 0
+			var partitionIDs []int64
+			err := job.DecodeArgs(&indexID, &partitionIDs)
+			if err != nil {
+				logutil.BgLogger().Info("Lightning: decode job args failed, temp index data may need to be cleaned up manually", zap.String("job", job.String()),
+					zap.String("rawArgs", string(job.RawArgs)))
+			}
+
+			if err == nil && indexID != 0 {
+				needGC = true
+			}
+		} else {
+			needGC = true
 		}
 		// After rolling back an AddIndex operation, we need to use delete-range to delete the half-done index data.
-		return true
+		if needGC {
+			return true
+		}
 	case model.ActionDropSchema, model.ActionDropTable, model.ActionTruncateTable, model.ActionDropIndex, model.ActionDropPrimaryKey,
 		model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionDropColumn, model.ActionModifyColumn:
 		return true
diff --git a/ddl/delete_range.go b/ddl/delete_range.go
index 644ef71eaf874..ab1c7a3d41e42 100644
--- a/ddl/delete_range.go
+++ b/ddl/delete_range.go
@@ -329,6 +329,14 @@ func insertJobIntoDeleteRangeTable(ctx context.Context, sctx sessionctx.Context,
 		if err := job.DecodeArgs(&indexID, &ifExists, &partitionIDs); err != nil {
 			return errors.Trace(err)
 		}
+		eid := tablecodec.TempIndexPrefix | indexID
+		newBackfill := false
+		// If indexID carries the temp index prefix, the index was added through the new backfill flow.
+		if eid == indexID {
+			// eid holds the original index ID.
+			eid = tablecodec.IndexIDMask & indexID
+			newBackfill = true
+		}
 		if len(partitionIDs) > 0 {
 			for _, pid := range partitionIDs {
 				startKey := tablecodec.EncodeTableIndexPrefix(pid, indexID)
@@ -337,12 +345,32 @@ func insertJobIntoDeleteRangeTable(ctx context.Context, sctx sessionctx.Context,
 				if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("partition table ID is %d", pid)); err != nil {
 					return errors.Trace(err)
 				}
+				// Clean up the temp index data so the lightning backfill path does not leave garbage behind.
+				if job.State == model.JobStateRollbackDone && newBackfill {
+					startKey := tablecodec.EncodeTableIndexPrefix(pid, eid)
+					endKey := tablecodec.EncodeTableIndexPrefix(pid, eid+1)
+					elemID := ea.allocForIndexID(pid, eid)
+					if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("partition table ID is %d", pid)); err != nil {
+						return errors.Trace(err)
+					}
+				}
 			}
 		} else {
 			startKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID)
 			endKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID+1)
 			elemID := ea.allocForIndexID(tableID, indexID)
-			return doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID))
+			if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID)); err != nil {
+				return errors.Trace(err)
+			}
+			// Clean up the temp index data so the lightning backfill path does not leave garbage behind.
+			if job.State == model.JobStateRollbackDone && newBackfill {
+				startKey := tablecodec.EncodeTableIndexPrefix(tableID, eid)
+				endKey := tablecodec.EncodeTableIndexPrefix(tableID, eid+1)
+				elemID := ea.allocForIndexID(tableID, eid)
+				if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID)); err != nil {
+					return errors.Trace(err)
+				}
+			}
 		}
 	case model.ActionDropIndex, model.ActionDropPrimaryKey:
 		tableID := job.TableID
diff --git a/ddl/foreign_key.go b/ddl/foreign_key.go
index b81e2729af379..77d518104b128 100644
--- a/ddl/foreign_key.go
+++ b/ddl/foreign_key.go
@@ -35,7 +35,7 @@ func onCreateForeignKey(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ e
 		job.State = model.JobStateCancelled
 		return ver, errors.Trace(err)
 	}
-	fkInfo.ID = allocateIndexID(tblInfo)
+	fkInfo.ID = allocateindexID(tblInfo)
 	tblInfo.ForeignKeys = append(tblInfo.ForeignKeys, &fkInfo)
 
 	originalState := fkInfo.State
diff --git a/ddl/index.go b/ddl/index.go
index fa9193d0921ec..104050ecd49a0 100644
--- a/ddl/index.go
+++ b/ddl/index.go
@@ -39,6 +39,7 @@ import (
 	"github.com/pingcap/tidb/tablecodec"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util"
+	"github.com/pingcap/tidb/util/codec"
 	"github.com/pingcap/tidb/util/dbterror"
 	"github.com/pingcap/tidb/util/logutil"
 	decoder "github.com/pingcap/tidb/util/rowDecoder"
@@ -528,7 +529,7 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo
 	}
 	indexInfo.Unique = unique
 	indexInfo.Global = global
-	indexInfo.ID = allocateIndexID(tblInfo)
+	indexInfo.ID = allocateindexID(tblInfo)
 	tblInfo.Indices = append(tblInfo.Indices, indexInfo)
 	if err = checkTooManyIndexes(tblInfo.Indices); err != nil {
 		job.State = model.JobStateCancelled
@@ -598,6 +599,16 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo
 			return ver, err
 		}
 
+		indexInfo.State = model.StatePublic
+		// If the index was added through the new backfill flow, set eid to the temp index ID so its data can be cleaned up later.
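+		// The temp index ID is the index ID with the TempIndexPrefix bit set, so the later delete-range
+		// step can recognize it (and recover the original ID with tablecodec.IndexIDMask) and remove the
+		// temporary index data that was written during the backfill.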
+		var eid uint64 = 0
+		if indexInfo.SubState != model.StateNone {
+			// After the merge into TiKV is done, set the progress to 100.
+			metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100)
+			eid = codec.EncodeIntToCmpUint(tablecodec.TempIndexPrefix | indexInfo.ID)
+		}
+		// Reset the sub state to StateNone to stop the double write.
+		indexInfo.SubState = model.StateNone
 		// Set column index flag.
 		addIndexColumnFlag(tblInfo, indexInfo)
 		if isPK {
@@ -612,6 +623,10 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo
 		}
 		// Finish this job.
 		job.FinishTableJob(model.JobStateDone, model.StatePublic, ver, tblInfo)
+		// Pass the temp index ID through the job args so its data can be cleaned up by delete-range if needed.
+		if eid != 0 {
+			job.Args = []interface{}{eid, getPartitionIDs(tblInfo)}
+		}
 	default:
 		err = dbterror.ErrInvalidDDLState.GenWithStackByArgs("index", tblInfo.State)
 	}
@@ -632,8 +647,93 @@ func doReorgWorkForCreateIndexMultiSchema(w *worker, d *ddlCtx, t *meta.Meta, jo
 	return true, ver, err
 }
 
+func goFastDDLBackfill(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job,
+	tbl table.Table, indexInfo *model.IndexInfo, reorgInfo *reorgInfo,
+	elements []*meta.Element, rh *reorgHandler) (reorg bool, ver int64, err error) {
+	var restoreReorg bool = false
+	// Restore the reorg task if it was interrupted during the backfill state and the TiDB owner has not changed or restarted.
+	if isLightningEnabled(job.ID) && needRestoreJob(job.ID) {
+		// If the reorg task cannot be restored with the lightning execution, restart it to keep the data consistent.
+		if !canRestoreReorgTask(job, indexInfo.ID) {
+			reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements)
+			if err != nil || reorgInfo.first {
+				return false, ver, errors.Trace(err)
+			}
+		}
+	} else if !isLightningEnabled(job.ID) && !needRestoreJob(job.ID) && indexInfo.SubState == model.StateBackfill {
+		// Reaching here means the DDL owner changed or restarted and the reorg state is re-entered.
+		job.SnapshotVer = 0
+		restoreReorg = true
+		reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements)
+	}
+
+	// Check and set up the lightning backend.
+	// Whether lightning is used for adding the index depends on:
+	// 1) The TiDBFastDDL system variable is on and the index's sub state is StateNone, which means we
+	//    start to build up the lightning backfill environment.
+	// 2) A lightning reorg task is restored, which means the DDL owner changed or restarted and the lightning environment needs to be rebuilt.
+	if !isLightningEnabled(job.ID) {
+		// If the table is empty, there is no need to start the lightning backfiller.
+		if reorgInfo.StartKey == nil && reorgInfo.EndKey == nil {
+			return false, ver, nil
+		}
+		// Check whether this is a re-entered reorg task; if TiDB was restarted, the current
+		// reorg task should be restarted.
+		if (IsAllowFastDDL() && indexInfo.SubState == model.StateNone) || restoreReorg {
+			err = prepareBackend(w.ctx, indexInfo.Unique, job, reorgInfo.ReorgMeta.SQLMode)
+			if err == nil {
+				setLightningEnabled(job.ID, true)
+			}
+		}
+	}
+
+	// Once we enter this backfill flow, it has to be finished.
+	if isLightningEnabled(job.ID) || indexInfo.SubState != model.StateNone {
+		switch indexInfo.SubState {
+		case model.StateNone:
+			logutil.BgLogger().Info("Lightning backfill start state none")
+			indexInfo.SubState = model.StateBackfillSync
+			ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true)
+			if err != nil {
+				return false, ver, errors.Trace(err)
+			}
+			return false, ver, nil
+		case model.StateBackfillSync:
+			logutil.BgLogger().Info("Lightning backfill state backfill Sync")
+			indexInfo.SubState = model.StateBackfill
+			ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true)
+			if err != nil {
+				return false, ver, errors.Trace(err)
+			}
+			return false, ver, nil
+		case model.StateBackfill:
+			logutil.BgLogger().Info("Lightning backfill state backfill")
+			return true, ver, nil
+		case model.StateMergeSync:
+			logutil.BgLogger().Info("Lightning backfill state merge Sync")
+			indexInfo.SubState = model.StateMerge
+			ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true)
+			if err != nil {
+				return false, ver, errors.Trace(err)
+			}
+			return false, ver, nil
+		case model.StateMerge:
+			if err != nil {
+				logutil.BgLogger().Info("Lightning start merge init merge reorg info err", zap.Error(err))
+				return false, ver, errors.Trace(err)
+			}
+			logutil.BgLogger().Info("Lightning start merge the increment part of adding index")
+			return true, ver, nil
+		default:
+			return false, 0, errors.New("Lightning go fast path meets an unexpected sub state: should not happen")
+		}
+	}
+	return false, ver, nil
+}
+
 func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job,
 	tbl table.Table, indexInfo *model.IndexInfo) (done bool, ver int64, err error) {
+	var doReorg bool
 	elements := []*meta.Element{{ID: indexInfo.ID, TypeKey: meta.IndexElementKey}}
 	rh := newReorgHandler(t)
 	reorgInfo, err := getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements)
@@ -643,13 +743,35 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo
 		return false, ver, errors.Trace(err)
 	}
 
-	err = w.runReorgJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) {
-		defer util.Recover(metrics.LabelDDL, "onCreateIndex",
-			func() {
-				addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("add table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name)
-			}, false)
-		return w.addTableIndex(tbl, indexInfo, reorgInfo)
-	})
+	doReorg, ver, err = goFastDDLBackfill(w, d, t, job, tbl, indexInfo, reorgInfo, elements, rh)
+	if isLightningEnabled(reorgInfo.ID) || indexInfo.SubState != model.StateNone {
+		if err != nil {
+			logutil.BgLogger().Error("Lightning: add index backfill processing failed", zap.Error(err))
+			return doReorg, ver, err
+		}
+		// Only when the sub state is StateBackfill or StateMerge do we need to start a new reorg task here.
+ if !doReorg { + return doReorg, ver, err + } + } + + if indexInfo.SubState == model.StateMerge { + err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { + defer util.Recover(metrics.LabelDDL, "onMergeIndex", + func() { + addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("merge table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) + }, false) + return w.mergeTempIndex(tbl, indexInfo, reorgInfo) + }) + } else { + err = w.runReorgJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { + defer util.Recover(metrics.LabelDDL, "onCreateIndex", + func() { + addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("add table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) + }, false) + return w.addTableIndex(tbl, indexInfo, reorgInfo) + }) + } if err != nil { if dbterror.ErrWaitReorgTimeout.Equal(err) { // if timeout, we should return, check for the owner and re-wait job done. @@ -662,9 +784,45 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo logutil.BgLogger().Warn("[ddl] run add index job failed, convert job to rollback, RemoveDDLReorgHandle failed", zap.String("job", job.String()), zap.Error(err1)) } } + // Clean job related lightning backend data, will handle both user cancel ddl job and + // others errors that occurs in reorg processing. + // For error that will rollback the add index statement, here only remove locale lightning + // files, other rollback process will follow add index roll back flow. + cleanUpLightningEnv(reorgInfo, true, indexInfo.ID) + return false, ver, errors.Trace(err) } - return true, ver, errors.Trace(err) + // Ingest data to TiKV + err = importIndexDataToStore(w.ctx, reorgInfo, indexInfo.ID, indexInfo.Unique, tbl) + if err != nil { + logutil.BgLogger().Warn("Lightning import error:", zap.Error(err)) + cleanUpLightningEnv(reorgInfo, true, indexInfo.ID) + return false, ver, errors.Trace(err) + } + + done = false + if isLightningEnabled(job.ID) { + indexInfo.SubState = model.StateMergeSync + ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) + + if err != nil { + return false, ver, errors.Trace(err) + } + //Init reorg infor for merge task. + job.SnapshotVer = 0 + reorgInfo, err = getMergeReorgInfo(d.jobContext(job), d, rh, job, tbl, elements, indexInfo.ID) + if err != nil { + return false, ver, errors.Trace(err) + } + } else { + // Check if reorg task finished. + if indexInfo.SubState == model.StateNone || indexInfo.SubState == model.StateMerge { + done = true + } + } + // Cleanup lightning environment + cleanUpLightningEnv(reorgInfo, false) + return done, ver, errors.Trace(err) } func onDropIndex(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) { @@ -732,7 +890,12 @@ func onDropIndex(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) { // Finish this job. if job.IsRollingback() { job.FinishTableJob(model.JobStateRollbackDone, model.StateNone, ver, tblInfo) - job.Args[0] = indexInfo.ID + if indexInfo.SubState == model.StateNone { + job.Args[0] = indexInfo.ID + } else { + // If go through new backfill flow, set temp index id as index id. + job.Args[0] = codec.EncodeIntToCmpUint(tablecodec.TempIndexPrefix | indexInfo.ID) + } // the partition ids were append by convertAddIdxJob2RollbackJob, it is weird, but for the compatibility, // we should keep appending the partitions in the convertAddIdxJob2RollbackJob. 
} else { @@ -956,8 +1119,8 @@ type addIndexWorker struct { distinctCheckFlags []bool } -func newAddIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, decodeColMap map[int64]decoder.Column, reorgInfo *reorgInfo, jc *JobContext) *addIndexWorker { - index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) +func newAddIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, decodeColMap map[int64]decoder.Column, reorgInfo *reorgInfo, jc *JobContext, newBF bool) *addIndexWorker { + index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo, newBF) rowDecoder := decoder.NewRowDecoder(t, t.WritableCols(), decodeColMap) return &addIndexWorker{ baseIndexWorker: baseIndexWorker{ @@ -1246,11 +1409,13 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC continue } - // We need to add this lock to make sure pessimistic transaction can realize this operation. - // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. - err := txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) - if err != nil { - return errors.Trace(err) + if !w.isNewBF { + // We need to add this lock to make sure pessimistic transaction can realize this operation. + // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. + err := txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) + if err != nil { + return errors.Trace(err) + } } // Create the index. @@ -1367,7 +1532,7 @@ func findNextPartitionID(currentPartition int64, defs []model.PartitionDefinitio return 0, errors.Errorf("partition id not found %d", currentPartition) } -func allocateIndexID(tblInfo *model.TableInfo) int64 { +func allocateindexID(tblInfo *model.TableInfo) int64 { tblInfo.MaxIndexID++ return tblInfo.MaxIndexID } diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go new file mode 100644 index 0000000000000..97e93d3a3ae23 --- /dev/null +++ b/ddl/index_lightning.go @@ -0,0 +1,574 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ddl + +import ( + "bytes" + "context" + "strconv" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + lit "github.com/pingcap/tidb/ddl/lightning" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/metrics" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/sessionctx/variable" + trans "github.com/pingcap/tidb/store/driver/txn" + "github.com/pingcap/tidb/table" + "github.com/pingcap/tidb/table/tables" + "github.com/pingcap/tidb/tablecodec" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/dbterror" + "github.com/pingcap/tidb/util/logutil" + decoder "github.com/pingcap/tidb/util/rowDecoder" + "github.com/pingcap/tidb/util/sqlexec" + "go.uber.org/zap" +) + +const ( + // BackfillProgressPercent set a backfill ratio in whole reorg task. + BackfillProgressPercent float64 = 0.6 +) + +// IsAllowFastDDL check whether Fast DDl is allowed. +func IsAllowFastDDL() bool { + // Only when both TiDBFastDDL is set to on and Lightning env is inited successful, + // the add index could choose lightning path to do backfill procedure. + // ToDo: need check PiTR is off currently. + if variable.FastDDL.Load() && lit.GlobalEnv.IsInited { + return true + } + return false +} + +// Check if PiTR is enable in cluster. +func isPiTREnable(w *worker) bool { + var ( + ctx sessionctx.Context + valStr string = "show config where name = 'log-backup.enable'" + err error + retVal bool = false + ) + ctx, err = w.sessPool.get() + if err != nil { + return true + } + defer w.sessPool.put(ctx) + rows, fields, errSQL := ctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(context.Background(), nil, valStr) + if errSQL != nil { + return true + } + if len(rows) == 0 { + return false + } + for _, row := range rows { + d := row.GetDatum(3, &fields[3].Column.FieldType) + value, errField := d.ToString() + if errField != nil { + return true + } + if value == "true" { + retVal = true + break + } + } + return retVal +} + +func isLightningEnabled(id int64) bool { + return lit.IsEngineLightningBackfill(id) +} + +func setLightningEnabled(id int64, value bool) { + lit.SetEnable(id, value) +} + +func needRestoreJob(id int64) bool { + return lit.NeedRestore(id) +} + +func setNeedRestoreJob(id int64, value bool) { + lit.SetNeedRestore(id, value) +} + +func prepareBackend(ctx context.Context, unique bool, job *model.Job, sqlMode mysql.SQLMode) (err error) { + bcKey := lit.GenBackendContextKey(job.ID) + // Create and regist backend of lightning + err = lit.GlobalEnv.LitMemRoot.RegistBackendContext(ctx, unique, bcKey, sqlMode) + if err != nil { + lit.GlobalEnv.LitMemRoot.DeleteBackendContext(bcKey) + return err + } + + return err +} + +func prepareLightningEngine(job *model.Job, indexID int64, workerCnt int) (wCnt int, err error) { + bcKey := lit.GenBackendContextKey(job.ID) + enginKey := lit.GenEngineInfoKey(job.ID, indexID) + wCnt, err = lit.GlobalEnv.LitMemRoot.RegistEngineInfo(job, bcKey, enginKey, int32(indexID), workerCnt) + if err != nil { + lit.GlobalEnv.LitMemRoot.DeleteBackendContext(bcKey) + } + return wCnt, err +} + +// Import local index sst file into TiKV. +func importIndexDataToStore(ctx context.Context, reorg *reorgInfo, indexID int64, unique bool, tbl table.Table) error { + if isLightningEnabled(reorg.ID) && needRestoreJob(reorg.ID) { + engineInfoKey := lit.GenEngineInfoKey(reorg.ID, indexID) + // just log info. 
+ err := lit.FinishIndexOp(ctx, engineInfoKey, tbl, unique) + if err != nil { + err = errors.Trace(err) + return err + } + // After import local data into TiKV, then the progress set to 85. + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(85) + } + return nil +} + +// Used to clean backend, +func cleanUpLightningEnv(reorg *reorgInfo, isCanceled bool, indexIDs ...int64) { + if isLightningEnabled(reorg.ID) { + bcKey := lit.GenBackendContextKey(reorg.ID) + // If reorg is cancled, need do clean up engine. + if isCanceled { + lit.GlobalEnv.LitMemRoot.ClearEngines(reorg.ID, indexIDs...) + } + lit.GlobalEnv.LitMemRoot.DeleteBackendContext(bcKey) + } +} + +// Disk quota checking and ingest data. +func importPartialDataToTiKV(jobID int64, indexIDs int64) error { + return lit.UnsafeImportEngineData(jobID, indexIDs) +} + +// Check if this reorg is a restore reorg task +// Check if current lightning reorg task can be executed continuely. +// Otherwise, restart the reorg task. +func canRestoreReorgTask(job *model.Job, indexID int64) bool { + // The reorg just start, do nothing + if job.SnapshotVer == 0 { + return false + } + + // Check if backend and engine are cached. + if !lit.CanRestoreReorgTask(job.ID, indexID) { + job.SnapshotVer = 0 + return false + } + return true +} + +// Below is lightning worker implement +type addIndexWorkerLit struct { + addIndexWorker + + // Lightning related variable. + writerContex *lit.WorkerContext +} + +func newAddIndexWorkerLit(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, decodeColMap map[int64]decoder.Column, reorgInfo *reorgInfo, jc *JobContext, jobID int64) (*addIndexWorkerLit, error) { + index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + rowDecoder := decoder.NewRowDecoder(t, t.WritableCols(), decodeColMap) + // ToDo: Bear Currently, all the lightning worker share one openengine. + engineInfoKey := lit.GenEngineInfoKey(jobID, indexInfo.ID) + + lwCtx, err := lit.GlobalEnv.LitMemRoot.RegistWorkerContext(engineInfoKey, id) + if err != nil { + return nil, err + } + // Add build openengine process. + return &addIndexWorkerLit{ + addIndexWorker: addIndexWorker{ + baseIndexWorker: baseIndexWorker{ + backfillWorker: newBackfillWorker(sessCtx, id, t, reorgInfo), + indexes: []table.Index{index}, + rowDecoder: rowDecoder, + defaultVals: make([]types.Datum, len(t.WritableCols())), + rowMap: make(map[int64]types.Datum, len(decodeColMap)), + metricCounter: metrics.BackfillTotalCounter.WithLabelValues("add_idx_rate"), + sqlMode: reorgInfo.ReorgMeta.SQLMode, + jobContext: jc, + }, + index: index, + }, + writerContex: lwCtx, + }, err +} + +// BackfillDataInTxn will backfill table index in a transaction. A lock corresponds to a rowKey if the value of rowKey is changed, +// Note that index columns values may change, and an index is not allowed to be added, so the txn will rollback and retry. +// BackfillDataInTxn will add w.batchCnt indices once, default value of w.batchCnt is 128. 
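+// Unlike the transactional addIndexWorker, this lightning variant only reads the rows inside the
+// transaction: the index KV pairs are encoded with Create4SST and handed to the lightning engine
+// writer (w.writerContex.WriteRow) instead of being written into the transaction's memory buffer.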
+func (w *addIndexWorkerLit) BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { + failpoint.Inject("errorMockPanic", func(val failpoint.Value) { + if val.(bool) { + panic("panic test") + } + }) + fetchTag := "AddIndexLightningFetchdata" + strconv.Itoa(w.id) + writeTag := "AddIndexLightningWritedata" + strconv.Itoa(w.id) + txnTag := "AddIndexLightningBackfillDataInTxn" + strconv.Itoa(w.id) + + oprStartTime := time.Now() + errInTxn = kv.RunInNewTxn(context.Background(), w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + taskCtx.addedCount = 0 + taskCtx.scanCount = 0 + txn.SetOption(kv.Priority, w.priority) + if tagger := w.reorgInfo.d.getResourceGroupTaggerForTopSQL(w.reorgInfo.Job); tagger != nil { + txn.SetOption(kv.ResourceGroupTagger, tagger) + } + + idxRecords, nextKey, taskDone, err := w.fetchRowColVals(txn, handleRange) + logSlowOperations(time.Since(oprStartTime), fetchTag, 1000) + if err != nil { + return errors.Trace(err) + } + taskCtx.nextKey = nextKey + taskCtx.done = taskDone + + err = w.batchCheckUniqueKey(txn, idxRecords) + if err != nil { + return errors.Trace(err) + } + + for _, idxRecord := range idxRecords { + taskCtx.scanCount++ + // The index is already exists, we skip it, no needs to backfill it. + // The following update, delete, insert on these rows, TiDB can handle it correctly. + if idxRecord.skip { + continue + } + + // Create the index. + key, idxVal, _, err := w.index.Create4SST(w.sessCtx, txn, idxRecord.vals, idxRecord.handle, idxRecord.rsData, table.WithIgnoreAssertion) + if err != nil { + return errors.Trace(err) + } + // Currently, only use one kVCache, later may use multi kvCache to parallel compute/io performance. + err = w.writerContex.WriteRow(key, idxVal) + if err != nil { + return errors.Trace(err) + } + taskCtx.addedCount++ + } + logSlowOperations(time.Since(oprStartTime), writeTag, 1000) + return nil + }) + logSlowOperations(time.Since(oprStartTime), txnTag, 3000) + return +} + +func (w *backFillIndexWorker) batchCheckTemporaryUniqueKey(txn kv.Transaction, idxRecords []*temporaryIndexRecord) error { + idxInfo := w.index.Meta() + if !idxInfo.Unique { + // non-unique key need not to check, just overwrite it, + // because in most case, backfilling indices is not exists. + return nil + } + + if len(w.idxKeyBufs) < w.batchCnt { + w.idxKeyBufs = make([][]byte, w.batchCnt) + } + w.batchCheckKeys = w.batchCheckKeys[:0] + w.distinctCheckFlags = w.distinctCheckFlags[:0] + + stmtCtx := w.sessCtx.GetSessionVars().StmtCtx + for i, record := range idxRecords { + distinct := false + if !record.delete && tablecodec.IndexKVIsUnique(record.vals) { + distinct = true + } + // save the buffer to reduce memory allocations. + w.idxKeyBufs[i] = record.key + + w.batchCheckKeys = append(w.batchCheckKeys, record.key) + w.distinctCheckFlags = append(w.distinctCheckFlags, distinct) + } + + batchVals, err := txn.BatchGet(context.Background(), w.batchCheckKeys) + if err != nil { + return errors.Trace(err) + } + + // 1. unique-key/primary-key is duplicate and the handle is equal, skip it. + // 2. unique-key/primary-key is duplicate and the handle is not equal, return duplicate error. + // 3. non-unique-key is duplicate, skip it. 
+ for i, key := range w.batchCheckKeys { + if val, found := batchVals[string(key)]; found { + if w.distinctCheckFlags[i] { + if !bytes.Equal(val, idxRecords[i].vals) { + return kv.ErrKeyExists + } + } + idxRecords[i].skip = true + } else if w.distinctCheckFlags[i] { + // The keys in w.batchCheckKeys also maybe duplicate, + // so we need to backfill the not found key into `batchVals` map. + batchVals[string(key)] = idxRecords[i].vals + } + } + // Constrains is already checked. + stmtCtx.BatchCheck = true + return nil +} + +func (w *backFillIndexWorker) batchSkipKey(txn kv.Transaction, store kv.Storage, idxRecords []*temporaryIndexRecord) error { + if len(w.batchCheckTmpKeys) == 0 { + return nil + } + + // Gen a current snapshot to get latest updated. + snapshot := store.GetSnapshot(kv.MaxVersion) + // Get duplicated key from temp index. + batchVals, err := trans.NewBufferBatchGetter(txn.GetMemBuffer(), nil, snapshot).BatchGet(context.Background(), w.batchCheckTmpKeys) + if err != nil { + return errors.Trace(err) + } + + for i, key := range w.batchCheckTmpKeys { + if val, found := batchVals[string(key)]; found { + var keyVer []byte + length := len(val) + keyVer = append(keyVer, val[length-1:]...) + if bytes.Equal(keyVer, []byte("2")) { + pos := w.tmpKeyPos[i] + idxRecords[pos].skip = true + } + } + } + + return nil +} + +// temporaryIndexRecord is the record information of an index. +type temporaryIndexRecord struct { + key []byte + vals []byte + skip bool // skip indicates that the index key is already exists, we should not add it. + delete bool + unique bool + keyVer []byte +} +type backFillIndexWorker struct { + *backfillWorker + + index table.Index + + // The following attributes are used to reduce memory allocation. + idxKeyBufs [][]byte + batchCheckKeys []kv.Key + distinctCheckFlags []bool + tmpIdxRecords []*temporaryIndexRecord + batchCheckTmpKeys []kv.Key + tmpKeyPos []int32 +} + +func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo) *backFillIndexWorker { + index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) + + // Add build openengine process. + return &backFillIndexWorker{ + backfillWorker: newBackfillWorker(sessCtx, id, t, reorgInfo), + index: index, + } +} + +// BackfillDataInTxn merge temp index data in txn. 
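+// It scans the temporary index records in the task range, skips records that carry key version "2"
+// or whose unique key is already present in the normal index, and replays the remaining version "1"
+// records (values as well as the "delete"/"deleteu" markers) onto the normal index keys.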
+func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { + logutil.BgLogger().Info("Merge temp index", zap.ByteString("startKey", taskRange.startKey), zap.ByteString("endKey", taskRange.endKey)) + oprStartTime := time.Now() + errInTxn = kv.RunInNewTxn(context.Background(), w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + taskCtx.addedCount = 0 + taskCtx.scanCount = 0 + txn.SetOption(kv.Priority, w.priority) + if tagger := w.reorgInfo.d.getResourceGroupTaggerForTopSQL(w.reorgInfo.Job); tagger != nil { + txn.SetOption(kv.ResourceGroupTagger, tagger) + } + + temporaryIndexRecords, nextKey, taskDone, err := w.fetchTempIndexVals(txn, taskRange) + if err != nil { + return errors.Trace(err) + } + taskCtx.nextKey = nextKey + taskCtx.done = taskDone + + err = w.batchCheckTemporaryUniqueKey(txn, temporaryIndexRecords) + if err != nil { + return errors.Trace(err) + } + + // Should be + err = w.batchSkipKey(txn, w.sessCtx.GetStore(), temporaryIndexRecords) + if err != nil { + return errors.Trace(err) + } + + for _, idxRecord := range temporaryIndexRecords { + taskCtx.scanCount++ + // The index is already exists, we skip it, no needs to backfill it. + // The following update, delete, insert on these rows, TiDB can handle it correctly. + if idxRecord.skip { + continue + } + + if !bytes.Equal(idxRecord.keyVer, []byte("1")) { + err = errors.New("merge temp index should not merge version 2 index data") + panic(err) + } + + if idxRecord.delete { + if idxRecord.unique { + err = txn.GetMemBuffer().DeleteWithFlags(idxRecord.key, kv.SetNeedLocked) + } else { + err = txn.GetMemBuffer().Delete(idxRecord.key) + } + logutil.BgLogger().Info("delete", zap.ByteString("key", idxRecord.key)) + } else { + err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) + } + if err != nil { + return err + } + taskCtx.addedCount++ + } + + return nil + }) + logSlowOperations(time.Since(oprStartTime), "AddIndexMergeDataInTxn", 3000) + return +} + +func (w *backFillIndexWorker) AddMetricInfo(cnt float64) { +} + +// addTableIndex handles the add index reorganization state for a table. 
+func (w *worker) mergeTempIndex(t table.Table, idx *model.IndexInfo, reorgInfo *reorgInfo) error { + var err error + if tbl, ok := t.(table.PartitionedTable); ok { + var finish bool + for !finish { + p := tbl.GetPartition(reorgInfo.PhysicalTableID) + if p == nil { + return dbterror.ErrCancelledDDLJob.GenWithStack("Can not find partition id %d for table %d", reorgInfo.PhysicalTableID, t.Meta().ID) + } + err = w.addPhysicalTempIndex(p, idx, reorgInfo) + if err != nil { + break + } + finish, err = w.updateReorgInfo(tbl, reorgInfo) + if err != nil { + return errors.Trace(err) + } + } + } else { + err = w.addPhysicalTempIndex(t.(table.PhysicalTable), idx, reorgInfo) + } + return errors.Trace(err) +} + +func (w *worker) addPhysicalTempIndex(t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo) error { + logutil.BgLogger().Info("[ddl] start to merge temp index", zap.String("job", reorgInfo.Job.String()), zap.String("reorgInfo", reorgInfo.String())) + return w.writeTempIndexRecord(t, typeAddIndexWorker, indexInfo, nil, nil, reorgInfo) +} + +func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange reorgBackfillTask) ([]*temporaryIndexRecord, kv.Key, bool, error) { + startTime := time.Now() + w.tmpIdxRecords = w.tmpIdxRecords[:0] + w.batchCheckTmpKeys = w.batchCheckTmpKeys[:0] + w.tmpKeyPos = w.tmpKeyPos[:0] + var pos int32 = 0 + // taskDone means that the reorged handle is out of taskRange.endHandle. + taskDone := false + oprStartTime := startTime + err := iterateSnapshotIndexes(w.reorgInfo.d.jobContext(w.reorgInfo.Job), w.sessCtx.GetStore(), w.priority, w.table, txn.StartTS(), taskRange.startKey, taskRange.endKey, func(indexKey kv.Key, rawValue []byte) (more bool, err error) { + oprEndTime := time.Now() + logSlowOperations(oprEndTime.Sub(oprStartTime), "iterate temporary index in merge process", 0) + oprStartTime = oprEndTime + + taskDone := indexKey.Cmp(taskRange.endKey) > 0 + + if taskDone || len(w.tmpIdxRecords) >= w.batchCnt { + logutil.BgLogger().Info("return false") + return false, nil + } + + isDelete := false + unique := false + skip := false + var keyVer []byte + length := len(rawValue) + keyVer = append(keyVer, rawValue[length-1:]...) + rawValue = rawValue[:length-1] + length-- + if bytes.Equal(keyVer, []byte("2")) { + skip = true + } + if bytes.Equal(rawValue, []byte("delete")) { + isDelete = true + rawValue = rawValue[:length-6] + } else if bytes.Equal(rawValue, []byte("deleteu")) { + isDelete = true + unique = true + rawValue = rawValue[:length-7] + } + var convertedIndexKey []byte + convertedIndexKey = append(convertedIndexKey, indexKey...) + tablecodec.TempIndexKey2IndexKey(w.index.Meta().ID, convertedIndexKey) + idxRecord := &temporaryIndexRecord{key: convertedIndexKey, delete: isDelete, unique: unique, keyVer: keyVer, skip: skip} + if !isDelete { + idxRecord.vals = rawValue + } + w.tmpIdxRecords = append(w.tmpIdxRecords, idxRecord) + + if bytes.Equal(keyVer, []byte("1")) { + // We need to add this lock to make sure pessimistic transaction can realize this operation. + // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. 
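+			// Only version "1" records are merged; collect their keys and positions here so that
+			// batchSkipKey can later drop the ones that already have a version "2" record in the
+			// temporary index (those are covered by the double-write phase).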
+ err = txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) + if err != nil { + return false, errors.Trace(err) + } + w.batchCheckTmpKeys = append(w.batchCheckTmpKeys, indexKey) + w.tmpKeyPos = append(w.tmpKeyPos, pos) + } + pos++ + return true, nil + }) + + if len(w.tmpIdxRecords) == 0 { + taskDone = true + } + var nextKey kv.Key + if taskDone { + nextKey = taskRange.endKey + } else { + var convertedNextKey []byte + lastPos := len(w.tmpIdxRecords) + convertedNextKey = append(convertedNextKey, w.tmpIdxRecords[lastPos-1].key...) + tablecodec.IndexKey2TempIndexKey(w.index.Meta().ID, convertedNextKey) + nextKey = convertedNextKey + } + + logutil.BgLogger().Debug("[ddl] txn fetches handle info", zap.Uint64("txnStartTS", txn.StartTS()), + zap.String("taskRange", taskRange.String()), zap.Duration("takeTime", time.Since(startTime))) + return w.tmpIdxRecords, nextKey.Next(), taskDone, errors.Trace(err) +} diff --git a/ddl/index_lightning_test.go b/ddl/index_lightning_test.go new file mode 100644 index 0000000000000..9b0e862a88fad --- /dev/null +++ b/ddl/index_lightning_test.go @@ -0,0 +1,97 @@ +// Copyright 2016 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ddl_test + +import ( + "fmt" + "strconv" + "testing" + "time" + + "github.com/pingcap/tidb/ddl" + lit "github.com/pingcap/tidb/ddl/lightning" + "github.com/pingcap/tidb/domain" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/testkit" + "github.com/stretchr/testify/require" +) + +func testLitAddIndex(tk *testkit.TestKit, t *testing.T, ctx sessionctx.Context, tblID int64, unique bool, indexName string, colName string, dom *domain.Domain) int64 { + un := "" + if unique { + un = "unique" + } + sql := fmt.Sprintf("alter table t add %s index %s(%s)", un, indexName, colName) + tk.MustExec(sql) + + idi, _ := strconv.Atoi(tk.MustQuery("admin show ddl jobs 1;").Rows()[0][0].(string)) + id := int64(idi) + v := getSchemaVer(t, ctx) + require.NoError(t, dom.Reload()) + tblInfo, exist := dom.InfoSchema().TableByID(tblID) + require.True(t, exist) + checkHistoryJobArgs(t, ctx, id, &historyJobArgs{ver: v, tbl: tblInfo.Meta()}) + return id +} + +func TestEnableLightning(t *testing.T) { + lit.GlobalEnv.SetMinQuota() + store, _, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + // Check default value, Current is off + allow := ddl.IsAllowFastDDL() + require.Equal(t, false, allow) + // Set illedge value + err := tk.ExecToErr("set @@global.tidb_fast_ddl = abc") + require.Error(t, err) + allow = ddl.IsAllowFastDDL() + require.Equal(t, false, allow) + + // set to on + tk.MustExec("set @@global.tidb_fast_ddl = on") + allow = ddl.IsAllowFastDDL() + require.Equal(t, true, allow) +} + +func TestAddIndexLit(t *testing.T) { + lit.GlobalEnv.SetMinQuota() + store, dom, clean := testkit.CreateMockStoreAndDomain(t) + defer clean() + ddl.SetWaitTimeWhenErrorOccurred(1 * time.Microsecond) + tk := testkit.NewTestKit(t, store) + 
tk.MustExec("use test") + tk.MustExec("create table t (c1 int primary key, c2 int, c3 int)") + tk.MustExec("insert t values (1, 1, 1), (2, 2, 2), (3, 3, 1);") + tk.MustExec("set @@global.tidb_fast_ddl = on") + + var tableID int64 + rs := tk.MustQuery("select TIDB_TABLE_ID from information_schema.tables where table_name='t' and table_schema='test';") + tableIDi, _ := strconv.Atoi(rs.Rows()[0][0].(string)) + tableID = int64(tableIDi) + + // Non-unique secondary index + jobID := testLitAddIndex(tk, t, testNewContext(store), tableID, false, "idx1", "c2", dom) + testCheckJobDone(t, store, jobID, true) + + // Unique secondary index + jobID = testLitAddIndex(tk, t, testNewContext(store), tableID, true, "idx2", "c2", dom) + testCheckJobDone(t, store, jobID, true) + + // Unique duplicate key + err := tk.ExecToErr("alter table t1 add index unique idx3(c3)") + require.Error(t, err) +} diff --git a/ddl/reorg.go b/ddl/reorg.go index f3c87213c684d..b5bbd6d55f511 100644 --- a/ddl/reorg.go +++ b/ddl/reorg.go @@ -253,7 +253,12 @@ func (w *worker) runReorgJob(rh *reorgHandler, reorgInfo *reorgInfo, tblInfo *mo switch reorgInfo.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: - metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) + // For lightning there is a part import should be counted. + if isLightningEnabled(reorgInfo.ID) { + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(BackfillProgressPercent * 100) + } else { + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) + } case model.ActionModifyColumn: metrics.GetBackfillProgressByLabel(metrics.LblModifyColumn).Set(100) } @@ -321,7 +326,12 @@ func updateBackfillProgress(w *worker, reorgInfo *reorgInfo, tblInfo *model.Tabl } switch reorgInfo.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: - metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(progress * 100) + // For lightning there is a part import should be counted. + if isLightningEnabled(reorgInfo.ID) { + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(BackfillProgressPercent * progress * 100) + } else { + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(progress * 100) + } case model.ActionModifyColumn: metrics.GetBackfillProgressByLabel(metrics.LblModifyColumn).Set(progress * 100) } @@ -392,7 +402,8 @@ func (r *reorgInfo) String() string { "StartHandle:" + tryDecodeToHandleString(r.StartKey) + "," + "EndHandle:" + tryDecodeToHandleString(r.EndKey) + "," + "First:" + strconv.FormatBool(r.first) + "," + - "PhysicalTableID:" + strconv.FormatInt(r.PhysicalTableID, 10) + "PhysicalTableID:" + strconv.FormatInt(r.PhysicalTableID, 10) + "," + + "Lightning execution:" + strconv.FormatBool(isLightningEnabled(r.ID)) } func constructDescTableScanPB(physicalTableID int64, tblInfo *model.TableInfo, handleCols []*model.ColumnInfo) *tipb.Executor { @@ -776,3 +787,188 @@ func (r *reorgHandler) RemoveDDLReorgHandle(job *model.Job, elements []*meta.Ele func (r *reorgHandler) GetDDLReorgHandle(job *model.Job) (element *meta.Element, startKey, endKey kv.Key, physicalTableID int64, err error) { return r.m.GetDDLReorgHandle(job) } + +func (w *worker) runMergeJob(rh *reorgHandler, reorgInfo *reorgInfo, tblInfo *model.TableInfo, lease time.Duration, f func() error) error { + job := reorgInfo.Job + d := reorgInfo.d + // This is for tests compatible, because most of the early tests try to build the reorg job manually + // without reorg meta info, which will cause nil pointer in here. 
+ if job.ReorgMeta == nil { + job.ReorgMeta = &model.DDLReorgMeta{ + SQLMode: mysql.ModeNone, + Warnings: make(map[errors.ErrorID]*terror.Error), + WarningsCount: make(map[errors.ErrorID]int64), + Location: &model.TimeZoneLocation{Name: time.UTC.String(), Offset: 0}, + } + } + + rc := w.getReorgCtx(job) + if rc == nil { + // Since reorg job will be interrupted for polling the cancel action outside. we don't need to wait for 2.5s + // for the later entrances. + // lease = 0 means it's in an integration test. In this case we don't delay so the test won't run too slowly. + if lease > 0 { + delayForAsyncCommit() + } + // This job is cancelling, we should return ErrCancelledDDLJob directly. + // Q: Is there any possibility that the job is cancelling and has no reorgCtx? + // A: Yes, consider the case that we cancel the job when backfilling the last batch of data, the cancel txn is commit first, + // and then the backfill workers send signal to the `doneCh` of the reorgCtx, and then the DDL worker will remove the reorgCtx and + // update the DDL job to `done`, but at the commit time, the DDL txn will raise a "write conflict" error and retry, and it happens. + if job.IsCancelling() { + return dbterror.ErrCancelledDDLJob + } + rc = w.newReorgCtx(reorgInfo) + w.wg.Add(1) + go func() { + defer w.wg.Done() + rc.doneCh <- f() + }() + } + + waitTimeout := defaultWaitReorgTimeout + // if lease is 0, we are using a local storage, + // and we can wait the reorganization to be done here. + // if lease > 0, we don't need to wait here because + // we should update some job's progress context and try checking again, + // so we use a very little timeout here. + if lease > 0 { + waitTimeout = ReorgWaitTimeout + } + + // wait reorganization job done or timeout + select { + case err := <-rc.doneCh: + // Since job is cancelled,we don't care about its partial counts. + if rc.isReorgCanceled() || terror.ErrorEqual(err, dbterror.ErrCancelledDDLJob) { + d.removeReorgCtx(job) + return dbterror.ErrCancelledDDLJob + } + + // Update a job's warnings. + w.mergeWarningsIntoJob(job) + + d.removeReorgCtx(job) + // For other errors, even err is not nil here, we still wait the partial counts to be collected. + // since in the next round, the startKey is brand new which is stored by last time. + if err != nil { + return errors.Trace(err) + } + + case <-w.ctx.Done(): + logutil.BgLogger().Info("[ddl] run merge job quit") + d.removeReorgCtx(job) + // We return dbterror.ErrWaitReorgTimeout here too, so that outer loop will break. + return dbterror.ErrWaitReorgTimeout + case <-time.After(waitTimeout): + rowCount, doneKey, currentElement := rc.getRowCountAndKey() + // Update a job's warnings. + w.mergeWarningsIntoJob(job) + + rc.resetWarnings() + + // Update a reorgInfo's handle. + // Since daemon-worker is triggered by timer to store the info half-way. + // you should keep these infos is read-only (like job) / atomic (like doneKey & element) / concurrent safe. + err := rh.UpdateDDLReorgStartHandle(job, currentElement, doneKey) + + logutil.BgLogger().Info("[ddl] run merge job wait timeout", + zap.Duration("waitTime", waitTimeout), + zap.ByteString("elementType", currentElement.TypeKey), + zap.Int64("elementID", currentElement.ID), + zap.Int64("totalmergedRowCount", rowCount), + zap.String("doneKey", tryDecodeToHandleString(doneKey)), + zap.Error(err)) + // If timeout, we will return, check the owner and retry to wait job done again. 
+ return dbterror.ErrWaitReorgTimeout + } + return nil +} + +func getMergeReorgInfo(ctx *JobContext, d *ddlCtx, rh *reorgHandler, job *model.Job, tbl table.Table, elements []*meta.Element, tempIdxID int64) (*reorgInfo, error) { + var ( + element *meta.Element + start kv.Key + end kv.Key + pid int64 + info reorgInfo + ) + + if job.SnapshotVer == 0 { + // For the case of the old TiDB version(do not exist the element information) is upgraded to the new TiDB version. + // Third step, we need to remove the element information to make sure we can save the reorganized information to storage. + failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) { + if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 3, 4) { + if err := rh.RemoveReorgElement(job); err != nil { + failpoint.Return(nil, errors.Trace(err)) + } + info.first = true + failpoint.Return(&info, nil) + } + }) + + info.first = true + // get the current version for reorganization if we don't have + ver, err := getValidCurrentVersion(d.store) + if err != nil { + return nil, errors.Trace(err) + } + tblInfo := tbl.Meta() + pid = tblInfo.ID + if pi := tblInfo.GetPartitionInfo(); pi != nil { + pid = pi.Definitions[0].ID + } + + start, end = tablecodec.GetTableIndexKeyRange(pid, tablecodec.TempIndexPrefix|tempIdxID) + if err != nil { + return nil, errors.Trace(err) + } + logutil.BgLogger().Info("[ddl] job get table range", + zap.Int64("jobID", job.ID), zap.Int64("physicalTableID", pid), + zap.String("startHandle", tryDecodeToHandleString(start)), + zap.String("endHandle", tryDecodeToHandleString(end))) + + failpoint.Inject("errorUpdateReorgHandle", func() (*reorgInfo, error) { + return &info, errors.New("occur an error when update reorg handle") + }) + err = rh.UpdateDDLReorgHandle(job, start, end, pid, elements[0]) + if err != nil { + return &info, errors.Trace(err) + } + // Update info should after data persistent. + job.SnapshotVer = ver.Ver + element = elements[0] + } else { + failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) { + // For the case of the old TiDB version(do not exist the element information) is upgraded to the new TiDB version. + // Second step, we need to remove the element information to make sure we can get the error of "ErrDDLReorgElementNotExist". + // However, since "txn.Reset()" will be called later, the reorganized information cannot be saved to storage. + if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 2, 3) { + if err := rh.RemoveReorgElement(job); err != nil { + failpoint.Return(nil, errors.Trace(err)) + } + } + }) + + var err error + element, start, end, pid, err = rh.GetDDLReorgHandle(job) + if err != nil { + // If the reorg element doesn't exist, this reorg info should be saved by the older TiDB versions. + // It's compatible with the older TiDB versions. + // We'll try to remove it in the next major TiDB version. 
+ if meta.ErrDDLReorgElementNotExist.Equal(err) { + job.SnapshotVer = 0 + logutil.BgLogger().Warn("[ddl] get reorg info, the element does not exist", zap.String("job", job.String())) + } + return &info, errors.Trace(err) + } + } + info.Job = job + info.d = d + info.StartKey = start + info.EndKey = end + info.PhysicalTableID = pid + info.currElement = element + info.elements = elements + return &info, nil +} diff --git a/infoschema/tables.go b/infoschema/tables.go index e6c843e45f8bf..383ee5a253fde 100644 --- a/infoschema/tables.go +++ b/infoschema/tables.go @@ -2113,11 +2113,21 @@ func (vt *VirtualTable) Indices() []table.Index { return nil } +// IndexPrefix implements table.Table RecordPrefix interface. +func (vt *VirtualTable) IndexPrefix() kv.Key { + return nil +} + // RecordPrefix implements table.Table RecordPrefix interface. func (vt *VirtualTable) RecordPrefix() kv.Key { return nil } +// IndexPrefix implements table.Table RecordPrefix interface. +func (it *infoschemaTable) IndexPrefix() kv.Key { + return nil +} + // AddRecord implements table.Table AddRecord interface. func (vt *VirtualTable) AddRecord(ctx sessionctx.Context, r []types.Datum, opts ...table.AddRecordOption) (recordID kv.Handle, err error) { return nil, table.ErrUnsupportedOp diff --git a/parser/model/model.go b/parser/model/model.go index 193239dc39321..291e5e0fb5886 100644 --- a/parser/model/model.go +++ b/parser/model/model.go @@ -48,6 +48,18 @@ const ( StateReplicaOnly // StateGlobalTxnOnly means we can only use global txn for operator on this schema element StateGlobalTxnOnly + // Below sub states are only used for add index lightning ways currently, other place should + // if you want to use them in other place, you have to use it carefully. + // StateBackfillSync means we use lightning to do backfill and start to sync to all TiDB + StateBackfillSync + // StateBackfill means now all the user transaction will know that new backfill solution is + // adapted and will write update to temp index to record delta part during backfill process. + StateBackfill + // StateMergeSync means backfill finished and start to sync to all TiDB to update both + // full copy index and delta part temp index. + StateMergeSync + // StateMerge means start merge delta part of index into full copy index. + StateMerge /* * Please add the new state at the end to keep the values consistent across versions. */ @@ -70,6 +82,14 @@ func (s SchemaState) String() string { return "replica only" case StateGlobalTxnOnly: return "global txn only" + case StateBackfillSync: + return "StateBackFillSync" + case StateBackfill: + return "StateBackFill" + case StateMergeSync: + return "StateMergeSync" + case StateMerge: + return "StateMerge" default: return "none" } @@ -1245,6 +1265,7 @@ type IndexInfo struct { Table CIStr `json:"tbl_name"` // Table name. Columns []*IndexColumn `json:"idx_cols"` // Index columns. State SchemaState `json:"state"` + SubState SchemaState `json:"sub_state"` Comment string `json:"comment"` // Comment Tp IndexType `json:"index_type"` // Index type: Btree, Hash or Rtree Unique bool `json:"is_unique"` // Whether the index is unique. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 337ca024b0160..b383ac9f496f5 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -741,12 +741,12 @@ const ( // TiDBMaxAutoAnalyzeTime is the max time that auto analyze can run. If auto analyze runs longer than the value, it // will be killed. 0 indicates that there is no time limit. 
 	TiDBMaxAutoAnalyzeTime = "tidb_max_auto_analyze_time"
+	// TiDBEnableConcurrentDDL indicates whether to enable the new DDL framework.
+	TiDBEnableConcurrentDDL = "tidb_enable_concurrent_ddl"
 	// TiDBFastDDL indicates whether use lighting to help acceleate adding index stmt.
 	TiDBFastDDL = "tidb_fast_ddl"
 	// TiDBDiskQuota used to set disk quota for lightning add index.
 	TiDBDiskQuota = "tidb_disk_quota"
-	// TiDBEnableConcurrentDDL indicates whether to enable the new DDL framework.
-	TiDBEnableConcurrentDDL = "tidb_enable_concurrent_ddl"
 )
 
 // TiDB intentional limits
diff --git a/table/index.go b/table/index.go
index 62892c135b7c3..7bd5df38c9a63 100644
--- a/table/index.go
+++ b/table/index.go
@@ -76,4 +76,6 @@ type Index interface {
 	// Param columns is a reused buffer, if it is not nil, FetchValues will fill the index values in it,
 	// and return the buffer, if it is nil, FetchValues will allocate the buffer instead.
 	FetchValues(row []types.Datum, columns []types.Datum) ([]types.Datum, error)
	// Create4SST encodes the index KV for the lightning backfill path without writing it into the transaction.
+	Create4SST(ctx sessionctx.Context, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle, handleRestoreData []types.Datum, opts ...CreateIdxOptFunc) ([]byte, []byte, bool, error)
 }
diff --git a/table/table.go b/table/table.go
index 775cb03bb6cf9..fb9393a21e4c5 100644
--- a/table/table.go
+++ b/table/table.go
@@ -177,6 +177,9 @@ type Table interface {
 	// RecordPrefix returns the record key prefix.
 	RecordPrefix() kv.Key
 
+	// IndexPrefix returns the index key prefix.
+	IndexPrefix() kv.Key
+
 	// AddRecord inserts a row which should contain only public columns
 	AddRecord(ctx sessionctx.Context, r []types.Datum, opts ...AddRecordOption) (recordID kv.Handle, err error)
 
diff --git a/table/tables/index.go b/table/tables/index.go
index d7e88a824c7ab..5d1e95500582d 100644
--- a/table/tables/index.go
+++ b/table/tables/index.go
@@ -15,6 +15,7 @@
 package tables
 
 import (
+	"bytes"
 	"context"
 	"sync"
 
@@ -41,6 +42,8 @@ type index struct {
 	// the collation global variable is initialized *after* `NewIndex()`.
 	initNeedRestoreData sync.Once
 	needRestoredData    bool
+	// Isbackfill marks that this index object is used by a backfill worker of the new backfill flow.
+	Isbackfill bool
 }
 
 // NeedRestoredData checks whether the index columns needs restored data.
@@ -55,7 +58,7 @@ func NeedRestoredData(idxCols []*model.IndexColumn, colInfos []*model.ColumnInfo
 }
 
 // NewIndex builds a new Index object.
-func NewIndex(physicalID int64, tblInfo *model.TableInfo, indexInfo *model.IndexInfo) table.Index {
+func NewIndex(physicalID int64, tblInfo *model.TableInfo, indexInfo *model.IndexInfo, newBF ...bool) table.Index {
 	// The prefix can't encode from tblInfo.ID, because table partition may change the id to partition id.
 	var prefix kv.Key
 	if indexInfo.Global {
@@ -65,11 +68,16 @@ func NewIndex(physicalID int64, tblInfo *model.TableInfo, indexInfo *model.Index
 		// Otherwise, start with physicalID.
prefix = tablecodec.EncodeTableIndexPrefix(physicalID, indexInfo.ID) } + var newBackfillFlow bool = false + if len(newBF) > 0 { + newBackfillFlow = newBF[0] + } index := &index{ - idxInfo: indexInfo, - tblInfo: tblInfo, - prefix: prefix, - phyTblID: physicalID, + idxInfo: indexInfo, + tblInfo: tblInfo, + prefix: prefix, + phyTblID: physicalID, + Isbackfill: newBackfillFlow, } return index } @@ -108,6 +116,26 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue return nil, err } + var ( + tempKey []byte + keyVer []byte = []byte("0") + ) + if c.idxInfo.State == model.StateWriteReorganization && !c.Isbackfill { + switch c.idxInfo.SubState { + case model.StateNone: + // do nothing. + case model.StateBackfillSync, model.StateBackfill: + // Write to the temporary index. + keyVer = []byte("1") + tablecodec.IndexKey2TempIndexKey(c.idxInfo.ID, key) + case model.StateMergeSync, model.StateMerge: + // Double write + keyVer = []byte("2") + tempKey = append(tempKey, key...) + tablecodec.IndexKey2TempIndexKey(c.idxInfo.ID, tempKey) + } + } + ctx := opt.Ctx if opt.Untouched { txn, err1 := sctx.Txn(true) @@ -150,10 +178,19 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue opt.IgnoreAssertion = opt.IgnoreAssertion || c.idxInfo.State != model.StatePublic if !distinct || skipCheck || opt.Untouched { + if !bytes.Equal(keyVer, []byte("0")) { + idxVal = append(idxVal, keyVer...) + } err = txn.GetMemBuffer().Set(key, idxVal) if err != nil { return nil, err } + if len(tempKey) > 0 { + err = txn.GetMemBuffer().Set(tempKey, idxVal) + if err != nil { + return nil, err + } + } if !opt.IgnoreAssertion && (!opt.Untouched) { if sctx.GetSessionVars().LazyCheckKeyNotExists() && !txn.IsPessimistic() { err = txn.SetAssertion(key, kv.SetAssertUnknown) @@ -188,6 +225,9 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue } if err != nil || len(value) == 0 { lazyCheck := sctx.GetSessionVars().LazyCheckKeyNotExists() && err != nil + if !bytes.Equal(keyVer, []byte("0")) { + idxVal = append(idxVal, keyVer...) + } if lazyCheck { err = txn.GetMemBuffer().SetWithFlags(key, idxVal, kv.SetPresumeKeyNotExists) } else { @@ -196,6 +236,16 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue if err != nil { return nil, err } + if len(tempKey) > 0 { + if lazyCheck { + err = txn.GetMemBuffer().SetWithFlags(tempKey, idxVal, kv.SetPresumeKeyNotExists) + } else { + err = txn.GetMemBuffer().Set(tempKey, idxVal) + } + if err != nil { + return nil, err + } + } if opt.IgnoreAssertion { return nil, nil } @@ -214,19 +264,96 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue return handle, kv.ErrKeyExists } +// Create4SST creates a new entry in the kvIndex data for lightning backfiller. +// If the index is unique and there is an existing entry with the same key, +// Create will return the existing entry's handle as the first return value, ErrKeyExists as the second return value. 
+func (c *index) Create4SST(sctx sessionctx.Context, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle, handleRestoreData []types.Datum, opts ...table.CreateIdxOptFunc) ([]byte, []byte, bool, error) { + if c.Meta().Unique { + txn.CacheTableInfo(c.phyTblID, c.tblInfo) + } + var opt table.CreateIdxOpt + for _, fn := range opts { + fn(&opt) + } + vars := sctx.GetSessionVars() + writeBufs := vars.GetWriteStmtBufs() + key, distinct, err := c.GenIndexKey(vars.StmtCtx, indexedValues, h, writeBufs.IndexKeyBuf) + if err != nil { + return key, nil, distinct, err + } + + // Save the key buffer to reuse. + writeBufs.IndexKeyBuf = key + c.initNeedRestoreData.Do(func() { + c.needRestoredData = NeedRestoredData(c.idxInfo.Columns, c.tblInfo.Columns) + }) + idxVal, err := tablecodec.GenIndexValuePortal(sctx.GetSessionVars().StmtCtx, c.tblInfo, c.idxInfo, c.needRestoredData, distinct, opt.Untouched, indexedValues, h, c.phyTblID, handleRestoreData) + if err != nil { + return key, nil, distinct, err + } + + return key, idxVal, distinct, err +} + // Delete removes the entry for handle h and indexedValues from KV index. func (c *index) Delete(sc *stmtctx.StatementContext, txn kv.Transaction, indexedValues []types.Datum, h kv.Handle) error { key, distinct, err := c.GenIndexKey(sc, indexedValues, h, nil) if err != nil { return err } + var ( + tempKey []byte + keyVer []byte = []byte("0") + val []byte + ) + if c.idxInfo.State == model.StateWriteReorganization { + switch c.idxInfo.SubState { + case model.StateNone: + // Do nothing. + case model.StateBackfillSync, model.StateBackfill: + // Write to the temporary index. + keyVer = []byte("1") + tempKey = append(tempKey, key...) + key = nil + tablecodec.IndexKey2TempIndexKey(c.idxInfo.ID, tempKey) + case model.StateMergeSync, model.StateMerge: + // Double write + keyVer = []byte("2") + tempKey = append(tempKey, key...) + tablecodec.IndexKey2TempIndexKey(c.idxInfo.ID, tempKey) + } + } + if distinct { - err = txn.GetMemBuffer().DeleteWithFlags(key, kv.SetNeedLocked) + if len(key) > 0 { + err = txn.GetMemBuffer().DeleteWithFlags(key, kv.SetNeedLocked) + if err != nil { + return err + } + } + if len(tempKey) > 0 { + val = append(val, []byte("deleteu")...) + val = append(val, keyVer...) + err = txn.GetMemBuffer().Set(tempKey, val) + if err != nil { + return err + } + } } else { - err = txn.GetMemBuffer().Delete(key) - } - if err != nil { - return err + if len(key) > 0 { + err = txn.GetMemBuffer().Delete(key) + if err != nil { + return err + } + } + if len(tempKey) > 0 { + val = append(val, []byte("delete")...) + val = append(val, keyVer...) + err = txn.GetMemBuffer().Set(tempKey, val) + if err != nil { + return err + } + } } if c.idxInfo.State == model.StatePublic { // If the index is in public state, delete this index means it must exists. diff --git a/table/tables/mutation_checker.go b/table/tables/mutation_checker.go index 72fb297cf820d..7efaf49faf0ec 100644 --- a/table/tables/mutation_checker.go +++ b/table/tables/mutation_checker.go @@ -15,6 +15,7 @@ package tables import ( + "bytes" "fmt" "strings" @@ -123,6 +124,10 @@ func CheckDataConsistency( return nil } +func convertTempIdxID(tempIndexID int64) int64 { + return tempIndexID & tablecodec.IndexIDMask +} + // checkHandleConsistency checks whether the handles, with regard to a single-row change, // in row insertions and index insertions are consistent. // A PUT_index implies a PUT_row with the same handle. 
@@ -144,7 +149,9 @@ func checkHandleConsistency(rowInsertion mutation, indexMutations []mutation, in
 			continue
 		}
 
-		indexInfo, ok := indexIDToInfo[m.indexID]
+		// Convert a possible temp index ID to the original index ID for the check.
+		idxID := convertTempIdxID(m.indexID)
+		indexInfo, ok := indexIDToInfo[idxID]
 		if !ok {
 			return errors.New("index not found")
 		}
@@ -183,22 +190,32 @@ func checkIndexKeys(
 ) error {
 	var indexData []types.Datum
 	for _, m := range indexMutations {
-		indexInfo, ok := indexIDToInfo[m.indexID]
+		var value []byte
+		// Convert a possible temp index ID to the original index ID for the check.
+		idxID := convertTempIdxID(m.indexID)
+		indexInfo, ok := indexIDToInfo[idxID]
 		if !ok {
 			return errors.New("index not found")
 		}
-		rowColInfos, ok := indexIDToRowColInfos[m.indexID]
+		rowColInfos, ok := indexIDToRowColInfos[idxID]
 		if !ok {
 			return errors.New("index not found")
 		}
 
+		// If this is temp index data, strip the trailing key-version byte before decoding.
+		if idxID != m.indexID {
+			value = append(value, m.value[:len(m.value)-1]...)
+		} else {
+			value = append(value, m.value...)
+		}
+
 		// when we cannot decode the key to get the original value
-		if len(m.value) == 0 && NeedRestoredData(indexInfo.Columns, t.Meta().Columns) {
+		if len(value) == 0 && NeedRestoredData(indexInfo.Columns, t.Meta().Columns) {
 			continue
 		}
 
 		decodedIndexValues, err := tablecodec.DecodeIndexKV(
-			m.key, m.value, len(indexInfo.Columns), tablecodec.HandleNotNeeded, rowColInfos,
+			m.key, value, len(indexInfo.Columns), tablecodec.HandleNotNeeded, rowColInfos,
 		)
 		if err != nil {
 			return errors.Trace(err)
@@ -220,7 +237,8 @@ func checkIndexKeys(
 			indexData = append(indexData, datum)
 		}
 
-		if len(m.value) == 0 {
+		// In the new add-index backfill flow, temp-index delete markers are treated as removals.
+		if len(value) == 0 || (idxID != m.indexID && (bytes.Equal(value, []byte("deleteu")) || bytes.Equal(value, []byte("delete")))) {
 			err = compareIndexData(sessVars.StmtCtx, t.Columns, indexData, rowToRemove, indexInfo, t.Meta())
 		} else {
 			err = compareIndexData(sessVars.StmtCtx, t.Columns, indexData, rowToInsert, indexInfo, t.Meta())
diff --git a/table/tables/tables.go b/table/tables/tables.go
index 9b35a1af8cc3b..a0fc04ca47e92 100644
--- a/table/tables/tables.go
+++ b/table/tables/tables.go
@@ -321,6 +321,11 @@ func (t *TableCommon) RecordPrefix() kv.Key {
 	return t.recordPrefix
}
 
+// IndexPrefix implements table.Table interface.
+func (t *TableCommon) IndexPrefix() kv.Key {
+	return t.indexPrefix
+}
+
 // RecordKey implements table.Table interface.
 func (t *TableCommon) RecordKey(h kv.Handle) kv.Key {
 	return tablecodec.EncodeRecordKey(t.recordPrefix, h)
diff --git a/tablecodec/tablecodec.go b/tablecodec/tablecodec.go
index 3db97ef11f9c0..f47acad4372c9 100644
--- a/tablecodec/tablecodec.go
+++ b/tablecodec/tablecodec.go
@@ -923,6 +923,19 @@ func DecodeIndexHandle(key, value []byte, colsLen int) (kv.Handle, error) {
 	return nil, errors.Errorf("no handle in index key: %v, value: %v", key, value)
 }
 
+// IndexKVIsUnique is used to judge whether an index KV value comes from a unique index. It can handle KV already committed by a txn, but it does not consider the untouched flag.
+func IndexKVIsUnique(value []byte) bool { + if len(value) <= MaxOldEncodeValueLen { + return len(value) == 8 + } + if getIndexVersion(value) == 1 { + segs := SplitIndexValueForClusteredIndexVersion1(value) + return segs.CommonHandle != nil + } + segs := SplitIndexValue(value) + return segs.IntHandle != nil || segs.CommonHandle != nil +} + func decodeHandleInIndexKey(keySuffix []byte) (kv.Handle, error) { remain, d, err := codec.DecodeOne(keySuffix) if err != nil { @@ -1124,6 +1137,24 @@ func GenIndexKey(sc *stmtctx.StatementContext, tblInfo *model.TableInfo, idxInfo return } +// TempIndexPrefix used to gen temp idx id from index id. +const TempIndexPrefix = 0x7fff000000000000 + +// IndexIDMask used to get index id from index id/temp idx id +const IndexIDMask = 0xffffffffffff + +// IndexKey2TempIndexKey gen a temp index Key +func IndexKey2TempIndexKey(indexID int64, key []byte) { + eid := codec.EncodeIntToCmpUint(TempIndexPrefix | indexID) + binary.BigEndian.PutUint64(key[11:], eid) +} + +// TempIndexKey2IndexKey gen a index key from temp index key +func TempIndexKey2IndexKey(indexID int64, key []byte) { + eid := codec.EncodeIntToCmpUint(indexID) + binary.BigEndian.PutUint64(key[11:], eid) +} + // GenIndexValuePortal is the portal for generating index value. // Value layout: // +-- IndexValueVersion0 (with restore data, or common handle, or index is global) From 382c781c1517a1915de2476ba1f7d05f3ee58ace Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Fri, 8 Jul 2022 10:53:36 +0800 Subject: [PATCH 05/16] Update --- br/pkg/conn/util/BUILD.bazel | 15 ++++++ br/pkg/lightning/BUILD.bazel | 2 + br/pkg/lightning/backend/kv/BUILD.bazel | 33 +------------ .../lightning/backend/kv/kvtest/BUILD.bazel | 31 ++++++++++++ br/pkg/lightning/backend/local/BUILD.bazel | 4 +- .../lightning/backend/local/mock/BUILD.bazel | 15 ++++++ br/pkg/lightning/backend/noop/BUILD.bazel | 1 + br/pkg/restore/BUILD.bazel | 2 + br/pkg/restore/split/BUILD.bazel | 34 +++++++++++++ br/pkg/storage/BUILD.bazel | 1 + br/pkg/utils/utildb/BUILD.bazel | 35 +++++++++++++ ddl/BUILD.bazel | 4 ++ ddl/lightning/BUILD.bazel | 49 +++++++++++++++++++ infoschema/BUILD.bazel | 2 + planner/core/BUILD.bazel | 1 + 15 files changed, 195 insertions(+), 34 deletions(-) create mode 100644 br/pkg/conn/util/BUILD.bazel create mode 100644 br/pkg/lightning/backend/kv/kvtest/BUILD.bazel create mode 100644 br/pkg/lightning/backend/local/mock/BUILD.bazel create mode 100644 br/pkg/restore/split/BUILD.bazel create mode 100644 br/pkg/utils/utildb/BUILD.bazel create mode 100644 ddl/lightning/BUILD.bazel diff --git a/br/pkg/conn/util/BUILD.bazel b/br/pkg/conn/util/BUILD.bazel new file mode 100644 index 0000000000000..4b658a4d39524 --- /dev/null +++ b/br/pkg/conn/util/BUILD.bazel @@ -0,0 +1,15 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "util", + srcs = ["util.go"], + importpath = "github.com/pingcap/tidb/br/pkg/conn/util", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/errors", + "//br/pkg/version", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_tikv_pd_client//:client", + ], +) diff --git a/br/pkg/lightning/BUILD.bazel b/br/pkg/lightning/BUILD.bazel index 99d534762fc69..e658f759a570c 100644 --- a/br/pkg/lightning/BUILD.bazel +++ b/br/pkg/lightning/BUILD.bazel @@ -26,6 +26,8 @@ go_library( "//br/pkg/storage", "//br/pkg/utils", "//br/pkg/version/build", + "//expression", + "//planner/core", "//util/promutil", "@com_github_pingcap_errors//:errors", 
"@com_github_pingcap_failpoint//:failpoint", diff --git a/br/pkg/lightning/backend/kv/BUILD.bazel b/br/pkg/lightning/backend/kv/BUILD.bazel index f0b8c5545c330..ff7cfc76c8f4c 100644 --- a/br/pkg/lightning/backend/kv/BUILD.bazel +++ b/br/pkg/lightning/backend/kv/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "kv", @@ -25,7 +25,6 @@ go_library( "//meta/autoid", "//parser/model", "//parser/mysql", - "//planner/core", "//sessionctx", "//sessionctx/variable", "//table", @@ -42,33 +41,3 @@ go_library( "@org_uber_go_zap//zapcore", ], ) - -go_test( - name = "kv_test", - srcs = [ - "session_test.go", - "sql2kv_test.go", - ], - embed = [":kv"], - deps = [ - "//br/pkg/lightning/common", - "//br/pkg/lightning/log", - "//br/pkg/lightning/verification", - "//ddl", - "//kv", - "//meta/autoid", - "//parser", - "//parser/ast", - "//parser/model", - "//parser/mysql", - "//sessionctx", - "//table", - "//table/tables", - "//tablecodec", - "//types", - "//util/mock", - "@com_github_stretchr_testify//require", - "@org_uber_go_zap//:zap", - "@org_uber_go_zap//zapcore", - ], -) diff --git a/br/pkg/lightning/backend/kv/kvtest/BUILD.bazel b/br/pkg/lightning/backend/kv/kvtest/BUILD.bazel new file mode 100644 index 0000000000000..c06dab7be3b32 --- /dev/null +++ b/br/pkg/lightning/backend/kv/kvtest/BUILD.bazel @@ -0,0 +1,31 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "kvtest_test", + srcs = [ + "session_test.go", + "sql2kv_test.go", + ], + deps = [ + "//br/pkg/lightning/backend/kv", + "//br/pkg/lightning/common", + "//br/pkg/lightning/log", + "//br/pkg/lightning/verification", + "//ddl", + "//kv", + "//meta/autoid", + "//parser", + "//parser/ast", + "//parser/model", + "//parser/mysql", + "//sessionctx", + "//table", + "//table/tables", + "//tablecodec", + "//types", + "//util/mock", + "@com_github_stretchr_testify//require", + "@org_uber_go_zap//:zap", + "@org_uber_go_zap//zapcore", + ], +) diff --git a/br/pkg/lightning/backend/local/BUILD.bazel b/br/pkg/lightning/backend/local/BUILD.bazel index 02358eb492d32..716866f411846 100644 --- a/br/pkg/lightning/backend/local/BUILD.bazel +++ b/br/pkg/lightning/backend/local/BUILD.bazel @@ -32,7 +32,7 @@ go_library( "//br/pkg/logutil", "//br/pkg/membuf", "//br/pkg/pdutil", - "//br/pkg/restore", + "//br/pkg/restore/split", "//br/pkg/utils", "//br/pkg/version", "//distsql", @@ -98,7 +98,7 @@ go_test( "//br/pkg/membuf", "//br/pkg/mock", "//br/pkg/pdutil", - "//br/pkg/restore", + "//br/pkg/restore/split", "//br/pkg/utils", "//br/pkg/version", "//kv", diff --git a/br/pkg/lightning/backend/local/mock/BUILD.bazel b/br/pkg/lightning/backend/local/mock/BUILD.bazel new file mode 100644 index 0000000000000..82b5b0ef04696 --- /dev/null +++ b/br/pkg/lightning/backend/local/mock/BUILD.bazel @@ -0,0 +1,15 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "mock_test", + srcs = ["local_test.go"], + deps = [ + "//br/pkg/lightning/backend", + "//br/pkg/lightning/backend/local", + "//br/pkg/lightning/mydump", + "//br/pkg/mock", + "@com_github_coreos_go_semver//semver", + "@com_github_golang_mock//gomock", + "@com_github_stretchr_testify//require", + ], +) diff --git a/br/pkg/lightning/backend/noop/BUILD.bazel b/br/pkg/lightning/backend/noop/BUILD.bazel index a38fcd8195761..c75adf7035fba 100644 --- a/br/pkg/lightning/backend/noop/BUILD.bazel +++ b/br/pkg/lightning/backend/noop/BUILD.bazel @@ -8,6 +8,7 @@ go_library( deps = [ 
"//br/pkg/lightning/backend", "//br/pkg/lightning/backend/kv", + "//br/pkg/lightning/common", "//br/pkg/lightning/config", "//br/pkg/lightning/log", "//br/pkg/lightning/verification", diff --git a/br/pkg/restore/BUILD.bazel b/br/pkg/restore/BUILD.bazel index e18abc5e82b59..58c1f13782b9f 100644 --- a/br/pkg/restore/BUILD.bazel +++ b/br/pkg/restore/BUILD.bazel @@ -45,6 +45,7 @@ go_library( "//statistics/handle", "//store/pdtypes", "//tablecodec", + "//util", "//util/codec", "//util/hack", "//util/mathutil", @@ -110,6 +111,7 @@ go_test( "//br/pkg/rtree", "//br/pkg/storage", "//br/pkg/utils", + "//infoschema", "//kv", "//meta/autoid", "//parser/model", diff --git a/br/pkg/restore/split/BUILD.bazel b/br/pkg/restore/split/BUILD.bazel new file mode 100644 index 0000000000000..10575d5849ecc --- /dev/null +++ b/br/pkg/restore/split/BUILD.bazel @@ -0,0 +1,34 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "split", + srcs = [ + "region.go", + "split.go", + "split_client.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/restore/split", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/conn/util", + "//br/pkg/errors", + "//br/pkg/httputil", + "//br/pkg/logutil", + "//br/pkg/redact", + "//br/pkg/utils/utildb", + "//store/pdtypes", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_kvproto//pkg/errorpb", + "@com_github_pingcap_kvproto//pkg/kvrpcpb", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_kvproto//pkg/pdpb", + "@com_github_pingcap_kvproto//pkg/tikvpb", + "@com_github_pingcap_log//:log", + "@com_github_tikv_pd_client//:client", + "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//credentials", + "@org_uber_go_multierr//:multierr", + "@org_uber_go_zap//:zap", + ], +) diff --git a/br/pkg/storage/BUILD.bazel b/br/pkg/storage/BUILD.bazel index 46150497b872b..762df1ae59957 100644 --- a/br/pkg/storage/BUILD.bazel +++ b/br/pkg/storage/BUILD.bazel @@ -33,6 +33,7 @@ go_library( "@com_github_aws_aws_sdk_go//aws/session", "@com_github_aws_aws_sdk_go//service/s3", "@com_github_aws_aws_sdk_go//service/s3/s3iface", + "@com_github_aws_aws_sdk_go//service/s3/s3manager", "@com_github_azure_azure_sdk_for_go_sdk_azidentity//:azidentity", "@com_github_azure_azure_sdk_for_go_sdk_storage_azblob//:azblob", "@com_github_google_uuid//:uuid", diff --git a/br/pkg/utils/utildb/BUILD.bazel b/br/pkg/utils/utildb/BUILD.bazel new file mode 100644 index 0000000000000..d91bc0ebb62b2 --- /dev/null +++ b/br/pkg/utils/utildb/BUILD.bazel @@ -0,0 +1,35 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "utildb", + srcs = [ + "db.go", + "retry.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/utils/utildb", + visibility = ["//visibility:public"], + deps = [ + "//errno", + "//kv", + "@com_github_go_sql_driver_mysql//:mysql", + "@com_github_pingcap_errors//:errors", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + "@org_uber_go_multierr//:multierr", + ], +) + +go_test( + name = "utildb_test", + srcs = ["retry_test.go"], + embed = [":utildb"], + deps = [ + "//errno", + "@com_github_go_sql_driver_mysql//:mysql", + "@com_github_pingcap_check//:check", + "@com_github_pingcap_errors//:errors", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + "@org_uber_go_multierr//:multierr", + ], +) diff --git a/ddl/BUILD.bazel b/ddl/BUILD.bazel index 3327bf550acb9..174f59609ccc8 100644 --- a/ddl/BUILD.bazel +++ 
b/ddl/BUILD.bazel @@ -16,6 +16,7 @@ go_library( "foreign_key.go", "generated_column.go", "index.go", + "index_lightning.go", "mock.go", "multi_schema_change.go", "options.go", @@ -37,6 +38,7 @@ go_library( deps = [ "//config", "//ddl/label", + "//ddl/lightning", "//ddl/placement", "//ddl/util", "//distsql", @@ -65,6 +67,7 @@ go_library( "//statistics/handle", "//store/copr", "//store/driver/backoff", + "//store/driver/txn", "//store/helper", "//table", "//table/tables", @@ -142,6 +145,7 @@ go_test( "fail_test.go", "foreign_key_test.go", "index_change_test.go", + "index_lightning_test.go", "index_modify_test.go", "integration_test.go", "main_test.go", diff --git a/ddl/lightning/BUILD.bazel b/ddl/lightning/BUILD.bazel new file mode 100644 index 0000000000000..b2bb886d72db2 --- /dev/null +++ b/ddl/lightning/BUILD.bazel @@ -0,0 +1,49 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "lightning", + srcs = [ + "backend.go", + "engine.go", + "engine_mgr.go", + "env.go", + "message.go", + "res_mgr.go", + ], + importpath = "github.com/pingcap/tidb/ddl/lightning", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/lightning/backend", + "//br/pkg/lightning/backend/kv", + "//br/pkg/lightning/backend/local", + "//br/pkg/lightning/checkpoints", + "//br/pkg/lightning/common", + "//br/pkg/lightning/config", + "//br/pkg/lightning/glue", + "//br/pkg/lightning/log", + "//config", + "//parser", + "//parser/model", + "//parser/mysql", + "//sessionctx/variable", + "//table", + "//util/logutil", + "@com_github_docker_go_units//:go-units", + "@com_github_google_uuid//:uuid", + "@com_github_pkg_errors//:errors", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "lightning_test", + srcs = [ + "backend_test.go", + "env_test.go", + ], + embed = [":lightning"], + deps = [ + "//sessionctx/variable", + "@com_github_stretchr_testify//require", + ], +) diff --git a/infoschema/BUILD.bazel b/infoschema/BUILD.bazel index 1a76d99316cca..ab05fc990e7ee 100644 --- a/infoschema/BUILD.bazel +++ b/infoschema/BUILD.bazel @@ -7,6 +7,7 @@ go_library( "cache.go", "cluster.go", "error.go", + "info_store.go", "infoschema.go", "metric_table_def.go", "metrics_schema.go", @@ -61,6 +62,7 @@ go_test( srcs = [ "cache_test.go", "cluster_tables_test.go", + "info_store_test.go", "infoschema_test.go", "main_test.go", "metrics_schema_test.go", diff --git a/planner/core/BUILD.bazel b/planner/core/BUILD.bazel index 83342809499ce..50bcbf41f62f3 100644 --- a/planner/core/BUILD.bazel +++ b/planner/core/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "core", srcs = [ + "access_object.go", "cache.go", "cacheable_checker.go", "collect_column_stats_usage.go", From f498d2a1941d6430584b4792bcc9cd5476076f76 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Mon, 11 Jul 2022 13:11:37 +0800 Subject: [PATCH 06/16] fix ut problems --- ddl/util/main_test.go | 1 + owner/main_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/ddl/util/main_test.go b/ddl/util/main_test.go index a28cdcb4b5bfc..1cb1f40846368 100644 --- a/ddl/util/main_test.go +++ b/ddl/util/main_test.go @@ -26,6 +26,7 @@ func TestMain(m *testing.M) { opts := []goleak.Option{ goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"), goleak.IgnoreTopFunction("go.etcd.io/etcd/client/pkg/v3/logutil.(*MergeLogger).outputLoop"), + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), } goleak.VerifyTestMain(m, opts...) 
} diff --git a/owner/main_test.go b/owner/main_test.go index 501cae73e4c5b..96976b2db824b 100644 --- a/owner/main_test.go +++ b/owner/main_test.go @@ -26,6 +26,7 @@ func TestMain(m *testing.M) { opts := []goleak.Option{ goleak.IgnoreTopFunction("github.com/golang/glog.(*loggingT).flushDaemon"), goleak.IgnoreTopFunction("go.etcd.io/etcd/client/pkg/v3/logutil.(*MergeLogger).outputLoop"), + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), } goleak.VerifyTestMain(m, opts...) } From 02bc9247a11b9fd6d38d481e9e7932a986332937 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Tue, 12 Jul 2022 13:27:21 +0800 Subject: [PATCH 07/16] fix merge index problem. --- ddl/backfilling.go | 3 ++- ddl/ddl_worker.go | 18 ++++++++---------- ddl/index.go | 1 + ddl/index_lightning.go | 27 +++++++++++++++------------ ddl/index_lightning_test.go | 8 ++++---- sessionctx/variable/tidb_vars.go | 2 +- 6 files changed, 31 insertions(+), 28 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 0f600b44abd44..8faeecaecbec8 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -937,6 +937,7 @@ func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfi failpoint.Return(errors.New("job.ErrCount:" + strconv.Itoa(int(job.ErrorCount)) + ", mock unknown type: ast.whenClause.")) } }) + jc := w.jobContext(job) // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". workerCnt := variable.GetDDLReorgWorkerCounter() @@ -981,7 +982,7 @@ func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfi sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() - idxWorker := newTempIndexWorker(sessCtx, w, i, t, indexInfo, reorgInfo) + idxWorker := newTempIndexWorker(sessCtx, w, i, t, indexInfo, reorgInfo, jc) idxWorker.priority = job.Priority mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) go idxWorker.backfillWorker.runMerge(reorgInfo.d, idxWorker, job) diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index b852aca8604b7..de3c3ed05c756 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -452,27 +452,25 @@ func jobNeedGC(job *model.Job) bool { } switch job.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: - var needGC bool = false if job.State != model.JobStateRollbackDone { // When using lightning backfill, the job.Args length > 0 var indexID int64 = 0 var partitionIDs []int64 err := job.DecodeArgs(&indexID, &partitionIDs) + if indexID == 0 { + // Means there is no temp index ID stored in jobArgs. + return false + } if err != nil { logutil.BgLogger().Info("Lightning clean temp index data failed, please clean it manually,", zap.String("Job Args:", job.String()), zap.String("RawArgs:", string(job.RawArgs))) + return false } - - if err == nil && indexID != 0 { - needGC = true + if indexID != 0 { + return true } - } else { - needGC = true - } - // After rolling back an AddIndex operation, we need to use delete-range to delete the half-done index data. 
- if needGC { - return true } + return true case model.ActionDropSchema, model.ActionDropTable, model.ActionTruncateTable, model.ActionDropIndex, model.ActionDropPrimaryKey, model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionDropColumn, model.ActionModifyColumn: return true diff --git a/ddl/index.go b/ddl/index.go index 104050ecd49a0..7c0311f629ef7 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -625,6 +625,7 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo job.FinishTableJob(model.JobStateDone, model.StatePublic, ver, tblInfo) // Clean temp index if needed if eid != 0 { + job.Args =job.Args[:0] job.Args = []interface{}{eid, getPartitionIDs(tblInfo)} } default: diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index 97e93d3a3ae23..96cbdbef9a746 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -230,7 +230,8 @@ func (w *addIndexWorkerLit) BackfillDataInTxn(handleRange reorgBackfillTask) (ta txnTag := "AddIndexLightningBackfillDataInTxn" + strconv.Itoa(w.id) oprStartTime := time.Now() - errInTxn = kv.RunInNewTxn(context.Background(), w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) + errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { taskCtx.addedCount = 0 taskCtx.scanCount = 0 txn.SetOption(kv.Priority, w.priority) @@ -350,9 +351,16 @@ func (w *backFillIndexWorker) batchSkipKey(txn kv.Transaction, store kv.Storage, var keyVer []byte length := len(val) keyVer = append(keyVer, val[length-1:]...) + pos := w.tmpKeyPos[i] if bytes.Equal(keyVer, []byte("2")) { - pos := w.tmpKeyPos[i] idxRecords[pos].skip = true + } else { + // We need to add this lock to make sure pessimistic transaction can realize this operation. + // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. + err = txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecords[pos].key) + if err != nil { + return errors.Trace(err) + } } } } @@ -381,23 +389,25 @@ type backFillIndexWorker struct { tmpIdxRecords []*temporaryIndexRecord batchCheckTmpKeys []kv.Key tmpKeyPos []int32 + jobContext *JobContext } -func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo) *backFillIndexWorker { +func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) // Add build openengine process. return &backFillIndexWorker{ backfillWorker: newBackfillWorker(sessCtx, id, t, reorgInfo), index: index, + jobContext: jc, } } // BackfillDataInTxn merge temp index data in txn. 
func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error) { - logutil.BgLogger().Info("Merge temp index", zap.ByteString("startKey", taskRange.startKey), zap.ByteString("endKey", taskRange.endKey)) oprStartTime := time.Now() - errInTxn = kv.RunInNewTxn(context.Background(), w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { + ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) + errInTxn = kv.RunInNewTxn(ctx, w.sessCtx.GetStore(), true, func(ctx context.Context, txn kv.Transaction) error { taskCtx.addedCount = 0 taskCtx.scanCount = 0 txn.SetOption(kv.Priority, w.priority) @@ -508,7 +518,6 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r taskDone := indexKey.Cmp(taskRange.endKey) > 0 if taskDone || len(w.tmpIdxRecords) >= w.batchCnt { - logutil.BgLogger().Info("return false") return false, nil } @@ -541,12 +550,6 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r w.tmpIdxRecords = append(w.tmpIdxRecords, idxRecord) if bytes.Equal(keyVer, []byte("1")) { - // We need to add this lock to make sure pessimistic transaction can realize this operation. - // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. - err = txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecord.key) - if err != nil { - return false, errors.Trace(err) - } w.batchCheckTmpKeys = append(w.batchCheckTmpKeys, indexKey) w.tmpKeyPos = append(w.tmpKeyPos, pos) } diff --git a/ddl/index_lightning_test.go b/ddl/index_lightning_test.go index 9b0e862a88fad..36a4769d6b064 100644 --- a/ddl/index_lightning_test.go +++ b/ddl/index_lightning_test.go @@ -29,11 +29,11 @@ import ( ) func testLitAddIndex(tk *testkit.TestKit, t *testing.T, ctx sessionctx.Context, tblID int64, unique bool, indexName string, colName string, dom *domain.Domain) int64 { - un := "" + uniqueStr := "" if unique { - un = "unique" + uniqueStr = "unique" } - sql := fmt.Sprintf("alter table t add %s index %s(%s)", un, indexName, colName) + sql := fmt.Sprintf("alter table t add %s index %s(%s)", uniqueStr, indexName, colName) tk.MustExec(sql) idi, _ := strconv.Atoi(tk.MustQuery("admin show ddl jobs 1;").Rows()[0][0].(string)) @@ -77,7 +77,7 @@ func TestAddIndexLit(t *testing.T) { tk.MustExec("create table t (c1 int primary key, c2 int, c3 int)") tk.MustExec("insert t values (1, 1, 1), (2, 2, 2), (3, 3, 1);") tk.MustExec("set @@global.tidb_fast_ddl = on") - + var tableID int64 rs := tk.MustQuery("select TIDB_TABLE_ID from information_schema.tables where table_name='t' and table_schema='test';") tableIDi, _ := strconv.Atoi(rs.Rows()[0][0].(string)) diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index b383ac9f496f5..a37ab30543267 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -954,7 +954,7 @@ const ( DefStreamCountWhenMaxThreadsNotSet = 8 DefTiFlashFineGrainedShuffleBatchSize = 8192 DefTiDBFastDDL = false - DefTiDBDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB + DefTiDBDiskQuota = 10 * 1024 * 1024 * 1024 // 100GB ) // Process global variables. 
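
The patches above settle on a small convention for values written to the temporary index while the original index is in StateWriteReorganization: ordinary writes append a one-byte key version ("1" for backfill-state writes, "2" for merge-state double writes), and deletions store the literal markers "delete" or "deleteu" (unique index) followed by the same version byte. The following is a minimal, self-contained sketch of how a reader of the temporary index could interpret such values; the helper names (decodeTempIndexValue, tempIndexValue) are illustrative only and are not part of the patch series.

package main

import (
	"bytes"
	"fmt"
)

// tempIndexValue is an illustrative decoded form of a temporary-index value.
type tempIndexValue struct {
	payload  []byte // index value without the trailing key-version byte
	keyVer   byte   // '1' = backfill-state write, '2' = merge-state double write
	isDelete bool   // the value records a deletion
	distinct bool   // the deletion came from a unique index ("deleteu")
}

// decodeTempIndexValue strips the trailing key-version byte and recognizes the
// "delete"/"deleteu" markers, following the convention used in the patches.
func decodeTempIndexValue(val []byte) (tempIndexValue, error) {
	if len(val) < 2 {
		return tempIndexValue{}, fmt.Errorf("temp index value too short: %v", val)
	}
	res := tempIndexValue{keyVer: val[len(val)-1]}
	payload := val[:len(val)-1]
	switch {
	case bytes.Equal(payload, []byte("deleteu")):
		res.isDelete, res.distinct = true, true
	case bytes.Equal(payload, []byte("delete")):
		res.isDelete = true
	default:
		res.payload = payload
	}
	return res, nil
}

func main() {
	v, _ := decodeTempIndexValue(append([]byte("deleteu"), '2'))
	fmt.Printf("delete=%v unique=%v keyVer=%c\n", v.isDelete, v.distinct, v.keyVer)
}
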
From d6280cf93c5a329fa4a6d1619e022612e4e6f616 Mon Sep 17 00:00:00 2001
From: Benjamin2037
Date: Tue, 12 Jul 2022 22:38:59 +0800
Subject: [PATCH 08/16] frame changes

---
 ddl/lightning/backend.go | 21 +++++++++++++++++++++
 ddl/lightning/env.go     |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/ddl/lightning/backend.go b/ddl/lightning/backend.go
index d46f7c6e9d803..982f13a0d77a2 100644
--- a/ddl/lightning/backend.go
+++ b/ddl/lightning/backend.go
@@ -108,6 +108,11 @@ func GenBackendContextKey(jobID int64) string {
 // Adjust lightning memory parameters according memory root's max limitation
 func adjustImportMemory(cfg *config.Config) {
 	var scale int64
+	// Try the aggressive memory settings first; if they fit, skip the scaling below.
+	if tryAgressiveMemory(cfg) {
+		return
+	}
+
 	defaultMemSize := int64(cfg.TikvImporter.LocalWriterMemCacheSize) * int64(cfg.TikvImporter.RangeConcurrency)
 	defaultMemSize += 4 * int64(cfg.TikvImporter.EngineMemCacheSize)
 	log.L().Info(LitInfoInitMemSetting,
@@ -134,6 +139,22 @@ func adjustImportMemory(cfg *config.Config) {
 		zap.String("rangecounrrency:", strconv.Itoa(cfg.TikvImporter.RangeConcurrency)))
 }
 
+// tryAgressiveMemory reports whether the aggressive lightning memory settings fit within the memory root's max limitation.
+func tryAgressiveMemory(cfg *config.Config) bool {
+	var defaultMemSize int64
+	defaultMemSize = int64(128 * _mb * cfg.TikvImporter.RangeConcurrency)
+	defaultMemSize += int64(cfg.TikvImporter.EngineMemCacheSize)
+
+	if (defaultMemSize + GlobalEnv.LitMemRoot.currUsage) > GlobalEnv.LitMemRoot.maxLimit {
+		return false
+	}
+	log.L().Info(LitInfoChgMemSetting,
+		zap.String("LocalWriterMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.LocalWriterMemCacheSize), 10)),
+		zap.String("EngineMemCacheSize:", strconv.FormatInt(int64(cfg.TikvImporter.EngineMemCacheSize), 10)),
+		zap.String("rangeConcurrency:", strconv.Itoa(cfg.TikvImporter.RangeConcurrency)))
+	return true
+}
+
 type glueLit struct{}
 
 // Implement interface OwnsSQLExecutor
diff --git a/ddl/lightning/env.go b/ddl/lightning/env.go
index b14d4aa533b66..4a6b2d0eff9a8 100644
--- a/ddl/lightning/env.go
+++ b/ddl/lightning/env.go
@@ -96,9 +96,9 @@ func InitGolbalLightningBackendEnv() {
 	sbz := variable.GetSysVar("sort_buffer_size")
 	bufferSize, err = strconv.ParseUint(sbz.Value, 10, 64)
 	// If get bufferSize err, then maxMemLimtation is 128 MB
-	// Otherwise, the ddl maxMemLimitation is 1 GB
+	// Otherwise, the ddl maxMemLimitation is 2 GB
 	if err == nil {
-		maxMemLimit = bufferSize * 4 * _kb
+		maxMemLimit = bufferSize * 8 * _kb
 		log.L().Info(LitInfoSetMemLimit,
 			zap.String("Memory limitation set to:", strconv.FormatUint(maxMemLimit, 10)))
 	} else {

From 6f6dc384e465240c67f1d2d0d97f14f43d77752a Mon Sep 17 00:00:00 2001
From: Benjamin2037
Date: Tue, 12 Jul 2022 22:41:43 +0800
Subject: [PATCH 09/16] new backfill logic

---
 ddl/backfilling.go          | 297 +++++-------------------------------
 ddl/ddl_worker.go           |  18 ---
 ddl/delete_range.go         |  37 ++---
 ddl/index.go                |  87 ++++++-----
 ddl/index_lightning.go      |  40 ++++-
 ddl/index_lightning_test.go |   2 +-
 table/tables/index.go       |  10 +-
 7 files changed, 143 insertions(+), 348 deletions(-)

diff --git a/ddl/backfilling.go b/ddl/backfilling.go
index 8faeecaecbec8..902e4a1812f9b 100644
--- a/ddl/backfilling.go
+++ b/ddl/backfilling.go
@@ -921,7 +921,6 @@ func getIndexRangeEndKey(ctx *JobContext, store kv.Storage, priority int, t tabl
 func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfillWorkerType, indexInfo *model.IndexInfo, oldColInfo, colInfo *model.ColumnInfo, reorgInfo *reorgInfo) error 
{ job := reorgInfo.Job - totalAddedCount := job.GetRowCount() startKey, endKey := reorgInfo.StartKey, reorgInfo.EndKey @@ -932,159 +931,60 @@ func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfi return nil } - failpoint.Inject("MockCaseWhenParseFailure", func(val failpoint.Value) { - if val.(bool) { - failpoint.Return(errors.New("job.ErrCount:" + strconv.Itoa(int(job.ErrorCount)) + ", mock unknown type: ast.whenClause.")) - } - }) jc := w.jobContext(job) - // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". - workerCnt := variable.GetDDLReorgWorkerCounter() - - mergeWorkers := make([]*backfillWorker, 0, workerCnt) + mergeWorkers := make([]*backfillWorker, 1) defer func() { closeBackfillWorkers(mergeWorkers) }() - for { - kvRanges, err := splitTableRanges(t, reorgInfo.d.store, startKey, endKey) - if err != nil { - return errors.Trace(err) - } - - // For dynamic adjust backfill worker number. - if err := loadDDLReorgVars(w); err != nil { - logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) - } - workerCnt = variable.GetDDLReorgWorkerCounter() - // If only have 1 range, we can only start 1 worker. - if len(kvRanges) < int(workerCnt) { - workerCnt = int32(len(kvRanges)) - } - - // Enlarge the worker size. - for i := len(mergeWorkers); i < int(workerCnt); i++ { - sessCtx := newContext(reorgInfo.d.store) - sessCtx.GetSessionVars().StmtCtx.IsDDLJobInQueue = true - // Simulate the sql mode environment in the worker sessionCtx. - sqlMode := reorgInfo.ReorgMeta.SQLMode - sessCtx.GetSessionVars().SQLMode = sqlMode - if err := setSessCtxLocation(sessCtx, reorgInfo); err != nil { - return errors.Trace(err) - } - - sessCtx.GetSessionVars().StmtCtx.BadNullAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.TruncateAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.OverflowAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.AllowInvalidDate = sqlMode.HasAllowInvalidDatesMode() - sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() - sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() - - idxWorker := newTempIndexWorker(sessCtx, w, i, t, indexInfo, reorgInfo, jc) - idxWorker.priority = job.Priority - mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) - go idxWorker.backfillWorker.runMerge(reorgInfo.d, idxWorker, job) - } - // Shrink the worker size. 
- if len(mergeWorkers) > int(workerCnt) { - workers := mergeWorkers[workerCnt:] - mergeWorkers = mergeWorkers[:workerCnt] - closeBackfillWorkers(workers) - } - - failpoint.Inject("checkMergeWorkerNum", func(val failpoint.Value) { - if val.(bool) { - num := int(atomic.LoadInt32(&TestCheckWorkerNumber)) - if num != 0 { - if num > len(kvRanges) { - if len(mergeWorkers) != len(kvRanges) { - failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) - } - } else if num != len(mergeWorkers) { - failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) - } - var wg sync.WaitGroup - wg.Add(1) - TestCheckWorkerNumCh <- &wg - wg.Wait() - } - } - }) - - logutil.BgLogger().Info("[ddl] start merge workers to merge delta index changes", - zap.Int("workerCnt", len(mergeWorkers)), - zap.Int("regionCnt", len(kvRanges)), - zap.String("startHandle", tryDecodeToHandleString(startKey)), - zap.String("endHandle", tryDecodeToHandleString(endKey))) - - remains, err := w.sendRangeTaskToMergeWorkers(t, mergeWorkers, reorgInfo, &totalAddedCount, kvRanges, t.GetPhysicalID()) - if err != nil { - return errors.Trace(err) - } - if len(remains) == 0 { - break - } - startKey = remains[0].StartKey - if err != nil { - return errors.Trace(err) - } + // For dynamic adjust backfill worker number. + if err := loadDDLReorgVars(w); err != nil { + logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) } - return nil -} - -func (w *backfillWorker) runMerge(d *ddlCtx, bf backfiller, job *model.Job) { - logutil.BgLogger().Info("[ddl] merge worker start", zap.Int("workerID", w.id)) - defer func() { - w.resultCh <- &backfillResult{err: dbterror.ErrReorgPanic} - }() - defer util.Recover(metrics.LabelDDL, "backfillWorker.run", nil, false) - for { - task, more := <-w.taskCh - if !more { - break - } - d.setDDLLabelForTopSQL(job) - logutil.BgLogger().Debug("[ddl] merge worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) - failpoint.Inject("mockMergeRunErr", func() { - if w.id == 0 { - result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} - w.resultCh <- result - failpoint.Continue() - } - }) + sessCtx := newContext(reorgInfo.d.store) + sessCtx.GetSessionVars().StmtCtx.IsDDLJobInQueue = true + // Simulate the sql mode environment in the worker sessionCtx. 
+ sqlMode := reorgInfo.ReorgMeta.SQLMode + sessCtx.GetSessionVars().SQLMode = sqlMode + if err := setSessCtxLocation(sessCtx, reorgInfo); err != nil { + return errors.Trace(err) + } - failpoint.Inject("mockHighLoadForMergeIndex", func() { - sqlPrefixes := []string{"alter"} - topsql.MockHighCPULoad(job.Query, sqlPrefixes, 5) - }) + sessCtx.GetSessionVars().StmtCtx.BadNullAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.TruncateAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.OverflowAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.AllowInvalidDate = sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() - failpoint.Inject("mockMergeSlow", func() { - time.Sleep(30 * time.Millisecond) - }) + logutil.BgLogger().Info("[ddl] start merge workers to merge delta index changes", + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("endHandle", tryDecodeToHandleString(endKey))) - // Dynamic change batch size. - w.batchCnt = int(variable.GetDDLReorgBatchSize()) - result := w.handleMergeTask(d, task, bf) - w.resultCh <- result + idxWorker := newTempIndexWorker(sessCtx, w, t, indexInfo, reorgInfo, jc) + idxWorker.priority = job.Priority + mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) + // Dynamic change batch size. + idxWorker.batchCnt = int(variable.GetDDLReorgBatchSize()) + task := &reorgBackfillTask{ + physicalTableID: t.GetPhysicalID(), + startKey: startKey, + endKey: endKey} + err := idxWorker.backfillWorker.handleMergeTask(reorgInfo.d, task, idxWorker) + if err != nil { + return errors.Trace(err) } - logutil.BgLogger().Info("[ddl] merge worker exit", zap.Int("workerID", w.id)) + return nil } // handleMergeTask backfills range [task.startHandle, task.endHandle) handle's index to table. -func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { +func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) error { handleRange := *task - result := &backfillResult{ - err: nil, - addedCount: 0, - nextKey: handleRange.startKey, - } - lastLogCount := 0 lastLogTime := time.Now() startTime := lastLogTime - rc := d.getReorgCtx(w.reorgInfo.Job) for { // Give job chance to be canceled, if we not check it here, @@ -1093,38 +993,12 @@ func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf // we should check whether this ddl job is still runnable. err := d.isReorgRunnable(w.reorgInfo.Job) if err != nil { - result.err = err - return result + return err } taskCtx, err := bf.BackfillDataInTxn(handleRange) if err != nil { - result.err = err - return result - } - - mergeBackfillCtxToResult(&taskCtx, result) - - // Although `handleRange` is for data in one region, but back fill worker still split it into many - // small reorg batch size slices and reorg them in many different kv txn. - // If a task failed, it may contained some committed small kv txn which has already finished the - // small range reorganization. - // In the next round of reorganization, the target handle range may overlap with last committed - // small ranges. This will cause the `redo` action in reorganization. 
- // So for added count and warnings collection, it is recommended to collect the statistics in every - // successfully committed small ranges rather than fetching it in the total result. - rc.increaseRowCount(int64(taskCtx.addedCount)) - rc.mergeWarnings(taskCtx.warnings, taskCtx.warningsCount) - - if num := result.scanCount - lastLogCount; num >= 30000 { - lastLogCount = result.scanCount - logutil.BgLogger().Info("[ddl] backfill worker back fill index", - zap.Int("workerID", w.id), - zap.Int("addedCount", result.addedCount), - zap.Int("scanCount", result.scanCount), - zap.String("nextHandle", tryDecodeToHandleString(taskCtx.nextKey)), - zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) - lastLogTime = time.Now() + return err } handleRange.startKey = taskCtx.nextKey @@ -1134,101 +1008,6 @@ func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf } logutil.BgLogger().Info("[ddl] merge worker finish task", zap.Int("workerID", w.id), zap.String("task", task.String()), - zap.Int("addedCount", result.addedCount), - zap.Int("scanCount", result.scanCount), - zap.String("nextHandle", tryDecodeToHandleString(result.nextKey)), zap.String("takeTime", time.Since(startTime).String())) - return result -} - -// sendRangeTaskToWorkers sends tasks to workers, and returns remaining kvRanges that is not handled. -func (w *worker) sendRangeTaskToMergeWorkers(t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, - totalAddedCount *int64, kvRanges []kv.KeyRange, phyicID int64) ([]kv.KeyRange, error) { - batchTasks := make([]*reorgBackfillTask, 0, len(workers)) - physicalTableID := phyicID - - // Build reorg tasks. - for _, keyRange := range kvRanges { - endKey := keyRange.EndKey - endK, err := getIndexRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, t, keyRange.StartKey, endKey) - if err != nil { - logutil.BgLogger().Info("[ddl] send range task to workers, get reverse key failed", zap.Error(err)) - } else { - logutil.BgLogger().Info("[ddl] send range task to workers, change end key", - zap.String("end key", tryDecodeToHandleString(endKey)), zap.String("current end key", tryDecodeToHandleString(endK))) - endKey = endK - } - - task := &reorgBackfillTask{ - physicalTableID: physicalTableID, - startKey: keyRange.StartKey, - endKey: endKey} - batchTasks = append(batchTasks, task) - - if len(batchTasks) >= len(workers) { - break - } - } - - if len(batchTasks) == 0 { - return nil, nil - } - - // Wait tasks finish. - err := w.handleMergeTasks(reorgInfo, totalAddedCount, workers, batchTasks) - if err != nil { - return nil, errors.Trace(err) - } - - if len(batchTasks) < len(kvRanges) { - // There are kvRanges not handled. - remains := kvRanges[len(batchTasks):] - return remains, nil - } - - return nil, nil -} - -// handleReorgTasks sends tasks to workers, and waits for all the running workers to return results, -// there are taskCnt running workers. 
-func (w *worker) handleMergeTasks(reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { - for i, task := range batchTasks { - workers[i].taskCh <- task - } - - startKey := batchTasks[0].startKey - taskCnt := len(batchTasks) - startTime := time.Now() - nextKey, taskAddedCount, err := w.waitTaskResults(workers, taskCnt, totalAddedCount, startKey) - elapsedTime := time.Since(startTime) - if err == nil { - err = w.isReorgRunnable(reorgInfo.Job) - } - - if err != nil { - err := reorgInfo.UpdateReorgMeta(nextKey) - metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds()) - logutil.BgLogger().Warn("[ddl] merge worker handle batch tasks failed", - zap.ByteString("elementType", reorgInfo.currElement.TypeKey), - zap.Int64("elementID", reorgInfo.currElement.ID), - zap.Int64("totalAddedCount", *totalAddedCount), - zap.String("startHandle", tryDecodeToHandleString(startKey)), - zap.String("nextHandle", tryDecodeToHandleString(nextKey)), - zap.Int64("batchAddedCount", taskAddedCount), - zap.String("taskFailedError", err.Error()), - zap.String("takeTime", elapsedTime.String()), - zap.NamedError("updateHandleError", err)) - return errors.Trace(err) - } - // nextHandle will be updated periodically in runReorgJob, so no need to update it here. - w.getReorgCtx(reorgInfo.Job).setNextKey(nextKey) - logutil.BgLogger().Info("[ddl] Merge workers successfully processed batch", - zap.ByteString("elementType", reorgInfo.currElement.TypeKey), - zap.Int64("elementID", reorgInfo.currElement.ID), - zap.Int64("totalAddedCount", *totalAddedCount), - zap.String("startHandle", tryDecodeToHandleString(startKey)), - zap.String("nextHandle", tryDecodeToHandleString(nextKey)), - zap.Int64("batchAddedCount", taskAddedCount), - zap.String("takeTime", elapsedTime.String())) return nil } diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index de3c3ed05c756..0353280a578b4 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -452,24 +452,6 @@ func jobNeedGC(job *model.Job) bool { } switch job.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: - if job.State != model.JobStateRollbackDone { - // When using lightning backfill, the job.Args length > 0 - var indexID int64 = 0 - var partitionIDs []int64 - err := job.DecodeArgs(&indexID, &partitionIDs) - if indexID == 0 { - // Means there is no temp index ID stored in jobArgs. - return false - } - if err != nil { - logutil.BgLogger().Info("Lightning clean temp index data failed, please clean it manually,", zap.String("Job Args:", job.String()), - zap.String("RawArgs:", string(job.RawArgs))) - return false - } - if indexID != 0 { - return true - } - } return true case model.ActionDropSchema, model.ActionDropTable, model.ActionTruncateTable, model.ActionDropIndex, model.ActionDropPrimaryKey, model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionDropColumn, model.ActionModifyColumn: diff --git a/ddl/delete_range.go b/ddl/delete_range.go index ab1c7a3d41e42..bd2a1ccc7408a 100644 --- a/ddl/delete_range.go +++ b/ddl/delete_range.go @@ -329,44 +329,37 @@ func insertJobIntoDeleteRangeTable(ctx context.Context, sctx sessionctx.Context, if err := job.DecodeArgs(&indexID, &ifExists, &partitionIDs); err != nil { return errors.Trace(err) } - eid := tablecodec.TempIndexPrefix | indexID - newBackfill := false - // If indexID is temp id, means it is go through new backfill - if eid == indexID { - // eid represent origin index id. 
- eid = tablecodec.IndexIDMask & indexID - newBackfill = true - } + tmpID := tablecodec.TempIndexPrefix | indexID if len(partitionIDs) > 0 { for _, pid := range partitionIDs { - startKey := tablecodec.EncodeTableIndexPrefix(pid, indexID) - endKey := tablecodec.EncodeTableIndexPrefix(pid, indexID+1) - elemID := ea.allocForIndexID(pid, indexID) + startKey := tablecodec.EncodeTableIndexPrefix(pid, tmpID) + endKey := tablecodec.EncodeTableIndexPrefix(pid, tmpID+1) + elemID := ea.allocForIndexID(pid, tmpID) if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("partition table ID is %d", pid)); err != nil { return errors.Trace(err) } // Clean temp index data to avoid Garbage data that generate from adding index with lightning backfill data - if job.State == model.JobStateRollbackDone && newBackfill { - startKey := tablecodec.EncodeTableIndexPrefix(pid, eid) - endKey := tablecodec.EncodeTableIndexPrefix(pid, eid+1) - elemID := ea.allocForIndexID(pid, eid) + if job.State == model.JobStateRollbackDone { + startKey := tablecodec.EncodeTableIndexPrefix(pid, indexID) + endKey := tablecodec.EncodeTableIndexPrefix(pid, indexID+1) + elemID := ea.allocForIndexID(pid, indexID) if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("partition table ID is %d", pid)); err != nil { return errors.Trace(err) } } } } else { - startKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID) - endKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID+1) - elemID := ea.allocForIndexID(tableID, indexID) + startKey := tablecodec.EncodeTableIndexPrefix(tableID, tmpID) + endKey := tablecodec.EncodeTableIndexPrefix(tableID, tmpID+1) + elemID := ea.allocForIndexID(tableID, tmpID) if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID)); err != nil { return errors.Trace(err) } // Clean temp index data to avoid Garbage data that generate from adding index with lightning backfill data - if job.State == model.JobStateRollbackDone && newBackfill { - startKey := tablecodec.EncodeTableIndexPrefix(tableID, eid) - endKey := tablecodec.EncodeTableIndexPrefix(tableID, eid+1) - elemID := ea.allocForIndexID(tableID, eid) + if job.State == model.JobStateRollbackDone { + startKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID) + endKey := tablecodec.EncodeTableIndexPrefix(tableID, indexID+1) + elemID := ea.allocForIndexID(tableID, indexID) if err := doInsert(ctx, s, job.ID, elemID, startKey, endKey, now, fmt.Sprintf("table ID is %d", tableID)); err != nil { return errors.Trace(err) } diff --git a/ddl/index.go b/ddl/index.go index 7c0311f629ef7..92420cd00d5ab 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -600,13 +600,8 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo } indexInfo.State = model.StatePublic - // Set eid to temp index value, if add index follow the new backfill flow. - var eid uint64 = 0 - if indexInfo.SubState != model.StateNone { - // After merge data into TiKV, then the progress set to 100. - metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) - eid = codec.EncodeIntToCmpUint(tablecodec.TempIndexPrefix | indexInfo.ID) - } + // After merge data into TiKV, then the progress set to 100. + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) // Set sub state to stateNone to stop double write indexInfo.SubState = model.StateNone // Set column index flag. 
@@ -624,10 +619,8 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo // Finish this job. job.FinishTableJob(model.JobStateDone, model.StatePublic, ver, tblInfo) // Clean temp index if needed - if eid != 0 { - job.Args =job.Args[:0] - job.Args = []interface{}{eid, getPartitionIDs(tblInfo)} - } + job.Args = job.Args[:0] + job.Args = []interface{}{indexInfo.ID, getPartitionIDs(tblInfo)} default: err = dbterror.ErrInvalidDDLState.GenWithStackByArgs("index", tblInfo.State) } @@ -719,16 +712,40 @@ func goFastDDLBackfill(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, } return false, ver, nil case model.StateMerge: + logutil.BgLogger().Info("Lightning start merge the increment part of adding index") + err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { + defer util.Recover(metrics.LabelDDL, "onMergeIndex", + func() { + addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("merge table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) + }, false) + return w.mergeTempIndex(tbl, indexInfo, reorgInfo) + }) if err != nil { - logutil.BgLogger().Info("Lightning start merge init merge reorg info err", zap.Error(err)) - return false, ver, errors.Trace(err) + return false, 0, errors.Trace(err) } - logutil.BgLogger().Info("Lightning start merge the increment part of adding index") + logutil.BgLogger().Info("Lightning finished merge the increment part of adding index") return true, ver, nil default: return false, 0, errors.New("Lightning go fast path wrong sub states: should not happened") } } + + // Original backfill need also merge temp index data. + if indexInfo.SubState == model.StatePublic { + logutil.BgLogger().Info("Not Lightning start merge the increment part of adding index") + err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { + defer util.Recover(metrics.LabelDDL, "onMergeIndex", + func() { + addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("merge table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) + }, false) + return w.mergeTempIndex(tbl, indexInfo, reorgInfo) + }) + if err != nil { + return false, 0, errors.Trace(err) + } + logutil.BgLogger().Info("Not Lightning finished merge the increment part of adding index") + return true, ver, nil + } return false, ver, nil } @@ -756,23 +773,13 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo } } - if indexInfo.SubState == model.StateMerge { - err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { - defer util.Recover(metrics.LabelDDL, "onMergeIndex", - func() { - addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("merge table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) - }, false) - return w.mergeTempIndex(tbl, indexInfo, reorgInfo) - }) - } else { - err = w.runReorgJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { - defer util.Recover(metrics.LabelDDL, "onCreateIndex", - func() { - addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("add table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) - }, false) - return w.addTableIndex(tbl, indexInfo, reorgInfo) - }) - } + err = w.runReorgJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { + defer util.Recover(metrics.LabelDDL, "onCreateIndex", + func() { + addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("add table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) + }, false) + return w.addTableIndex(tbl, indexInfo, reorgInfo) + }) if err != 
nil { if dbterror.ErrWaitReorgTimeout.Equal(err) { // if timeout, we should return, check for the owner and re-wait job done. @@ -801,29 +808,31 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo return false, ver, errors.Trace(err) } - done = false if isLightningEnabled(job.ID) { indexInfo.SubState = model.StateMergeSync ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) - if err != nil { return false, ver, errors.Trace(err) } - //Init reorg infor for merge task. + //Init merge reorgInfo for merge temp index task. job.SnapshotVer = 0 - reorgInfo, err = getMergeReorgInfo(d.jobContext(job), d, rh, job, tbl, elements, indexInfo.ID) + _, err = getMergeReorgInfo(d.jobContext(job), d, rh, job, tbl, elements, indexInfo.ID) if err != nil { return false, ver, errors.Trace(err) } } else { - // Check if reorg task finished. - if indexInfo.SubState == model.StateNone || indexInfo.SubState == model.StateMerge { - done = true + // Only indexInfo.SubState == model.StateNone, origin backfill flow. + if indexInfo.SubState == model.StateNone { + indexInfo.SubState = model.StatePublic + ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) + if err != nil { + return false, ver, errors.Trace(err) + } } } // Cleanup lightning environment cleanUpLightningEnv(reorgInfo, false) - return done, ver, errors.Trace(err) + return false, ver, errors.Trace(err) } func onDropIndex(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) { diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index 96cbdbef9a746..af046f1ad46c1 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -389,15 +389,15 @@ type backFillIndexWorker struct { tmpIdxRecords []*temporaryIndexRecord batchCheckTmpKeys []kv.Key tmpKeyPos []int32 - jobContext *JobContext + jobContext *JobContext } -func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { +func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) // Add build openengine process. return &backFillIndexWorker{ - backfillWorker: newBackfillWorker(sessCtx, id, t, reorgInfo), + backfillWorker: newBackfillWorker(sessCtx, 0, t, reorgInfo), index: index, jobContext: jc, } @@ -485,7 +485,7 @@ func (w *worker) mergeTempIndex(t table.Table, idx *model.IndexInfo, reorgInfo * if err != nil { break } - finish, err = w.updateReorgInfo(tbl, reorgInfo) + finish, err = w.updateMergeInfo(tbl, idx.ID, reorgInfo) if err != nil { return errors.Trace(err) } @@ -496,6 +496,38 @@ func (w *worker) mergeTempIndex(t table.Table, idx *model.IndexInfo, reorgInfo * return errors.Trace(err) } +// updateReorgInfo will find the next partition according to current reorgInfo. +// If no more partitions, or table t is not a partitioned table, returns true to +// indicate that the reorganize work is finished. +func (w *worker) updateMergeInfo(t table.PartitionedTable, idxID int64, reorg *reorgInfo) (bool, error) { + pi := t.Meta().GetPartitionInfo() + if pi == nil { + return true, nil + } + + pid, err := findNextPartitionID(reorg.PhysicalTableID, pi.Definitions) + if err != nil { + // Fatal error, should not run here. 
+ logutil.BgLogger().Error("[ddl] find next partition ID failed", zap.Reflect("table", t), zap.Error(err)) + return false, errors.Trace(err) + } + if pid == 0 { + // Next partition does not exist, all the job done. + return true, nil + } + + start, end := tablecodec.GetTableIndexKeyRange(pid, tablecodec.TempIndexPrefix|idxID) + + reorg.StartKey, reorg.EndKey, reorg.PhysicalTableID = start, end, pid + // Write the reorg info to store so the whole reorganize process can recover from panic. + err = reorg.UpdateReorgMeta(reorg.StartKey) + logutil.BgLogger().Info("[ddl] job update MergeInfo", zap.Int64("jobID", reorg.Job.ID), + zap.ByteString("elementType", reorg.currElement.TypeKey), zap.Int64("elementID", reorg.currElement.ID), + zap.Int64("partitionTableID", pid), zap.String("startHandle", tryDecodeToHandleString(start)), + zap.String("endHandle", tryDecodeToHandleString(end)), zap.Error(err)) + return false, errors.Trace(err) +} + func (w *worker) addPhysicalTempIndex(t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo) error { logutil.BgLogger().Info("[ddl] start to merge temp index", zap.String("job", reorgInfo.Job.String()), zap.String("reorgInfo", reorgInfo.String())) return w.writeTempIndexRecord(t, typeAddIndexWorker, indexInfo, nil, nil, reorgInfo) diff --git a/ddl/index_lightning_test.go b/ddl/index_lightning_test.go index 36a4769d6b064..87700ce72f93d 100644 --- a/ddl/index_lightning_test.go +++ b/ddl/index_lightning_test.go @@ -77,7 +77,7 @@ func TestAddIndexLit(t *testing.T) { tk.MustExec("create table t (c1 int primary key, c2 int, c3 int)") tk.MustExec("insert t values (1, 1, 1), (2, 2, 2), (3, 3, 1);") tk.MustExec("set @@global.tidb_fast_ddl = on") - + var tableID int64 rs := tk.MustQuery("select TIDB_TABLE_ID from information_schema.tables where table_name='t' and table_schema='test';") tableIDi, _ := strconv.Atoi(rs.Rows()[0][0].(string)) diff --git a/table/tables/index.go b/table/tables/index.go index 5d1e95500582d..3e56719999266 100644 --- a/table/tables/index.go +++ b/table/tables/index.go @@ -122,9 +122,9 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue ) if c.idxInfo.State == model.StateWriteReorganization && !c.Isbackfill { switch c.idxInfo.SubState { - case model.StateNone: - // do nothing. - case model.StateBackfillSync, model.StateBackfill: + case model.StatePublic: + // Do nothing. + case model.StateNone, model.StateBackfillSync, model.StateBackfill: // Write to the temporary index. keyVer = []byte("1") tablecodec.IndexKey2TempIndexKey(c.idxInfo.ID, key) @@ -308,9 +308,9 @@ func (c *index) Delete(sc *stmtctx.StatementContext, txn kv.Transaction, indexed ) if c.idxInfo.State == model.StateWriteReorganization { switch c.idxInfo.SubState { - case model.StateNone: + case model.StatePublic: // Do nothing. - case model.StateBackfillSync, model.StateBackfill: + case model.StateNone, model.StateBackfillSync, model.StateBackfill: // Write to the temporary index. keyVer = []byte("1") tempKey = append(tempKey, key...) 
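
A note for readers following the temporary-index plumbing in the hunks above: while the new index is being reorganized, DML index writes are redirected into a separate temporary key space (the index ID tagged with tablecodec.TempIndexPrefix, the key rewritten by IndexKey2TempIndexKey), and the merge phase later scans exactly that key space per physical table, as updateMergeInfo does. The sketch below only wraps the two tablecodec calls visible in these diffs; the helper names and the standalone package are hypothetical and not part of the patch.

package tempidxsketch // hypothetical package, for illustration only; not part of this patch series

import "github.com/pingcap/tidb/tablecodec"

// tempIndexRangeFor returns the [start, end) key range a merge worker would
// scan for one physical table or partition, mirroring updateMergeInfo above.
func tempIndexRangeFor(physicalID, indexID int64) (start, end []byte) {
	return tablecodec.GetTableIndexKeyRange(physicalID, tablecodec.TempIndexPrefix|indexID)
}

// toTempIndexKey rewrites a regular index key in place so the write lands in
// the temporary index, as index.Create/Delete do during reorganization.
func toTempIndexKey(indexID int64, indexKey []byte) {
	tablecodec.IndexKey2TempIndexKey(indexID, indexKey)
}

Keeping the temporary entries under a disjoint prefix is what lets the merge step pick them up with a plain range scan per partition instead of filtering the target index.
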
From 89eb51f6fb6f7765b9feb25314466b23e8c4c701 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Tue, 12 Jul 2022 22:48:04 +0800 Subject: [PATCH 10/16] set disk quota min value to 100 GB --- sessionctx/variable/tidb_vars.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index a37ab30543267..b383ac9f496f5 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -954,7 +954,7 @@ const ( DefStreamCountWhenMaxThreadsNotSet = 8 DefTiFlashFineGrainedShuffleBatchSize = 8192 DefTiDBFastDDL = false - DefTiDBDiskQuota = 10 * 1024 * 1024 * 1024 // 100GB + DefTiDBDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB ) // Process global variables. From bdd3936a243b5f03c3e11b2b8e5c2a69827a4925 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Wed, 13 Jul 2022 00:41:34 +0800 Subject: [PATCH 11/16] refactor new backfiller --- ddl/column.go | 2 +- ddl/ddl_worker.go | 26 +++++- ddl/index.go | 198 ++++++++++++++++++++--------------------- ddl/index_lightning.go | 108 +++++++++++++--------- table/tables/index.go | 9 +- 5 files changed, 195 insertions(+), 148 deletions(-) diff --git a/ddl/column.go b/ddl/column.go index 9bf3411b25f2d..2a8898a3dbdba 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -564,7 +564,7 @@ func (w *worker) onModifyColumn(d *ddlCtx, t *meta.Meta, job *model.Job) (ver in initAndAddColumnToTable(tblInfo, changingCol) indexesToChange := findRelatedIndexesToChange(tblInfo, oldCol.Name) for _, info := range indexesToChange { - newIdxID := allocateIndexID(tblInfo) + newIdxID := allocateindexID(tblInfo) if !info.isTemp { // We create a temp index for each normal index. tmpIdx := info.indexInfo.Clone() diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index 0353280a578b4..5f9780957846a 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -452,7 +452,31 @@ func jobNeedGC(job *model.Job) bool { } switch job.Type { case model.ActionAddIndex, model.ActionAddPrimaryKey: - return true + var needGC bool = false + if job.State != model.JobStateRollbackDone { + // When using lightning backfill, the job.Args length > 0 + var indexID int64 = 0 + var partitionIDs []int64 + err := job.DecodeArgs(&indexID, &partitionIDs) + if err != nil { + // Get index id error. + if indexID != 0 { + logutil.BgLogger().Info("Lightning clean temp index data failed, please clean it manually,", + zap.String("Job Args:", job.String()), + zap.String("RawArgs:", string(job.RawArgs))) + } + return false + } + if indexID != 0 { + needGC = true + } + } else { + needGC = true + } + // After rolling back an AddIndex operation, we need to use delete-range to delete the half-done index data. + if needGC { + return true + } case model.ActionDropSchema, model.ActionDropTable, model.ActionTruncateTable, model.ActionDropIndex, model.ActionDropPrimaryKey, model.ActionDropTablePartition, model.ActionTruncateTablePartition, model.ActionDropColumn, model.ActionModifyColumn: return true diff --git a/ddl/index.go b/ddl/index.go index 92420cd00d5ab..693b95ea16cf4 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -547,6 +547,17 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo originalState := indexInfo.State switch indexInfo.State { case model.StateNone: + // whether the new backfiller will be used or not. 
+ if IsAllowFastDDL() { + err = prepareBackend(w.ctx, indexInfo.Unique, job, job.ReorgMeta.SQLMode) + if err == nil { + setLightningEnabled(job.ID, true) + } else { + indexInfo.SubState = model.StatePublic + } + } else { + indexInfo.SubState = model.StatePublic + } // none -> delete only indexInfo.State = model.StateDeleteOnly moveAndUpdateHiddenColumnsToPublic(tblInfo, indexInfo) @@ -600,10 +611,15 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo } indexInfo.State = model.StatePublic - // After merge data into TiKV, then the progress set to 100. - metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) - // Set sub state to stateNone to stop double write - indexInfo.SubState = model.StateNone + var tmpIndexID uint64 + if indexInfo.SubState != model.StatePublic { + // After merge data into TiKV, then the progress set to 100. + metrics.GetBackfillProgressByLabel(metrics.LblAddIndex).Set(100) + // Set sub state to stateNone to stop double write + indexInfo.SubState = model.StatePublic + tmpIndexID = codec.EncodeIntToCmpUint(tablecodec.TempIndexPrefix | indexInfo.ID) + } + // Set column index flag. addIndexColumnFlag(tblInfo, indexInfo) if isPK { @@ -618,9 +634,11 @@ func (w *worker) onCreateIndex(d *ddlCtx, t *meta.Meta, job *model.Job, isPK boo } // Finish this job. job.FinishTableJob(model.JobStateDone, model.StatePublic, ver, tblInfo) - // Clean temp index if needed - job.Args = job.Args[:0] - job.Args = []interface{}{indexInfo.ID, getPartitionIDs(tblInfo)} + if tmpIndexID != 0 { + // Clean temp index if needed + job.Args = job.Args[:0] + job.Args = []interface{}{tmpIndexID, getPartitionIDs(tblInfo)} + } default: err = dbterror.ErrInvalidDDLState.GenWithStackByArgs("index", tblInfo.State) } @@ -645,94 +663,77 @@ func goFastDDLBackfill(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, tbl table.Table, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, elements []*meta.Element, rh *reorgHandler) (reorg bool, ver int64, err error) { var restoreReorg bool = false - // This is used to restore reorg task if it interrupt during backfill state and TiDB owner not change or restart. - if isLightningEnabled(job.ID) && needRestoreJob(job.ID) { - // If reorg task can not be restore with lightning execution, should restart reorg task to keep data consist. - if !canRestoreReorgTask(job, indexInfo.ID) { - reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements) - if err != nil || reorgInfo.first { - return false, ver, errors.Trace(err) - } + // When sub state is StatePublic means not go fast ddl path. + if indexInfo.SubState == model.StatePublic { + return false, 0, nil + } + + // If enter this backfill flow,then need finished it。 + switch indexInfo.SubState { + case model.StateNone: + logutil.BgLogger().Info("Lightning backfill start state none") + indexInfo.SubState = model.StateBackfillSync + ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) + if err != nil { + return false, ver, errors.Trace(err) } - } else if !isLightningEnabled(job.ID) && !needRestoreJob(job.ID) && indexInfo.SubState == model.StateBackfill { - // Be here, means the DDL Owner changed or restarted, the reorg state is re-entered. - job.SnapshotVer = 0 - restoreReorg = true - reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements) - } - - // Check and set up lightning Backend. - // Whether use lightning add index will depends on - // 1) TiDBFastDDL sysvars is true or false at this time and index's substate equal to stateNone. 
- // This means it start to build up lightning backfill environment. - // 2) Restore lightning reorg task,here means DDL owner changed or restarted, need rebuild lightning environment. - if !isLightningEnabled(job.ID) { - // If it is a empty table, do not need start lightning backfiller. - if reorgInfo.StartKey == nil && reorgInfo.EndKey == nil { - return false, ver, nil + return false, ver, nil + case model.StateBackfillSync: + logutil.BgLogger().Info("Lightning backfill state backfill Sync") + indexInfo.SubState = model.StateBackfill + ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) + if err != nil { + return false, ver, errors.Trace(err) } - // Check if the reorg task is re-entry task, If TiDB is restarted, then currently - // reorg task should be restart. - if (IsAllowFastDDL() && indexInfo.SubState == model.StateNone) || restoreReorg { - err = prepareBackend(w.ctx, indexInfo.Unique, job, reorgInfo.ReorgMeta.SQLMode) - if err == nil { - setLightningEnabled(job.ID, true) + return false, ver, nil + case model.StateBackfill: + logutil.BgLogger().Info("Lightning backfill state backfill") + // This is used to restore reorg task if it interrupt during backfill state and TiDB owner not change or restart. + if isLightningEnabled(job.ID) && needRestoreJob(job.ID) { + // If reorg task can not be restore with lightning execution, should restart reorg task to keep data consist. + if !canRestoreReorgTask(job, indexInfo.ID) { + reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements) + if err != nil || reorgInfo.first { + return false, ver, errors.Trace(err) + } } + } else if !isLightningEnabled(job.ID) && !needRestoreJob(job.ID) && indexInfo.SubState == model.StateBackfill { + // Be here, means the DDL Owner changed or restarted, the reorg state is re-entered. + job.SnapshotVer = 0 + restoreReorg = true + reorgInfo, err = getReorgInfo(d.jobContext(job), d, rh, job, tbl, elements) } - } - // If enter this backfill flow,then need finished it。 - if isLightningEnabled(job.ID) || indexInfo.SubState != model.StateNone { - switch indexInfo.SubState { - case model.StateNone: - logutil.BgLogger().Info("Lightning backfill start state none") - indexInfo.SubState = model.StateBackfillSync - ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) - if err != nil { - return false, ver, errors.Trace(err) - } - return false, ver, nil - case model.StateBackfillSync: - logutil.BgLogger().Info("Lightning backfill state backfill Sync") - indexInfo.SubState = model.StateBackfill - ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) - if err != nil { - return false, ver, errors.Trace(err) - } - return false, ver, nil - case model.StateBackfill: - logutil.BgLogger().Info("Lightning backfill state backfill") - return true, ver, nil - case model.StateMergeSync: - logutil.BgLogger().Info("Lightning backfill state merge Sync") - indexInfo.SubState = model.StateMerge - ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) - if err != nil { - return false, ver, errors.Trace(err) + // Check and set up lightning Backend. + // Whether use lightning add index will depends on + // 1) TiDBFastDDL sysvars is true or false at this time and index's substate equal to stateNone. + // This means it start to build up lightning backfill environment. + // 2) Restore lightning reorg task,here means DDL owner changed or restarted, need rebuild lightning environment. + if !isLightningEnabled(job.ID) { + // If it is a empty table, do not need start lightning backfiller. 
+ if reorgInfo.StartKey == nil && reorgInfo.EndKey == nil { + return false, ver, nil } - return false, ver, nil - case model.StateMerge: - logutil.BgLogger().Info("Lightning start merge the increment part of adding index") - err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { - defer util.Recover(metrics.LabelDDL, "onMergeIndex", - func() { - addIndexErr = dbterror.ErrCancelledDDLJob.GenWithStack("merge table `%v` index `%v` panic", tbl.Meta().Name, indexInfo.Name) - }, false) - return w.mergeTempIndex(tbl, indexInfo, reorgInfo) - }) - if err != nil { - return false, 0, errors.Trace(err) + // Check if the reorg task is re-entry task, If TiDB is restarted, then currently + // reorg task should be restart. + if (IsAllowFastDDL() && indexInfo.SubState == model.StateNone) || restoreReorg { + err = prepareBackend(w.ctx, indexInfo.Unique, job, reorgInfo.ReorgMeta.SQLMode) + if err == nil { + setLightningEnabled(job.ID, true) + } } - logutil.BgLogger().Info("Lightning finished merge the increment part of adding index") - return true, ver, nil - default: - return false, 0, errors.New("Lightning go fast path wrong sub states: should not happened") } - } - - // Original backfill need also merge temp index data. - if indexInfo.SubState == model.StatePublic { - logutil.BgLogger().Info("Not Lightning start merge the increment part of adding index") + return true, ver, nil + case model.StateMergeSync: + logutil.BgLogger().Info("Lightning backfill state merge Sync") + indexInfo.SubState = model.StateMerge + ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) + if err != nil { + return false, ver, errors.Trace(err) + } + return false, ver, nil + case model.StateMerge: + logutil.BgLogger().Info("Lightning start merge the increment part of adding index") err = w.runMergeJob(rh, reorgInfo, tbl.Meta(), d.lease, func() (addIndexErr error) { defer util.Recover(metrics.LabelDDL, "onMergeIndex", func() { @@ -743,10 +744,11 @@ func goFastDDLBackfill(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, if err != nil { return false, 0, errors.Trace(err) } - logutil.BgLogger().Info("Not Lightning finished merge the increment part of adding index") + logutil.BgLogger().Info("Lightning finished merge the increment part of adding index") return true, ver, nil + default: + return false, 0, errors.New("Lightning go fast path wrong sub states: should not happened") } - return false, ver, nil } func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, @@ -762,7 +764,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo } doReorg, ver, err = goFastDDLBackfill(w, d, t, job, tbl, indexInfo, reorgInfo, elements, rh) - if isLightningEnabled(reorgInfo.ID) || indexInfo.SubState != model.StateNone { + if indexInfo.SubState != model.StatePublic { if err != nil { logutil.BgLogger().Error("Lightning: Add index backfill processing:", zap.String("Error:", err.Error())) return doReorg, ver, err @@ -808,7 +810,8 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo return false, ver, errors.Trace(err) } - if isLightningEnabled(job.ID) { + // Go through new backfill path + if indexInfo.SubState != model.StatePublic { indexInfo.SubState = model.StateMergeSync ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) if err != nil { @@ -818,17 +821,11 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo job.SnapshotVer = 0 _, err = getMergeReorgInfo(d.jobContext(job), d, 
rh, job, tbl, elements, indexInfo.ID) if err != nil { - return false, ver, errors.Trace(err) + return done, ver, errors.Trace(err) } } else { - // Only indexInfo.SubState == model.StateNone, origin backfill flow. - if indexInfo.SubState == model.StateNone { - indexInfo.SubState = model.StatePublic - ver, err = updateVersionAndTableInfo(d, t, job, tbl.Meta(), true) - if err != nil { - return false, ver, errors.Trace(err) - } - } + // Original backfill finished from here. + return true, ver, errors.Trace(err) } // Cleanup lightning environment cleanUpLightningEnv(reorgInfo, false) @@ -1419,6 +1416,7 @@ func (w *addIndexWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (taskC continue } + // When backfill go new backfill path, but use original worker then no need to lock index key. if !w.isNewBF { // We need to add this lock to make sure pessimistic transaction can realize this operation. // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index af046f1ad46c1..04b19cd292950 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -228,6 +228,8 @@ func (w *addIndexWorkerLit) BackfillDataInTxn(handleRange reorgBackfillTask) (ta fetchTag := "AddIndexLightningFetchdata" + strconv.Itoa(w.id) writeTag := "AddIndexLightningWritedata" + strconv.Itoa(w.id) txnTag := "AddIndexLightningBackfillDataInTxn" + strconv.Itoa(w.id) + // Set a big batch size to enhance performance. + w.batchCnt *= 16 oprStartTime := time.Now() ctx := kv.WithInternalSourceType(context.Background(), w.jobContext.ddlJobSourceType()) @@ -337,7 +339,13 @@ func (w *backFillIndexWorker) batchSkipKey(txn kv.Transaction, store kv.Storage, if len(w.batchCheckTmpKeys) == 0 { return nil } - + w.skipAll = false + // We need to add this lock to make sure pessimistic transaction can realize this operation. + // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. + err := txn.LockKeys(context.Background(), new(kv.LockCtx), w.batchCheckTmpKeys...) + if err != nil { + return errors.Trace(err) + } // Gen a current snapshot to get latest updated. snapshot := store.GetSnapshot(kv.MaxVersion) // Get duplicated key from temp index. @@ -345,26 +353,26 @@ func (w *backFillIndexWorker) batchSkipKey(txn kv.Transaction, store kv.Storage, if err != nil { return errors.Trace(err) } - + count := len(w.batchCheckTmpKeys) for i, key := range w.batchCheckTmpKeys { if val, found := batchVals[string(key)]; found { var keyVer []byte length := len(val) keyVer = append(keyVer, val[length-1:]...) - pos := w.tmpKeyPos[i] if bytes.Equal(keyVer, []byte("2")) { - idxRecords[pos].skip = true - } else { - // We need to add this lock to make sure pessimistic transaction can realize this operation. - // For the normal pessimistic transaction, it's ok. But if async commmit is used, it may lead to inconsistent data and index. - err = txn.LockKeys(context.Background(), new(kv.LockCtx), idxRecords[pos].key) - if err != nil { - return errors.Trace(err) + idxRecords[i].skip = true + count-- + if i == 0 { + // catch val for later use. + w.firstVal = w.firstVal[:0] + w.firstVal = append(w.firstVal, val...) 
} } } } - + if count == 0 { + w.skipAll = true + } return nil } @@ -388,8 +396,9 @@ type backFillIndexWorker struct { distinctCheckFlags []bool tmpIdxRecords []*temporaryIndexRecord batchCheckTmpKeys []kv.Key - tmpKeyPos []int32 jobContext *JobContext + skipAll bool + firstVal []byte } func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { @@ -427,41 +436,65 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta return errors.Trace(err) } - // Should be + // Skip merge change after mergeSync err = w.batchSkipKey(txn, w.sessCtx.GetStore(), temporaryIndexRecords) if err != nil { return errors.Trace(err) } for _, idxRecord := range temporaryIndexRecords { - taskCtx.scanCount++ // The index is already exists, we skip it, no needs to backfill it. // The following update, delete, insert on these rows, TiDB can handle it correctly. - if idxRecord.skip { + // If all batch are skiped, update first index key to make txn commit to release lock. + if idxRecord.skip && !w.skipAll { continue } - - if !bytes.Equal(idxRecord.keyVer, []byte("1")) { - err = errors.New("merge temp index should not merge version 2 index data") - panic(err) - } - - if idxRecord.delete { - if idxRecord.unique { - err = txn.GetMemBuffer().DeleteWithFlags(idxRecord.key, kv.SetNeedLocked) + if w.skipAll { + isDelete := false + unique := false + length := len(w.firstVal) + w.firstVal = w.firstVal[:length-1] + length-- + + if bytes.Equal(w.firstVal, []byte("delete")) { + isDelete = true + w.firstVal = w.firstVal[:length-6] + } else if bytes.Equal(w.firstVal, []byte("deleteu")) { + isDelete = true + unique = true + w.firstVal = w.firstVal[:length-7] + } + if isDelete { + if unique { + err = txn.GetMemBuffer().DeleteWithFlags(w.batchCheckTmpKeys[0], kv.SetNeedLocked) + } else { + err = txn.GetMemBuffer().Delete(w.batchCheckTmpKeys[0]) + } + logutil.BgLogger().Info("delete", zap.ByteString("key", w.batchCheckTmpKeys[0])) } else { - err = txn.GetMemBuffer().Delete(idxRecord.key) + // set latest key/val back to temp index. + err = txn.GetMemBuffer().Set(w.batchCheckTmpKeys[0], w.firstVal) + } + if err != nil { + return err } - logutil.BgLogger().Info("delete", zap.ByteString("key", idxRecord.key)) + break } else { - err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) - } - if err != nil { - return err + if idxRecord.delete { + if idxRecord.unique { + err = txn.GetMemBuffer().DeleteWithFlags(idxRecord.key, kv.SetNeedLocked) + } else { + err = txn.GetMemBuffer().Delete(idxRecord.key) + } + logutil.BgLogger().Info("delete", zap.ByteString("key", idxRecord.key)) + } else { + err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) + } + if err != nil { + return err + } } - taskCtx.addedCount++ } - return nil }) logSlowOperations(time.Since(oprStartTime), "AddIndexMergeDataInTxn", 3000) @@ -537,8 +570,6 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r startTime := time.Now() w.tmpIdxRecords = w.tmpIdxRecords[:0] w.batchCheckTmpKeys = w.batchCheckTmpKeys[:0] - w.tmpKeyPos = w.tmpKeyPos[:0] - var pos int32 = 0 // taskDone means that the reorged handle is out of taskRange.endHandle. taskDone := false oprStartTime := startTime @@ -561,8 +592,10 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r keyVer = append(keyVer, rawValue[length-1:]...) rawValue = rawValue[:length-1] length-- + // Just skip it. 
if bytes.Equal(keyVer, []byte("2")) { skip = true + return true, nil } if bytes.Equal(rawValue, []byte("delete")) { isDelete = true @@ -580,12 +613,7 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r idxRecord.vals = rawValue } w.tmpIdxRecords = append(w.tmpIdxRecords, idxRecord) - - if bytes.Equal(keyVer, []byte("1")) { - w.batchCheckTmpKeys = append(w.batchCheckTmpKeys, indexKey) - w.tmpKeyPos = append(w.tmpKeyPos, pos) - } - pos++ + w.batchCheckTmpKeys = append(w.batchCheckTmpKeys, indexKey) return true, nil }) diff --git a/table/tables/index.go b/table/tables/index.go index 3e56719999266..6570524777aa4 100644 --- a/table/tables/index.go +++ b/table/tables/index.go @@ -120,10 +120,9 @@ func (c *index) Create(sctx sessionctx.Context, txn kv.Transaction, indexedValue tempKey []byte keyVer []byte = []byte("0") ) - if c.idxInfo.State == model.StateWriteReorganization && !c.Isbackfill { + // Isbackfill set to true, means this is a backfill worker should not write to temp index. + if c.idxInfo.State != model.StatePublic && c.idxInfo.SubState != model.StatePublic && !c.Isbackfill { switch c.idxInfo.SubState { - case model.StatePublic: - // Do nothing. case model.StateNone, model.StateBackfillSync, model.StateBackfill: // Write to the temporary index. keyVer = []byte("1") @@ -306,10 +305,8 @@ func (c *index) Delete(sc *stmtctx.StatementContext, txn kv.Transaction, indexed keyVer []byte = []byte("0") val []byte ) - if c.idxInfo.State == model.StateWriteReorganization { + if c.idxInfo.State != model.StatePublic && c.idxInfo.SubState != model.StatePublic { switch c.idxInfo.SubState { - case model.StatePublic: - // Do nothing. case model.StateNone, model.StateBackfillSync, model.StateBackfill: // Write to the temporary index. keyVer = []byte("1") From 7ead81947f5bb34c8147b067b78c179e7ca54337 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Wed, 13 Jul 2022 10:27:45 +0800 Subject: [PATCH 12/16] Do not print log --- ddl/index_lightning.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index 04b19cd292950..17b0979b2ef6d 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -470,7 +470,6 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta } else { err = txn.GetMemBuffer().Delete(w.batchCheckTmpKeys[0]) } - logutil.BgLogger().Info("delete", zap.ByteString("key", w.batchCheckTmpKeys[0])) } else { // set latest key/val back to temp index. err = txn.GetMemBuffer().Set(w.batchCheckTmpKeys[0], w.firstVal) @@ -486,7 +485,6 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta } else { err = txn.GetMemBuffer().Delete(idxRecord.key) } - logutil.BgLogger().Info("delete", zap.ByteString("key", idxRecord.key)) } else { err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) } From 87d99675c503b733e7c63b3b722c2a74b0bad586 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Wed, 13 Jul 2022 13:06:38 +0800 Subject: [PATCH 13/16] fix merge exit logic --- ddl/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddl/index.go b/ddl/index.go index 693b95ea16cf4..3f6c931954ce5 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -770,7 +770,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo return doReorg, ver, err } // Only when SubState is in BackFill state, then need start to start new backfill task. 
- if !doReorg { + if !doReorg || indexInfo.SubState == model.StateMerge { return doReorg, ver, err } } From e8f4d5f20ec4b3937471391d7519c00edd1673ab Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Wed, 13 Jul 2022 13:16:54 +0800 Subject: [PATCH 14/16] parallel merge logic. --- ddl/backfilling.go | 293 +++++++++++++++++++++++++++---- ddl/index.go | 2 +- ddl/index_lightning.go | 4 +- sessionctx/variable/tidb_vars.go | 2 +- 4 files changed, 258 insertions(+), 43 deletions(-) diff --git a/ddl/backfilling.go b/ddl/backfilling.go index 902e4a1812f9b..67b48a74e2855 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -921,6 +921,7 @@ func getIndexRangeEndKey(ctx *JobContext, store kv.Storage, priority int, t tabl func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfillWorkerType, indexInfo *model.IndexInfo, oldColInfo, colInfo *model.ColumnInfo, reorgInfo *reorgInfo) error { job := reorgInfo.Job + totalAddedCount := job.GetRowCount() startKey, endKey := reorgInfo.StartKey, reorgInfo.EndKey @@ -930,61 +931,154 @@ func (w *worker) writeTempIndexRecord(t table.PhysicalTable, bfWorkerType backfi if startKey == nil && endKey == nil { return nil } - jc := w.jobContext(job) + // variable.ddlReorgWorkerCounter can be modified by system variable "tidb_ddl_reorg_worker_cnt". + workerCnt := variable.GetDDLReorgWorkerCounter() + + mergeWorkers := make([]*backfillWorker, 0, workerCnt) - mergeWorkers := make([]*backfillWorker, 1) defer func() { closeBackfillWorkers(mergeWorkers) }() - // For dynamic adjust backfill worker number. - if err := loadDDLReorgVars(w); err != nil { - logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) - } + for { + kvRanges, err := splitTableRanges(t, reorgInfo.d.store, startKey, endKey) + if err != nil { + return errors.Trace(err) + } - sessCtx := newContext(reorgInfo.d.store) - sessCtx.GetSessionVars().StmtCtx.IsDDLJobInQueue = true - // Simulate the sql mode environment in the worker sessionCtx. - sqlMode := reorgInfo.ReorgMeta.SQLMode - sessCtx.GetSessionVars().SQLMode = sqlMode - if err := setSessCtxLocation(sessCtx, reorgInfo); err != nil { - return errors.Trace(err) - } + // For dynamic adjust backfill worker number. + if err := loadDDLReorgVars(w); err != nil { + logutil.BgLogger().Error("[ddl] load DDL reorganization variable failed", zap.Error(err)) + } + workerCnt = variable.GetDDLReorgWorkerCounter() + // If only have 1 range, we can only start 1 worker. + if len(kvRanges) < int(workerCnt) { + workerCnt = int32(len(kvRanges)) + } - sessCtx.GetSessionVars().StmtCtx.BadNullAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.TruncateAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.OverflowAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.AllowInvalidDate = sqlMode.HasAllowInvalidDatesMode() - sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() - sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() - sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() + // Enlarge the worker size. + for i := len(mergeWorkers); i < int(workerCnt); i++ { + sessCtx := newContext(reorgInfo.d.store) + sessCtx.GetSessionVars().StmtCtx.IsDDLJobInQueue = true + // Simulate the sql mode environment in the worker sessionCtx. 
+ sqlMode := reorgInfo.ReorgMeta.SQLMode + sessCtx.GetSessionVars().SQLMode = sqlMode + if err := setSessCtxLocation(sessCtx, reorgInfo); err != nil { + return errors.Trace(err) + } - logutil.BgLogger().Info("[ddl] start merge workers to merge delta index changes", - zap.String("startHandle", tryDecodeToHandleString(startKey)), - zap.String("endHandle", tryDecodeToHandleString(endKey))) + sessCtx.GetSessionVars().StmtCtx.BadNullAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.TruncateAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.OverflowAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.AllowInvalidDate = sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.DividedByZeroAsWarning = !sqlMode.HasStrictMode() + sessCtx.GetSessionVars().StmtCtx.IgnoreZeroInDate = !sqlMode.HasStrictMode() || sqlMode.HasAllowInvalidDatesMode() + sessCtx.GetSessionVars().StmtCtx.NoZeroDate = sqlMode.HasStrictMode() - idxWorker := newTempIndexWorker(sessCtx, w, t, indexInfo, reorgInfo, jc) - idxWorker.priority = job.Priority - mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) - // Dynamic change batch size. - idxWorker.batchCnt = int(variable.GetDDLReorgBatchSize()) - task := &reorgBackfillTask{ - physicalTableID: t.GetPhysicalID(), - startKey: startKey, - endKey: endKey} - err := idxWorker.backfillWorker.handleMergeTask(reorgInfo.d, task, idxWorker) - if err != nil { - return errors.Trace(err) + idxWorker := newTempIndexWorker(sessCtx, w, i, t, indexInfo, reorgInfo, jc) + idxWorker.priority = job.Priority + mergeWorkers = append(mergeWorkers, idxWorker.backfillWorker) + go idxWorker.backfillWorker.runMerge(reorgInfo.d, idxWorker, job) + } + // Shrink the worker size. + if len(mergeWorkers) > int(workerCnt) { + workers := mergeWorkers[workerCnt:] + mergeWorkers = mergeWorkers[:workerCnt] + closeBackfillWorkers(workers) + } + + failpoint.Inject("checkMergeWorkerNum", func(val failpoint.Value) { + if val.(bool) { + num := int(atomic.LoadInt32(&TestCheckWorkerNumber)) + if num != 0 { + if num > len(kvRanges) { + if len(mergeWorkers) != len(kvRanges) { + failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) + } + } else if num != len(mergeWorkers) { + failpoint.Return(errors.Errorf("check merge worker num error, len kv ranges is: %v, check merge worker num is: %v, actual record num is: %v", len(kvRanges), num, len(mergeWorkers))) + } + var wg sync.WaitGroup + wg.Add(1) + TestCheckWorkerNumCh <- &wg + wg.Wait() + } + } + }) + + logutil.BgLogger().Info("[ddl] start merge workers to merge delta index changes", + zap.Int("workerCnt", len(mergeWorkers)), + zap.Int("regionCnt", len(kvRanges)), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("endHandle", tryDecodeToHandleString(endKey))) + + remains, err := w.sendRangeTaskToMergeWorkers(t, mergeWorkers, reorgInfo, &totalAddedCount, kvRanges, t.GetPhysicalID()) + if err != nil { + return errors.Trace(err) + } + if len(remains) == 0 { + break + } + startKey = remains[0].StartKey + if err != nil { + return errors.Trace(err) + } } return nil } +func (w *backfillWorker) runMerge(d *ddlCtx, bf backfiller, job *model.Job) { + logutil.BgLogger().Info("[ddl] merge worker start", zap.Int("workerID", w.id)) + defer func() { + w.resultCh <- &backfillResult{err: dbterror.ErrReorgPanic} + }() + defer util.Recover(metrics.LabelDDL, 
"backfillWorker.run", nil, false) + for { + task, more := <-w.taskCh + if !more { + break + } + d.setDDLLabelForTopSQL(job) + + logutil.BgLogger().Debug("[ddl] merge worker got task", zap.Int("workerID", w.id), zap.String("task", task.String())) + failpoint.Inject("mockMergeRunErr", func() { + if w.id == 0 { + result := &backfillResult{addedCount: 0, nextKey: nil, err: errors.Errorf("mock backfill error")} + w.resultCh <- result + failpoint.Continue() + } + }) + + failpoint.Inject("mockHighLoadForMergeIndex", func() { + sqlPrefixes := []string{"alter"} + topsql.MockHighCPULoad(job.Query, sqlPrefixes, 5) + }) + + failpoint.Inject("mockMergeSlow", func() { + time.Sleep(30 * time.Millisecond) + }) + + // Dynamic change batch size. + w.batchCnt = int(variable.GetDDLReorgBatchSize()) + result := w.handleMergeTask(d, task, bf) + w.resultCh <- result + } + logutil.BgLogger().Info("[ddl] merge worker exit", zap.Int("workerID", w.id)) +} + // handleMergeTask backfills range [task.startHandle, task.endHandle) handle's index to table. -func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) error { +func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf backfiller) *backfillResult { handleRange := *task + result := &backfillResult{ + err: nil, + addedCount: 0, + nextKey: handleRange.startKey, + } + lastLogCount := 0 lastLogTime := time.Now() startTime := lastLogTime + rc := d.getReorgCtx(w.reorgInfo.Job) for { // Give job chance to be canceled, if we not check it here, @@ -993,12 +1087,38 @@ func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf // we should check whether this ddl job is still runnable. err := d.isReorgRunnable(w.reorgInfo.Job) if err != nil { - return err + result.err = err + return result } taskCtx, err := bf.BackfillDataInTxn(handleRange) if err != nil { - return err + result.err = err + return result + } + + mergeBackfillCtxToResult(&taskCtx, result) + + // Although `handleRange` is for data in one region, but back fill worker still split it into many + // small reorg batch size slices and reorg them in many different kv txn. + // If a task failed, it may contained some committed small kv txn which has already finished the + // small range reorganization. + // In the next round of reorganization, the target handle range may overlap with last committed + // small ranges. This will cause the `redo` action in reorganization. + // So for added count and warnings collection, it is recommended to collect the statistics in every + // successfully committed small ranges rather than fetching it in the total result. 
+ rc.increaseRowCount(int64(taskCtx.addedCount)) + rc.mergeWarnings(taskCtx.warnings, taskCtx.warningsCount) + + if num := result.scanCount - lastLogCount; num >= 30000 { + lastLogCount = result.scanCount + logutil.BgLogger().Info("[ddl] backfill worker back fill index", + zap.Int("workerID", w.id), + zap.Int("addedCount", result.addedCount), + zap.Int("scanCount", result.scanCount), + zap.String("nextHandle", tryDecodeToHandleString(taskCtx.nextKey)), + zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds())) + lastLogTime = time.Now() } handleRange.startKey = taskCtx.nextKey @@ -1008,6 +1128,101 @@ func (w *backfillWorker) handleMergeTask(d *ddlCtx, task *reorgBackfillTask, bf } logutil.BgLogger().Info("[ddl] merge worker finish task", zap.Int("workerID", w.id), zap.String("task", task.String()), + zap.Int("addedCount", result.addedCount), + zap.Int("scanCount", result.scanCount), + zap.String("nextHandle", tryDecodeToHandleString(result.nextKey)), zap.String("takeTime", time.Since(startTime).String())) + return result +} + +// sendRangeTaskToWorkers sends tasks to workers, and returns remaining kvRanges that is not handled. +func (w *worker) sendRangeTaskToMergeWorkers(t table.Table, workers []*backfillWorker, reorgInfo *reorgInfo, + totalAddedCount *int64, kvRanges []kv.KeyRange, phyicID int64) ([]kv.KeyRange, error) { + batchTasks := make([]*reorgBackfillTask, 0, len(workers)) + physicalTableID := phyicID + + // Build reorg tasks. + for _, keyRange := range kvRanges { + endKey := keyRange.EndKey + endK, err := getIndexRangeEndKey(reorgInfo.d.jobContext(reorgInfo.Job), workers[0].sessCtx.GetStore(), workers[0].priority, t, keyRange.StartKey, endKey) + if err != nil { + logutil.BgLogger().Info("[ddl] send range task to workers, get reverse key failed", zap.Error(err)) + } else { + logutil.BgLogger().Info("[ddl] send range task to workers, change end key", + zap.String("end key", tryDecodeToHandleString(endKey)), zap.String("current end key", tryDecodeToHandleString(endK))) + endKey = endK + } + + task := &reorgBackfillTask{ + physicalTableID: physicalTableID, + startKey: keyRange.StartKey, + endKey: endKey} + batchTasks = append(batchTasks, task) + + if len(batchTasks) >= len(workers) { + break + } + } + + if len(batchTasks) == 0 { + return nil, nil + } + + // Wait tasks finish. + err := w.handleMergeTasks(reorgInfo, totalAddedCount, workers, batchTasks) + if err != nil { + return nil, errors.Trace(err) + } + + if len(batchTasks) < len(kvRanges) { + // There are kvRanges not handled. + remains := kvRanges[len(batchTasks):] + return remains, nil + } + + return nil, nil +} + +// handleReorgTasks sends tasks to workers, and waits for all the running workers to return results, +// there are taskCnt running workers. 
+func (w *worker) handleMergeTasks(reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error { + for i, task := range batchTasks { + workers[i].taskCh <- task + } + + startKey := batchTasks[0].startKey + taskCnt := len(batchTasks) + startTime := time.Now() + nextKey, taskAddedCount, err := w.waitTaskResults(workers, taskCnt, totalAddedCount, startKey) + elapsedTime := time.Since(startTime) + if err == nil { + err = w.isReorgRunnable(reorgInfo.Job) + } + + if err != nil { + err := reorgInfo.UpdateReorgMeta(nextKey) + metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds()) + logutil.BgLogger().Warn("[ddl] merge worker handle batch tasks failed", + zap.ByteString("elementType", reorgInfo.currElement.TypeKey), + zap.Int64("elementID", reorgInfo.currElement.ID), + zap.Int64("totalAddedCount", *totalAddedCount), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("nextHandle", tryDecodeToHandleString(nextKey)), + zap.Int64("batchAddedCount", taskAddedCount), + zap.String("taskFailedError", err.Error()), + zap.String("takeTime", elapsedTime.String()), + zap.NamedError("updateHandleError", err)) + return errors.Trace(err) + } + // nextHandle will be updated periodically in runReorgJob, so no need to update it here. + w.getReorgCtx(reorgInfo.Job).setNextKey(nextKey) + logutil.BgLogger().Info("[ddl] Merge workers successfully processed batch", + zap.ByteString("elementType", reorgInfo.currElement.TypeKey), + zap.Int64("elementID", reorgInfo.currElement.ID), + zap.Int64("totalAddedCount", *totalAddedCount), + zap.String("startHandle", tryDecodeToHandleString(startKey)), + zap.String("nextHandle", tryDecodeToHandleString(nextKey)), + zap.Int64("batchAddedCount", taskAddedCount), + zap.String("takeTime", elapsedTime.String())) return nil } diff --git a/ddl/index.go b/ddl/index.go index 3f6c931954ce5..a1153611bfbda 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -766,7 +766,7 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo doReorg, ver, err = goFastDDLBackfill(w, d, t, job, tbl, indexInfo, reorgInfo, elements, rh) if indexInfo.SubState != model.StatePublic { if err != nil { - logutil.BgLogger().Error("Lightning: Add index backfill processing:", zap.String("Error:", err.Error())) + logutil.BgLogger().Error("Lightning: Add index fast path processing:", zap.String("Error:", err.Error())) return doReorg, ver, err } // Only when SubState is in BackFill state, then need start to start new backfill task. diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index 17b0979b2ef6d..dcf3bfd324aa7 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -401,12 +401,12 @@ type backFillIndexWorker struct { firstVal []byte } -func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { +func newTempIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t table.PhysicalTable, indexInfo *model.IndexInfo, reorgInfo *reorgInfo, jc *JobContext) *backFillIndexWorker { index := tables.NewIndex(t.GetPhysicalID(), t.Meta(), indexInfo) // Add build openengine process. 
return &backFillIndexWorker{ - backfillWorker: newBackfillWorker(sessCtx, 0, t, reorgInfo), + backfillWorker: newBackfillWorker(sessCtx, id, t, reorgInfo), index: index, jobContext: jc, } diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index b383ac9f496f5..a37ab30543267 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -954,7 +954,7 @@ const ( DefStreamCountWhenMaxThreadsNotSet = 8 DefTiFlashFineGrainedShuffleBatchSize = 8192 DefTiDBFastDDL = false - DefTiDBDiskQuota = 100 * 1024 * 1024 * 1024 // 100GB + DefTiDBDiskQuota = 10 * 1024 * 1024 * 1024 // 100GB ) // Process global variables. From e8f23ca269a706a24bbe33cbf0b0cf6df4cabad6 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Thu, 14 Jul 2022 09:15:45 +0800 Subject: [PATCH 15/16] merge index batch edge duplicate value handle. --- ddl/index_lightning.go | 29 +++++++++++++++++++---------- ddl/lightning/backend.go | 4 ++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index dcf3bfd324aa7..4cf4081380581 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -441,8 +441,8 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta if err != nil { return errors.Trace(err) } - - for _, idxRecord := range temporaryIndexRecords { + endPos := len(temporaryIndexRecords) + for i, idxRecord := range temporaryIndexRecords { // The index is already exists, we skip it, no needs to backfill it. // The following update, delete, insert on these rows, TiDB can handle it correctly. // If all batch are skiped, update first index key to make txn commit to release lock. @@ -478,20 +478,29 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta return err } break + } + + if idxRecord.delete { + if idxRecord.unique { + err = txn.GetMemBuffer().DeleteWithFlags(idxRecord.key, kv.SetNeedLocked) + } else { + err = txn.GetMemBuffer().Delete(idxRecord.key) + } } else { - if idxRecord.delete { + err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) + // If the merge key is batch end should be deleted in temp index to avoid + // re merge by accident. + if i == endPos-1 { if idxRecord.unique { - err = txn.GetMemBuffer().DeleteWithFlags(idxRecord.key, kv.SetNeedLocked) + err = txn.GetMemBuffer().DeleteWithFlags(w.batchCheckTmpKeys[i], kv.SetNeedLocked) } else { - err = txn.GetMemBuffer().Delete(idxRecord.key) + err = txn.GetMemBuffer().Delete(w.batchCheckTmpKeys[i]) } - } else { - err = txn.GetMemBuffer().Set(idxRecord.key, idxRecord.vals) - } - if err != nil { - return err } } + if err != nil { + return err + } } return nil }) diff --git a/ddl/lightning/backend.go b/ddl/lightning/backend.go index 982f13a0d77a2..1f8b7a5e79bc6 100644 --- a/ddl/lightning/backend.go +++ b/ddl/lightning/backend.go @@ -109,7 +109,7 @@ func GenBackendContextKey(jobID int64) string { func adjustImportMemory(cfg *config.Config) { var scale int64 // Try agressive resource usage successful. 
- if tryAgressiveMemory(cfg) { + if tryAggressiveMemory(cfg) { return } @@ -140,7 +140,7 @@ func adjustImportMemory(cfg *config.Config) { } // tryAgressiveMemory lightning memory parameters according memory root's max limitation -func tryAgressiveMemory(cfg *config.Config) bool { +func tryAggressiveMemory(cfg *config.Config) bool { var defaultMemSize int64 defaultMemSize = int64(128 * _mb * cfg.TikvImporter.RangeConcurrency) defaultMemSize += int64(cfg.TikvImporter.EngineMemCacheSize) From 59d6ef7dd2bc3fa80e9f896045e3b479e02b9760 Mon Sep 17 00:00:00 2001 From: Benjamin2037 Date: Thu, 14 Jul 2022 09:38:33 +0800 Subject: [PATCH 16/16] typo --- ddl/index_lightning.go | 3 +-- ddl/lightning/backend.go | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ddl/index_lightning.go b/ddl/index_lightning.go index 4cf4081380581..36407c7341917 100644 --- a/ddl/index_lightning.go +++ b/ddl/index_lightning.go @@ -445,7 +445,7 @@ func (w *backFillIndexWorker) BackfillDataInTxn(taskRange reorgBackfillTask) (ta for i, idxRecord := range temporaryIndexRecords { // The index is already exists, we skip it, no needs to backfill it. // The following update, delete, insert on these rows, TiDB can handle it correctly. - // If all batch are skiped, update first index key to make txn commit to release lock. + // If all batch are skipped, update first index key to make txn commit to release lock. if idxRecord.skip && !w.skipAll { continue } @@ -601,7 +601,6 @@ func (w *backFillIndexWorker) fetchTempIndexVals(txn kv.Transaction, taskRange r length-- // Just skip it. if bytes.Equal(keyVer, []byte("2")) { - skip = true return true, nil } if bytes.Equal(rawValue, []byte("delete")) { diff --git a/ddl/lightning/backend.go b/ddl/lightning/backend.go index 1f8b7a5e79bc6..5d9b81a250663 100644 --- a/ddl/lightning/backend.go +++ b/ddl/lightning/backend.go @@ -108,7 +108,7 @@ func GenBackendContextKey(jobID int64) string { // Adjust lightning memory parameters according memory root's max limitation func adjustImportMemory(cfg *config.Config) { var scale int64 - // Try agressive resource usage successful. + // Try aggressive resource usage successful. if tryAggressiveMemory(cfg) { return } @@ -139,7 +139,7 @@ func adjustImportMemory(cfg *config.Config) { zap.String("rangecounrrency:", strconv.Itoa(cfg.TikvImporter.RangeConcurrency))) } -// tryAgressiveMemory lightning memory parameters according memory root's max limitation +// tryAggressiveMemory lightning memory parameters according memory root's max limitation func tryAggressiveMemory(cfg *config.Config) bool { var defaultMemSize int64 defaultMemSize = int64(128 * _mb * cfg.TikvImporter.RangeConcurrency)
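
To make the memory tuning in the ddl/lightning/backend.go hunks of the last two patches easier to follow: tryAggressiveMemory sizes the "aggressive" configuration at roughly 128 MiB per concurrent range plus the engine memory cache, and adjustImportMemory returns early when that budget fits, only scaling settings down otherwise. The sketch below restates that arithmetic under stated assumptions: the _mb constant, the standalone package, the function names, and the comparison against an explicit limit are illustrative, not the patch's code.

package memsketch // hypothetical package, for illustration only; not part of this patch series

import "github.com/pingcap/tidb/br/pkg/lightning/config"

const _mb = 1024 * 1024 // assumed to match the _mb unit used in ddl/lightning

// aggressiveMemoryNeed estimates the memory needed to import with the
// aggressive settings, following the arithmetic shown in tryAggressiveMemory:
// 128 MiB per concurrent range plus the engine memory cache.
func aggressiveMemoryNeed(cfg *config.Config) int64 {
	need := int64(128 * _mb * cfg.TikvImporter.RangeConcurrency)
	need += int64(cfg.TikvImporter.EngineMemCacheSize)
	return need
}

// fitsAggressiveMemory reports whether that estimate fits under the given
// limit, i.e. whether the early-return path in adjustImportMemory can be taken
// without scaling the import settings down.
func fitsAggressiveMemory(cfg *config.Config, maxMemory int64) bool {
	return aggressiveMemoryNeed(cfg) <= maxMemory
}
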