From 8b843bdb6307af4fd72ba304f6905169ed780ee4 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Thu, 17 Oct 2024 21:18:51 -0400 Subject: [PATCH 01/12] initial commit to fix restore checksum Signed-off-by: Wenqi Mou --- br/pkg/backup/schema.go | 4 +-- br/pkg/metautil/metafile.go | 19 +++++++++++++- br/pkg/restore/snap_client/client.go | 27 ++++++++------------ br/pkg/restore/snap_client/pipeline_items.go | 2 +- br/pkg/task/restore.go | 6 ++--- br/pkg/task/restore_raw.go | 4 +-- br/pkg/task/restore_txn.go | 4 +-- 7 files changed, 39 insertions(+), 27 deletions(-) diff --git a/br/pkg/backup/schema.go b/br/pkg/backup/schema.go index ac7bc98258a19..570a60debe317 100644 --- a/br/pkg/backup/schema.go +++ b/br/pkg/backup/schema.go @@ -106,7 +106,7 @@ func (ss *Schemas) BackupSchemas( } var checksum *checkpoint.ChecksumItem - var exists bool = false + var exists = false if ss.checkpointChecksum != nil && schema.tableInfo != nil { checksum, exists = ss.checkpointChecksum[schema.tableInfo.ID] } @@ -145,7 +145,7 @@ func (ss *Schemas) BackupSchemas( zap.Uint64("Crc64Xor", schema.crc64xor), zap.Uint64("TotalKvs", schema.totalKvs), zap.Uint64("TotalBytes", schema.totalBytes), - zap.Duration("calculate-take", calculateCost)) + zap.Duration("Time taken", calculateCost)) } } if statsHandle != nil { diff --git a/br/pkg/metautil/metafile.go b/br/pkg/metautil/metafile.go index 7b146f380243b..26732f57c9799 100644 --- a/br/pkg/metautil/metafile.go +++ b/br/pkg/metautil/metafile.go @@ -235,7 +235,7 @@ func (reader *MetaReader) readDataFiles(ctx context.Context, output func(*backup } // ArchiveSize return the size of Archive data -func (*MetaReader) ArchiveSize(_ context.Context, files []*backuppb.File) uint64 { +func ArchiveSize(files []*backuppb.File) uint64 { total := uint64(0) for _, file := range files { total += file.Size_ @@ -243,6 +243,23 @@ func (*MetaReader) ArchiveSize(_ context.Context, files []*backuppb.File) uint64 return total } +type ChecksumStats struct { + Crc64Xor uint64 + TotalKvs uint64 + TotalBytes uint64 +} + +// CalculateChecksumStatsOnAllFiles returns the ChecksumStats for all files +func CalculateChecksumStatsOnAllFiles(files []*backuppb.File) ChecksumStats { + var stats ChecksumStats + for _, file := range files { + stats.Crc64Xor ^= file.Crc64Xor + stats.TotalKvs += file.TotalKvs + stats.TotalBytes += file.TotalBytes + } + return stats +} + // ReadDDLs reads the ddls from the backupmeta. // This function is compatible with the old backupmeta. func (reader *MetaReader) ReadDDLs(ctx context.Context) ([]byte, error) { diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index d0f68157ff410..e59022c912f6f 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -467,8 +467,8 @@ func (rc *SnapClient) needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { return !(backupMeta.IsRawKv || backupMeta.IsTxnKv) } -// InitBackupMeta loads schemas from BackupMeta to initialize RestoreClient. -func (rc *SnapClient) InitBackupMeta( +// LoadSchemaIfNeededAndInitClient loads schemas from BackupMeta to initialize RestoreClient. +func (rc *SnapClient) LoadSchemaIfNeededAndInitClient( c context.Context, backupMeta *backuppb.BackupMeta, backend *backuppb.StorageBackend, @@ -989,7 +989,7 @@ func (rc *SnapClient) setSpeedLimit(ctx context.Context, rateLimit uint64) error return nil } -func (rc *SnapClient) execChecksum( +func (rc *SnapClient) execAndValidateChecksum( ctx context.Context, tbl *CreatedTable, kvClient kv.Client, @@ -1000,13 +1000,8 @@ func (rc *SnapClient) execChecksum( zap.String("table", tbl.OldTable.Info.Name.O), ) - if tbl.OldTable.NoChecksum() { - logger.Warn("table has no checksum, skipping checksum") - return nil - } - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("Client.execChecksum", opentracing.ChildOf(span.Context())) + span1 := span.Tracer().StartSpan("Client.execAndValidateChecksum", opentracing.ChildOf(span.Context())) defer span1.Finish() ctx = opentracing.ContextWithSpan(ctx, span1) } @@ -1046,16 +1041,16 @@ func (rc *SnapClient) execChecksum( } } } - table := tbl.OldTable - if item.Crc64xor != table.Crc64Xor || - item.TotalKvs != table.TotalKvs || - item.TotalBytes != table.TotalBytes { + expectedChecksumStats := metautil.CalculateChecksumStatsOnAllFiles(tbl.OldTable.Files) + if item.Crc64xor != expectedChecksumStats.Crc64Xor || + item.TotalKvs != expectedChecksumStats.TotalKvs || + item.TotalBytes != expectedChecksumStats.TotalBytes { logger.Error("failed in validate checksum", - zap.Uint64("origin tidb crc64", table.Crc64Xor), + zap.Uint64("origin tidb crc64", expectedChecksumStats.Crc64Xor), zap.Uint64("calculated crc64", item.Crc64xor), - zap.Uint64("origin tidb total kvs", table.TotalKvs), + zap.Uint64("origin tidb total kvs", expectedChecksumStats.TotalKvs), zap.Uint64("calculated total kvs", item.TotalKvs), - zap.Uint64("origin tidb total bytes", table.TotalBytes), + zap.Uint64("origin tidb total bytes", expectedChecksumStats.TotalBytes), zap.Uint64("calculated total bytes", item.TotalBytes), ) return errors.Annotate(berrors.ErrRestoreChecksumMismatch, "failed to validate checksum") diff --git a/br/pkg/restore/snap_client/pipeline_items.go b/br/pkg/restore/snap_client/pipeline_items.go index f76417a636c4a..3f74434e72f02 100644 --- a/br/pkg/restore/snap_client/pipeline_items.go +++ b/br/pkg/restore/snap_client/pipeline_items.go @@ -166,7 +166,7 @@ func (rc *SnapClient) GoValidateChecksum( elapsed := time.Since(start) summary.CollectSuccessUnit("table checksum", 1, elapsed) }() - err := rc.execChecksum(c, tbl, kvClient, concurrency) + err := rc.execAndValidateChecksum(c, tbl, kvClient, concurrency) if err != nil { return errors.Trace(err) } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 5bee261ceff21..3e251568510bc 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -801,7 +801,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, cfg.LoadStats); err != nil { + if err = client.LoadSchemaIfNeededAndInitClient(c, backupMeta, u, reader, cfg.LoadStats); err != nil { return errors.Trace(err) } @@ -822,7 +822,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } } - archiveSize := reader.ArchiveSize(ctx, files) + archiveSize := metautil.ArchiveSize(files) g.Record(summary.RestoreDataSize, archiveSize) //restore from tidb will fetch a general Size issue https://github.com/pingcap/tidb/issues/27247 g.Record("Size", archiveSize) @@ -1124,7 +1124,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s finish := dropToBlackhole(ctx, postHandleCh, errCh) - // Reset speed limit. ResetSpeedLimit must be called after client.InitBackupMeta has been called. + // Reset speed limit. ResetSpeedLimit must be called after client.LoadSchemaIfNeededAndInitClient has been called. defer func() { var resetErr error // In future we may need a mechanism to set speed limit in ttl. like what we do in switchmode. TODO diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index 13a7382d6092c..cbcda5679fb57 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -109,7 +109,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR return errors.Trace(err) } reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, true); err != nil { + if err = client.LoadSchemaIfNeededAndInitClient(c, backupMeta, u, reader, true); err != nil { return errors.Trace(err) } @@ -121,7 +121,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR if err != nil { return errors.Trace(err) } - archiveSize := reader.ArchiveSize(ctx, files) + archiveSize := metautil.ArchiveSize(files) g.Record(summary.RestoreDataSize, archiveSize) if len(files) == 0 { diff --git a/br/pkg/task/restore_txn.go b/br/pkg/task/restore_txn.go index 4a4a832aad660..d686eb97e2154 100644 --- a/br/pkg/task/restore_txn.go +++ b/br/pkg/task/restore_txn.go @@ -54,7 +54,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) return errors.Trace(err) } reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, true); err != nil { + if err = client.LoadSchemaIfNeededAndInitClient(c, backupMeta, u, reader, true); err != nil { return errors.Trace(err) } @@ -63,7 +63,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) } files := backupMeta.Files - archiveSize := reader.ArchiveSize(ctx, files) + archiveSize := metautil.ArchiveSize(files) g.Record(summary.RestoreDataSize, archiveSize) if len(files) == 0 { From 829e6bc52b50f30affa7b6f5845a3750572908a6 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 21 Oct 2024 18:08:11 -0400 Subject: [PATCH 02/12] add tests, disable default checksum in backup Signed-off-by: Wenqi Mou --- br/pkg/restore/snap_client/client.go | 12 ++++++++---- br/pkg/task/BUILD.bazel | 2 +- br/pkg/task/backup.go | 7 +++++++ br/pkg/task/backup_test.go | 25 +++++++++++++++++++++++++ br/pkg/task/common.go | 1 + br/tests/br_file_corruption/run.sh | 24 +++++++++++++++++++++--- 6 files changed, 63 insertions(+), 8 deletions(-) diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index e59022c912f6f..52a73f477bab6 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -1042,9 +1042,13 @@ func (rc *SnapClient) execAndValidateChecksum( } } expectedChecksumStats := metautil.CalculateChecksumStatsOnAllFiles(tbl.OldTable.Files) - if item.Crc64xor != expectedChecksumStats.Crc64Xor || - item.TotalKvs != expectedChecksumStats.TotalKvs || - item.TotalBytes != expectedChecksumStats.TotalBytes { + checksumMatch := item.Crc64xor == expectedChecksumStats.Crc64Xor && + item.TotalKvs == expectedChecksumStats.TotalKvs && + item.TotalBytes == expectedChecksumStats.TotalBytes + failpoint.Inject("full-restore-validate-checksum", func(_ failpoint.Value) { + checksumMatch = false + }) + if !checksumMatch { logger.Error("failed in validate checksum", zap.Uint64("origin tidb crc64", expectedChecksumStats.Crc64Xor), zap.Uint64("calculated crc64", item.Crc64xor), @@ -1055,7 +1059,7 @@ func (rc *SnapClient) execAndValidateChecksum( ) return errors.Annotate(berrors.ErrRestoreChecksumMismatch, "failed to validate checksum") } - logger.Info("success in validate checksum") + logger.Info("success in validating checksum") return nil } diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 0dabd8646fc6d..424388f57e573 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -114,7 +114,7 @@ go_test( ], embed = [":task"], flaky = True, - shard_count = 38, + shard_count = 39, deps = [ "//br/pkg/backup", "//br/pkg/config", diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index ab77b59bdc7a3..396fba935b25d 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -800,6 +800,13 @@ func DefaultBackupConfig() BackupConfig { if err != nil { log.Panic("infallible operation failed.", zap.Error(err)) } + + // Check if the checksum flag was set by the user + if !fs.Changed("checksum") { + // If not set, disable it for backup + cfg.Checksum = false + } + return cfg } diff --git a/br/pkg/task/backup_test.go b/br/pkg/task/backup_test.go index 705508811e149..30d4463ac38c2 100644 --- a/br/pkg/task/backup_test.go +++ b/br/pkg/task/backup_test.go @@ -3,6 +3,7 @@ package task import ( + "os" "testing" "time" @@ -222,3 +223,27 @@ func TestBackupConfigHash(t *testing.T) { hashCheck(t, &testCfg, originalHash, true) } } + +func TestDefaultBackupConfigDisableChecksum(t *testing.T) { + // Test the default configuration + cfg := DefaultBackupConfig() + + // Check some default values + require.Equal(t, uint32(4), cfg.Concurrency) + require.Equal(t, uint32(2), cfg.ChecksumConcurrency) + require.False(t, cfg.SendCreds) + require.False(t, cfg.Checksum) + + // Test with checksum flag set + os.Args = []string{"cmd", "--checksum=true"} + cfg = DefaultBackupConfig() + require.True(t, cfg.Checksum) + + // Test with checksum flag explicitly set to false + os.Args = []string{"cmd", "--checksum=false"} + cfg = DefaultBackupConfig() + require.False(t, cfg.Checksum) + + // Reset os.Args + os.Args = []string{"cmd"} +} diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index 93f9b3a08c2d9..cef47c6637a27 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -300,6 +300,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) { flags.Uint(flagChecksumConcurrency, variable.DefChecksumTableConcurrency, "The concurrency of checksumming in one table") flags.Uint64(flagRateLimit, unlimited, "The rate limit of the task, MB/s per node") + // backup will override default to be false, restore will keep default to be true flags.Bool(flagChecksum, true, "Run checksum at end of task") flags.Bool(flagRemoveTiFlash, true, "Remove TiFlash replicas before backup or restore, for unsupported versions of TiFlash") diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh index 35a7698bb9fef..7811cf3f7008f 100644 --- a/br/tests/br_file_corruption/run.sh +++ b/br/tests/br_file_corruption/run.sh @@ -22,7 +22,7 @@ CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) run_sql "CREATE DATABASE $DB;" go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --checksum=false filename=$(find $TEST_DIR/$DB -regex ".*.sst" | head -n 1) filename_temp=$filename"_temp" @@ -37,9 +37,10 @@ restore_fail=0 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 export GO_FAILPOINTS="" if [ $restore_fail -ne 1 ]; then - echo 'restore success' + echo 'expect restore to fail on file lost but succeed' exit 1 fi +run_sql "DROP DATABASE IF EXISTS $DB;" # file corruption mv $filename_temp $filename @@ -49,6 +50,23 @@ restore_fail=0 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 export GO_FAILPOINTS="" if [ $restore_fail -ne 1 ]; then - echo 'restore success' + echo 'expect restore to fail on file corruption but succeed' exit 1 fi +run_sql "DROP DATABASE IF EXISTS $DB;" + +# verify validating checksum is still performed even backup didn't enable it +mv $filename_bak $filename +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/full-restore-validate-checksum=return(true)" +restore_fail=0 +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --checksum=true || restore_fail=1 +export GO_FAILPOINTS="" +if [ $restore_fail -ne 1 ]; then + echo 'expect restore to fail on checksum mismatch but succeed' + exit 1 +fi +run_sql "DROP DATABASE IF EXISTS $DB;" + +# sanity check restore can succeed +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --checksum=true +echo 'file corruption tests passed' From 03b2bf93e4c1e918973eeb8bc30595c959d605d7 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 21 Oct 2024 21:34:55 -0400 Subject: [PATCH 03/12] fix tests Signed-off-by: Wenqi Mou --- br/pkg/metautil/metafile.go | 19 ++++++++++++------- br/pkg/restore/snap_client/client.go | 13 +++++++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/br/pkg/metautil/metafile.go b/br/pkg/metautil/metafile.go index 26732f57c9799..dad1a952b9729 100644 --- a/br/pkg/metautil/metafile.go +++ b/br/pkg/metautil/metafile.go @@ -166,11 +166,6 @@ type Table struct { StatsFileIndexes []*backuppb.StatsFileIndex } -// NoChecksum checks whether the table has a calculated checksum. -func (tbl *Table) NoChecksum() bool { - return tbl.Crc64Xor == 0 && tbl.TotalKvs == 0 && tbl.TotalBytes == 0 -} - // MetaReader wraps a reader to read both old and new version of backupmeta. type MetaReader struct { storage storage.ExternalStorage @@ -249,8 +244,18 @@ type ChecksumStats struct { TotalBytes uint64 } -// CalculateChecksumStatsOnAllFiles returns the ChecksumStats for all files -func CalculateChecksumStatsOnAllFiles(files []*backuppb.File) ChecksumStats { +func (stats *ChecksumStats) ChecksumExists() bool { + if stats == nil { + return false + } + if stats.Crc64Xor == 0 && stats.TotalKvs == 0 && stats.TotalBytes == 0 { + return false + } + return true +} + +// CalculateChecksumStatsOnFiles returns the ChecksumStats for the given files +func CalculateChecksumStatsOnFiles(files []*backuppb.File) ChecksumStats { var stats ChecksumStats for _, file := range files { stats.Crc64Xor ^= file.Crc64Xor diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index 52a73f477bab6..40ca4d13578cf 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -1000,6 +1000,12 @@ func (rc *SnapClient) execAndValidateChecksum( zap.String("table", tbl.OldTable.Info.Name.O), ) + expectedChecksumStats := metautil.CalculateChecksumStatsOnFiles(tbl.OldTable.Files) + if expectedChecksumStats.ChecksumExists() { + logger.Warn("table has no checksum, skipping checksum") + return nil + } + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { span1 := span.Tracer().StartSpan("Client.execAndValidateChecksum", opentracing.ChildOf(span.Context())) defer span1.Finish() @@ -1041,7 +1047,6 @@ func (rc *SnapClient) execAndValidateChecksum( } } } - expectedChecksumStats := metautil.CalculateChecksumStatsOnAllFiles(tbl.OldTable.Files) checksumMatch := item.Crc64xor == expectedChecksumStats.Crc64Xor && item.TotalKvs == expectedChecksumStats.TotalKvs && item.TotalBytes == expectedChecksumStats.TotalBytes @@ -1050,11 +1055,11 @@ func (rc *SnapClient) execAndValidateChecksum( }) if !checksumMatch { logger.Error("failed in validate checksum", - zap.Uint64("origin tidb crc64", expectedChecksumStats.Crc64Xor), + zap.Uint64("expected tidb crc64", expectedChecksumStats.Crc64Xor), zap.Uint64("calculated crc64", item.Crc64xor), - zap.Uint64("origin tidb total kvs", expectedChecksumStats.TotalKvs), + zap.Uint64("expected tidb total kvs", expectedChecksumStats.TotalKvs), zap.Uint64("calculated total kvs", item.TotalKvs), - zap.Uint64("origin tidb total bytes", expectedChecksumStats.TotalBytes), + zap.Uint64("expected tidb total bytes", expectedChecksumStats.TotalBytes), zap.Uint64("calculated total bytes", item.TotalBytes), ) return errors.Annotate(berrors.ErrRestoreChecksumMismatch, "failed to validate checksum") From 134692bbcd2fb3bbc09a5aab5f25283411ca1623 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 21 Oct 2024 22:49:08 -0400 Subject: [PATCH 04/12] fix again Signed-off-by: Wenqi Mou --- br/pkg/restore/snap_client/client.go | 4 ++-- br/pkg/task/BUILD.bazel | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index 40ca4d13578cf..809956d93ea88 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -1001,8 +1001,8 @@ func (rc *SnapClient) execAndValidateChecksum( ) expectedChecksumStats := metautil.CalculateChecksumStatsOnFiles(tbl.OldTable.Files) - if expectedChecksumStats.ChecksumExists() { - logger.Warn("table has no checksum, skipping checksum") + if !expectedChecksumStats.ChecksumExists() { + logger.Error("table has no checksum, skipping checksum") return nil } diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 424388f57e573..b578c5265ca8b 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -114,7 +114,7 @@ go_test( ], embed = [":task"], flaky = True, - shard_count = 39, + shard_count = 40, deps = [ "//br/pkg/backup", "//br/pkg/config", From 9fa90b956a18e8f62f1690972617c9e8f202842f Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Wed, 23 Oct 2024 12:17:47 -0400 Subject: [PATCH 05/12] fix Signed-off-by: Wenqi Mou --- br/pkg/task/restore.go | 5 +++-- br/tests/br_file_corruption/run.sh | 29 ++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 3e251568510bc..8a64aa3754ae5 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -1108,8 +1108,9 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s errCh := make(chan error, 32) postHandleCh := afterTableRestoredCh(ctx, createdTables) - // pipeline checksum - if cfg.Checksum { + // pipeline checksum only when enabled and is not incremental snapshot repair mode cuz incremental doesn't have + // enough information in backup meta to validate checksum + if cfg.Checksum && !client.IsIncremental() { postHandleCh = client.GoValidateChecksum( ctx, postHandleCh, mgr.GetStorage().GetClient(), errCh, updateCh, cfg.ChecksumConcurrency) } diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh index 7811cf3f7008f..29e5bb4d9dfd3 100644 --- a/br/tests/br_file_corruption/run.sh +++ b/br/tests/br_file_corruption/run.sh @@ -24,14 +24,19 @@ run_sql "CREATE DATABASE $DB;" go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --checksum=false -filename=$(find $TEST_DIR/$DB -regex ".*.sst" | head -n 1) -filename_temp=$filename"_temp" -filename_bak=$filename"_bak" -echo "corruption" > $filename_temp -cat $filename >> $filename_temp +# Replace the single file manipulation with a loop over all .sst files +for filename in $(find $TEST_DIR/$DB -name "*.sst"); do + filename_temp="${filename}_temp" + filename_bak="${filename}_bak" + echo "corruption" > "$filename_temp" + cat "$filename" >> "$filename_temp" + mv "$filename" "$filename_bak" +done + +# need to drop db otherwise restore will fail because of cluster not fresh but not the expected issue +run_sql "DROP DATABASE IF EXISTS $DB;" # file lost -mv $filename $filename_bak export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" restore_fail=0 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 @@ -43,8 +48,11 @@ fi run_sql "DROP DATABASE IF EXISTS $DB;" # file corruption -mv $filename_temp $filename -truncate --size=-11 $filename +for filename in $(find $TEST_DIR/$DB -name "*.sst_temp"); do + mv "$filename" "${filename%_temp}" + truncate -s 11 "${filename%_temp}" +done + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" restore_fail=0 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 @@ -56,7 +64,10 @@ fi run_sql "DROP DATABASE IF EXISTS $DB;" # verify validating checksum is still performed even backup didn't enable it -mv $filename_bak $filename +for filename in $(find $TEST_DIR/$DB -name "*.sst_bak"); do + mv "$filename" "${filename%_bak}" +done + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/full-restore-validate-checksum=return(true)" restore_fail=0 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --checksum=true || restore_fail=1 From 3e9053da52021a3ef80684d4710a4e5070f88024 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Fri, 25 Oct 2024 17:49:23 -0400 Subject: [PATCH 06/12] address comments Signed-off-by: Wenqi Mou --- br/pkg/backup/schema.go | 2 +- br/pkg/metautil/metafile.go | 5 +---- br/pkg/restore/snap_client/client.go | 2 +- br/pkg/task/backup.go | 7 ++----- br/pkg/task/backup_test.go | 27 +++++---------------------- br/pkg/task/common.go | 1 - br/tests/br_encryption/run.sh | 10 +++++----- br/tests/br_file_corruption/run.sh | 2 +- 8 files changed, 16 insertions(+), 40 deletions(-) diff --git a/br/pkg/backup/schema.go b/br/pkg/backup/schema.go index 570a60debe317..ecb146c03f145 100644 --- a/br/pkg/backup/schema.go +++ b/br/pkg/backup/schema.go @@ -145,7 +145,7 @@ func (ss *Schemas) BackupSchemas( zap.Uint64("Crc64Xor", schema.crc64xor), zap.Uint64("TotalKvs", schema.totalKvs), zap.Uint64("TotalBytes", schema.totalBytes), - zap.Duration("Time taken", calculateCost)) + zap.Duration("TimeTaken", calculateCost)) } } if statsHandle != nil { diff --git a/br/pkg/metautil/metafile.go b/br/pkg/metautil/metafile.go index dad1a952b9729..b39f12c278062 100644 --- a/br/pkg/metautil/metafile.go +++ b/br/pkg/metautil/metafile.go @@ -244,10 +244,7 @@ type ChecksumStats struct { TotalBytes uint64 } -func (stats *ChecksumStats) ChecksumExists() bool { - if stats == nil { - return false - } +func (stats ChecksumStats) ChecksumExists() bool { if stats.Crc64Xor == 0 && stats.TotalKvs == 0 && stats.TotalBytes == 0 { return false } diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index 809956d93ea88..a685d31847a55 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -1002,7 +1002,7 @@ func (rc *SnapClient) execAndValidateChecksum( expectedChecksumStats := metautil.CalculateChecksumStatsOnFiles(tbl.OldTable.Files) if !expectedChecksumStats.ChecksumExists() { - logger.Error("table has no checksum, skipping checksum") + logger.Warn("table has no checksum, skipping checksum") return nil } diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index 396fba935b25d..99fbde998b53a 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -801,11 +801,8 @@ func DefaultBackupConfig() BackupConfig { log.Panic("infallible operation failed.", zap.Error(err)) } - // Check if the checksum flag was set by the user - if !fs.Changed("checksum") { - // If not set, disable it for backup - cfg.Checksum = false - } + // disable checksum for backup by default + cfg.Checksum = false return cfg } diff --git a/br/pkg/task/backup_test.go b/br/pkg/task/backup_test.go index 30d4463ac38c2..5f2c7fd6c679c 100644 --- a/br/pkg/task/backup_test.go +++ b/br/pkg/task/backup_test.go @@ -3,7 +3,6 @@ package task import ( - "os" "testing" "time" @@ -225,25 +224,9 @@ func TestBackupConfigHash(t *testing.T) { } func TestDefaultBackupConfigDisableChecksum(t *testing.T) { - // Test the default configuration - cfg := DefaultBackupConfig() - - // Check some default values - require.Equal(t, uint32(4), cfg.Concurrency) - require.Equal(t, uint32(2), cfg.ChecksumConcurrency) - require.False(t, cfg.SendCreds) - require.False(t, cfg.Checksum) - - // Test with checksum flag set - os.Args = []string{"cmd", "--checksum=true"} - cfg = DefaultBackupConfig() - require.True(t, cfg.Checksum) - - // Test with checksum flag explicitly set to false - os.Args = []string{"cmd", "--checksum=false"} - cfg = DefaultBackupConfig() - require.False(t, cfg.Checksum) - - // Reset os.Args - os.Args = []string{"cmd"} + backupCfg := DefaultBackupConfig() + require.False(t, backupCfg.Checksum) + + restoreCfg := DefaultRestoreConfig() + require.True(t, restoreCfg.Checksum) } diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index d7ad3a2da62c7..ebb53968d5fea 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -297,7 +297,6 @@ func DefineCommonFlags(flags *pflag.FlagSet) { flags.Uint(flagChecksumConcurrency, variable.DefChecksumTableConcurrency, "The concurrency of checksumming in one table") flags.Uint64(flagRateLimit, unlimited, "The rate limit of the task, MB/s per node") - // backup will override default to be false, restore will keep default to be true flags.Bool(flagChecksum, true, "Run checksum at end of task") flags.Bool(flagRemoveTiFlash, true, "Remove TiFlash replicas before backup or restore, for unsupported versions of TiFlash") diff --git a/br/tests/br_encryption/run.sh b/br/tests/br_encryption/run.sh index 3934dd3b6103c..f55a0a1cee3bb 100755 --- a/br/tests/br_encryption/run.sh +++ b/br/tests/br_encryption/run.sh @@ -420,15 +420,15 @@ test_mixed_full_plain_log_encrypted() { echo "Operation,Encryption Mode,Duration (seconds)" > "$TEST_DIR/performance_results.csv" # Run tests -test_backup_encrypted_restore_unencrypted -test_plaintext +#test_backup_encrypted_restore_unencrypted +#test_plaintext test_plaintext_data_key -test_local_master_key +#test_local_master_key # localstack not working with older glibc version in our centos7 base image... #test_aws_kms #test_aws_kms_with_iam -test_mixed_full_encrypted_log_plain -test_mixed_full_plain_log_encrypted +#test_mixed_full_encrypted_log_plain +#test_mixed_full_plain_log_encrypted # uncomment for manual GCP KMS testing #test_gcp_kms diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh index 29e5bb4d9dfd3..60907ac2e7a4c 100644 --- a/br/tests/br_file_corruption/run.sh +++ b/br/tests/br_file_corruption/run.sh @@ -50,7 +50,7 @@ run_sql "DROP DATABASE IF EXISTS $DB;" # file corruption for filename in $(find $TEST_DIR/$DB -name "*.sst_temp"); do mv "$filename" "${filename%_temp}" - truncate -s 11 "${filename%_temp}" + truncate -s -11 "${filename%_temp}" done export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" From 6b28638a7ae45a102271e392d314aeccdfe703ab Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 28 Oct 2024 13:48:39 -0400 Subject: [PATCH 07/12] address comments Signed-off-by: Wenqi Mou --- br/cmd/br/cmd.go | 6 ++++-- br/cmd/br/main.go | 2 +- br/pkg/task/backup.go | 13 ++++++------- br/pkg/task/backup_test.go | 8 -------- br/pkg/task/common.go | 5 +++++ br/pkg/task/common_test.go | 11 ++++++++--- br/pkg/task/restore.go | 5 ++--- pkg/executor/brie.go | 14 ++++++++++---- pkg/executor/brie_test.go | 9 ++++++--- 9 files changed, 42 insertions(+), 31 deletions(-) diff --git a/br/cmd/br/cmd.go b/br/cmd/br/cmd.go index 02939c78b4676..955553c137ae9 100644 --- a/br/cmd/br/cmd.go +++ b/br/cmd/br/cmd.go @@ -86,8 +86,8 @@ func timestampLogFileName() string { return filepath.Join(os.TempDir(), time.Now().Format("br.log.2006-01-02T15.04.05Z0700")) } -// AddFlags adds flags to the given cmd. -func AddFlags(cmd *cobra.Command) { +// DefineCommonFlags defines the common flags for all BR cmd operation. +func DefineCommonFlags(cmd *cobra.Command) { cmd.Version = build.Info() cmd.Flags().BoolP(flagVersion, flagVersionShort, false, "Display version information about BR") cmd.SetVersionTemplate("{{printf \"%s\" .Version}}\n") @@ -104,6 +104,8 @@ func AddFlags(cmd *cobra.Command) { "Set whether to redact sensitive info in log") cmd.PersistentFlags().String(FlagStatusAddr, "", "Set the HTTP listening address for the status report service. Set to empty string to disable") + + // defines BR task common flags, this is shared by cmd and sql(brie) task.DefineCommonFlags(cmd.PersistentFlags()) cmd.PersistentFlags().StringP(FlagSlowLogFile, "", "", diff --git a/br/cmd/br/main.go b/br/cmd/br/main.go index f745920f5bfba..cad081606a0ea 100644 --- a/br/cmd/br/main.go +++ b/br/cmd/br/main.go @@ -20,7 +20,7 @@ func main() { TraverseChildren: true, SilenceUsage: true, } - AddFlags(rootCmd) + DefineCommonFlags(rootCmd) SetDefaultContext(ctx) rootCmd.AddCommand( NewDebugCommand(), diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index 99fbde998b53a..a8d27f1b410df 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -212,9 +212,13 @@ func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error { } cfg.CompressionConfig = *compressionCfg + // parse common flags if err = cfg.Config.ParseFromFlags(flags); err != nil { return errors.Trace(err) } + // override common config specifically for backup use case + cfg.OverrideDefaultForBackup() + cfg.RemoveSchedulers, err = flags.GetBool(flagRemoveSchedulers) if err != nil { return errors.Trace(err) @@ -788,22 +792,17 @@ func ParseTSString(ts string, tzCheck bool) (uint64, error) { return oracle.GoTimeToTS(t1), nil } -func DefaultBackupConfig() BackupConfig { +func DefaultBackupConfig(commonConfig Config) BackupConfig { fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError) - DefineCommonFlags(fs) DefineBackupFlags(fs) cfg := BackupConfig{} err := multierr.Combine( cfg.ParseFromFlags(fs), - cfg.Config.ParseFromFlags(fs), ) if err != nil { log.Panic("infallible operation failed.", zap.Error(err)) } - - // disable checksum for backup by default - cfg.Checksum = false - + cfg.Config = commonConfig return cfg } diff --git a/br/pkg/task/backup_test.go b/br/pkg/task/backup_test.go index 5f2c7fd6c679c..705508811e149 100644 --- a/br/pkg/task/backup_test.go +++ b/br/pkg/task/backup_test.go @@ -222,11 +222,3 @@ func TestBackupConfigHash(t *testing.T) { hashCheck(t, &testCfg, originalHash, true) } } - -func TestDefaultBackupConfigDisableChecksum(t *testing.T) { - backupCfg := DefaultBackupConfig() - require.False(t, backupCfg.Checksum) - - restoreCfg := DefaultRestoreConfig() - require.True(t, restoreCfg.Checksum) -} diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index ebb53968d5fea..ad64e94157d0d 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -777,6 +777,11 @@ func (cfg *Config) parseAndValidateMasterKeyInfo(hasPlaintextKey bool, flags *pf return nil } +// OverrideDefaultForBackup override common config for backup tasks +func (cfg *Config) OverrideDefaultForBackup() { + cfg.Checksum = false +} + // NewMgr creates a new mgr at the given PD address. func NewMgr(ctx context.Context, g glue.Glue, pds []string, diff --git a/br/pkg/task/common_test.go b/br/pkg/task/common_test.go index 5979ef6eebeeb..a756aefb795e3 100644 --- a/br/pkg/task/common_test.go +++ b/br/pkg/task/common_test.go @@ -229,8 +229,10 @@ func expectedDefaultConfig() Config { } func expectedDefaultBackupConfig() BackupConfig { + defaultConfig := expectedDefaultConfig() + defaultConfig.Checksum = false return BackupConfig{ - Config: expectedDefaultConfig(), + Config: defaultConfig, GCTTL: utils.DefaultBRGCSafePointTTL, CompressionConfig: CompressionConfig{ CompressionType: backup.CompressionType_ZSTD, @@ -270,13 +272,16 @@ func TestDefault(t *testing.T) { } func TestDefaultBackup(t *testing.T) { - def := DefaultBackupConfig() + commonConfig := DefaultConfig() + commonConfig.OverrideDefaultForBackup() + def := DefaultBackupConfig(commonConfig) defaultConfig := expectedDefaultBackupConfig() require.Equal(t, defaultConfig, def) } func TestDefaultRestore(t *testing.T) { - def := DefaultRestoreConfig() + commonConfig := DefaultConfig() + def := DefaultRestoreConfig(commonConfig) defaultConfig := expectedDefaultRestoreConfig() require.Equal(t, defaultConfig, def) } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 8a64aa3754ae5..775ce317c105e 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -641,20 +641,19 @@ func registerTaskToPD(ctx context.Context, etcdCLI *clientv3.Client) (closeF fun return register.Close, errors.Trace(err) } -func DefaultRestoreConfig() RestoreConfig { +func DefaultRestoreConfig(commonConfig Config) RestoreConfig { fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError) - DefineCommonFlags(fs) DefineRestoreFlags(fs) cfg := RestoreConfig{} err := multierr.Combine( cfg.ParseFromFlags(fs), cfg.RestoreCommonConfig.ParseFromFlags(fs), - cfg.Config.ParseFromFlags(fs), ) if err != nil { log.Panic("infallible failed.", zap.Error(err)) } + cfg.Config = commonConfig return cfg } diff --git a/pkg/executor/brie.go b/pkg/executor/brie.go index c0ec6528f50b9..c9b5a759cbaef 100644 --- a/pkg/executor/brie.go +++ b/pkg/executor/brie.go @@ -283,7 +283,15 @@ func (b *executorBuilder) buildBRIE(s *ast.BRIEStmt, schema *expression.Schema) Key: tidbCfg.Security.ClusterSSLKey, } pds := strings.Split(tidbCfg.Path, ",") + + // build common config and override for specific task if needed cfg := task.DefaultConfig() + switch s.Kind { + case ast.BRIEKindBackup: + cfg.OverrideDefaultForBackup() + default: + } + cfg.PD = pds cfg.TLS = tlsCfg @@ -384,8 +392,7 @@ func (b *executorBuilder) buildBRIE(s *ast.BRIEStmt, schema *expression.Schema) switch s.Kind { case ast.BRIEKindBackup: - bcfg := task.DefaultBackupConfig() - bcfg.Config = cfg + bcfg := task.DefaultBackupConfig(cfg) e.backupCfg = &bcfg for _, opt := range s.Options { @@ -430,8 +437,7 @@ func (b *executorBuilder) buildBRIE(s *ast.BRIEStmt, schema *expression.Schema) } case ast.BRIEKindRestore: - rcfg := task.DefaultRestoreConfig() - rcfg.Config = cfg + rcfg := task.DefaultRestoreConfig(cfg) e.restoreCfg = &rcfg for _, opt := range s.Options { switch opt.Tp { diff --git a/pkg/executor/brie_test.go b/pkg/executor/brie_test.go index 7bfc014a2d215..305cd74417755 100644 --- a/pkg/executor/brie_test.go +++ b/pkg/executor/brie_test.go @@ -147,7 +147,7 @@ func TestFetchShowBRIE(t *testing.T) { require.Equal(t, info2Res, fetchShowBRIEResult(t, e, brieColTypes)) } -func TestBRIEBuilderOPtions(t *testing.T) { +func TestBRIEBuilderOptions(t *testing.T) { sctx := mock.NewContext() sctx.GetSessionVars().User = &auth.UserIdentity{Username: "test"} is := infoschema.MockInfoSchema([]*model.TableInfo{core.MockSignedTable(), core.MockUnsignedTable()}) @@ -156,9 +156,10 @@ func TestBRIEBuilderOPtions(t *testing.T) { ctx := context.Background() p := parser.New() p.SetParserConfig(parser.ParserConfig{EnableWindowFunction: true, EnableStrictDoubleTypeCheck: true}) - failpoint.Enable("github.com/pingcap/tidb/pkg/executor/modifyStore", `return("tikv")`) + err := failpoint.Enable("github.com/pingcap/tidb/pkg/executor/modifyStore", `return("tikv")`) + require.NoError(t, err) defer failpoint.Disable("github.com/pingcap/tidb/pkg/executor/modifyStore") - err := os.WriteFile("/tmp/keyfile", []byte(strings.Repeat("A", 128)), 0644) + err = os.WriteFile("/tmp/keyfile", []byte(strings.Repeat("A", 128)), 0644) require.NoError(t, err) stmt, err := p.ParseOneStmt("BACKUP TABLE `a` TO 'noop://' CHECKSUM_CONCURRENCY = 4 IGNORE_STATS = 1 COMPRESSION_LEVEL = 4 COMPRESSION_TYPE = 'lz4' ENCRYPTION_METHOD = 'aes256-ctr' ENCRYPTION_KEYFILE = '/tmp/keyfile'", "", "") @@ -190,6 +191,7 @@ func TestBRIEBuilderOPtions(t *testing.T) { require.NoError(t, builder.err) e, ok := exec.(*BRIEExec) require.True(t, ok) + require.False(t, e.backupCfg.Checksum) require.Equal(t, uint(4), e.backupCfg.ChecksumConcurrency) require.Equal(t, int32(4), e.backupCfg.CompressionLevel) require.Equal(t, true, e.backupCfg.IgnoreStats) @@ -223,6 +225,7 @@ func TestBRIEBuilderOPtions(t *testing.T) { e, ok = exec.(*BRIEExec) require.True(t, ok) require.Equal(t, uint(4), e.restoreCfg.ChecksumConcurrency) + require.True(t, e.restoreCfg.Checksum) require.True(t, e.restoreCfg.WaitTiflashReady) require.True(t, e.restoreCfg.WithSysTable) require.True(t, e.restoreCfg.LoadStats) From 06276b3558f74e2c2eff6569fc6e96b125126a58 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 28 Oct 2024 14:01:28 -0400 Subject: [PATCH 08/12] fix bazel Signed-off-by: Wenqi Mou --- br/pkg/task/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 6d519e0def8cc..163d28fd6b0e2 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -115,7 +115,7 @@ go_test( ], embed = [":task"], flaky = True, - shard_count = 40, + shard_count = 39, deps = [ "//br/pkg/backup", "//br/pkg/config", From 668fe5449fecc80b13e5055a251dfe48d158b180 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 28 Oct 2024 15:46:04 -0400 Subject: [PATCH 09/12] fix Signed-off-by: Wenqi Mou --- br/cmd/br/backup.go | 2 +- br/cmd/br/restore.go | 2 +- br/pkg/task/backup.go | 21 ++++++++++----------- br/pkg/task/restore.go | 20 +++++++++++--------- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/br/cmd/br/backup.go b/br/cmd/br/backup.go index 925cf58c132b4..186ad2af5cd38 100644 --- a/br/cmd/br/backup.go +++ b/br/cmd/br/backup.go @@ -22,7 +22,7 @@ import ( func runBackupCommand(command *cobra.Command, cmdName string) error { cfg := task.BackupConfig{Config: task.Config{LogProgress: HasLogFile()}} - if err := cfg.ParseFromFlags(command.Flags()); err != nil { + if err := cfg.ParseFromFlags(command.Flags(), false); err != nil { command.SilenceUsage = false return errors.Trace(err) } diff --git a/br/cmd/br/restore.go b/br/cmd/br/restore.go index f991163813af2..2121105761a36 100644 --- a/br/cmd/br/restore.go +++ b/br/cmd/br/restore.go @@ -25,7 +25,7 @@ import ( func runRestoreCommand(command *cobra.Command, cmdName string) error { cfg := task.RestoreConfig{Config: task.Config{LogProgress: HasLogFile()}} - if err := cfg.ParseFromFlags(command.Flags()); err != nil { + if err := cfg.ParseFromFlags(command.Flags(), false); err != nil { command.SilenceUsage = false return errors.Trace(err) } diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index a8d27f1b410df..f5ac489da8da0 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -42,7 +42,6 @@ import ( "github.com/spf13/pflag" "github.com/tikv/client-go/v2/oracle" kvutil "github.com/tikv/client-go/v2/util" - "go.uber.org/multierr" "go.uber.org/zap" ) @@ -159,7 +158,7 @@ func DefineBackupFlags(flags *pflag.FlagSet) { } // ParseFromFlags parses the backup-related flags from the flag set. -func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error { +func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig bool) error { timeAgo, err := flags.GetDuration(flagBackupTimeago) if err != nil { return errors.Trace(err) @@ -212,12 +211,14 @@ func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet) error { } cfg.CompressionConfig = *compressionCfg - // parse common flags - if err = cfg.Config.ParseFromFlags(flags); err != nil { - return errors.Trace(err) + // parse common flags if needed + if !skipCommonConfig { + if err = cfg.Config.ParseFromFlags(flags); err != nil { + return errors.Trace(err) + } + // override common config specifically for backup use case + cfg.OverrideDefaultForBackup() } - // override common config specifically for backup use case - cfg.OverrideDefaultForBackup() cfg.RemoveSchedulers, err = flags.GetBool(flagRemoveSchedulers) if err != nil { @@ -796,11 +797,9 @@ func DefaultBackupConfig(commonConfig Config) BackupConfig { fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError) DefineBackupFlags(fs) cfg := BackupConfig{} - err := multierr.Combine( - cfg.ParseFromFlags(fs), - ) + err := cfg.ParseFromFlags(fs, true) if err != nil { - log.Panic("infallible operation failed.", zap.Error(err)) + log.Panic("failed to parse backup flags to config", zap.Error(err)) } cfg.Config = commonConfig return cfg diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 775ce317c105e..77893aaa913ec 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -347,7 +347,7 @@ func (cfg *RestoreConfig) ParseStreamRestoreFlags(flags *pflag.FlagSet) error { } // ParseFromFlags parses the restore-related flags from the flag set. -func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet) error { +func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig bool) error { var err error cfg.NoSchema, err = flags.GetBool(flagNoSchema) if err != nil { @@ -357,10 +357,15 @@ func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet) error { if err != nil { return errors.Trace(err) } - err = cfg.Config.ParseFromFlags(flags) - if err != nil { - return errors.Trace(err) + + // parse common config if needed + if !skipCommonConfig { + err = cfg.Config.ParseFromFlags(flags) + if err != nil { + return errors.Trace(err) + } } + err = cfg.RestoreCommonConfig.ParseFromFlags(flags) if err != nil { return errors.Trace(err) @@ -645,12 +650,9 @@ func DefaultRestoreConfig(commonConfig Config) RestoreConfig { fs := pflag.NewFlagSet("dummy", pflag.ContinueOnError) DefineRestoreFlags(fs) cfg := RestoreConfig{} - err := multierr.Combine( - cfg.ParseFromFlags(fs), - cfg.RestoreCommonConfig.ParseFromFlags(fs), - ) + err := cfg.ParseFromFlags(fs, true) if err != nil { - log.Panic("infallible failed.", zap.Error(err)) + log.Panic("failed to parse restore flags to config", zap.Error(err)) } cfg.Config = commonConfig From ac5a12bdce449e52691c22317cdc738d2b0ac5bc Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 28 Oct 2024 16:52:34 -0400 Subject: [PATCH 10/12] fix int tests Signed-off-by: Wenqi Mou --- br/tests/br_full_ddl/run.sh | 2 +- br/tests/br_full_index/run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh index 5f9d67184e7d6..70e9b544fabb5 100755 --- a/br/tests/br_full_ddl/run.sh +++ b/br/tests/br_full_ddl/run.sh @@ -125,7 +125,7 @@ echo "backup start with stats..." unset BR_LOG_TO_TERM cluster_index_before_backup=$(run_sql "show variables like '%cluster%';" | awk '{print $2}') -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --log-file $LOG --ignore-stats=false || cat $LOG +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --log-file $LOG --ignore-stats=false --checksum=true || cat $LOG checksum_count=$(cat $LOG | grep "checksum success" | wc -l | xargs) if [ "${checksum_count}" -lt "1" ];then diff --git a/br/tests/br_full_index/run.sh b/br/tests/br_full_index/run.sh index edcac1bfa2377..28f959c10b5f4 100755 --- a/br/tests/br_full_index/run.sh +++ b/br/tests/br_full_index/run.sh @@ -41,7 +41,7 @@ echo "backup start..." # Do not log to terminal unset BR_LOG_TO_TERM # do not backup stats to test whether we can restore without stats. -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --ignore-stats=true --log-file $LOG || cat $LOG +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --ignore-stats=true --log-file $LOG --checksum=true || cat $LOG BR_LOG_TO_TERM=1 checksum_count=$(cat $LOG | grep "checksum success" | wc -l | xargs) From ffd60822543820690956b74ececc22d10fdd26f2 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Mon, 28 Oct 2024 18:39:19 -0400 Subject: [PATCH 11/12] fix again Signed-off-by: Wenqi Mou --- br/cmd/br/backup.go | 8 ++++++++ br/pkg/task/backup.go | 2 -- br/pkg/task/common.go | 8 ++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/br/cmd/br/backup.go b/br/cmd/br/backup.go index 186ad2af5cd38..6525c85d535b9 100644 --- a/br/cmd/br/backup.go +++ b/br/cmd/br/backup.go @@ -26,6 +26,7 @@ func runBackupCommand(command *cobra.Command, cmdName string) error { command.SilenceUsage = false return errors.Trace(err) } + overrideDefaultBackupConfigIfNeeded(&cfg, command) if err := metricsutil.RegisterMetricsForBR(cfg.PD, cfg.KeyspaceName); err != nil { return errors.Trace(err) @@ -211,3 +212,10 @@ func newTxnBackupCommand() *cobra.Command { task.DefineTxnBackupFlags(command) return command } + +func overrideDefaultBackupConfigIfNeeded(config *task.BackupConfig, cmd *cobra.Command) { + // override only if flag not set by user + if !cmd.Flags().Changed(task.FlagChecksum) { + config.Checksum = false + } +} diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index f5ac489da8da0..af92518043a90 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -216,8 +216,6 @@ func (cfg *BackupConfig) ParseFromFlags(flags *pflag.FlagSet, skipCommonConfig b if err = cfg.Config.ParseFromFlags(flags); err != nil { return errors.Trace(err) } - // override common config specifically for backup use case - cfg.OverrideDefaultForBackup() } cfg.RemoveSchedulers, err = flags.GetBool(flagRemoveSchedulers) diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index ad64e94157d0d..08841700936e8 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -64,7 +64,7 @@ const ( flagRateLimit = "ratelimit" flagRateLimitUnit = "ratelimit-unit" flagConcurrency = "concurrency" - flagChecksum = "checksum" + FlagChecksum = "checksum" flagFilter = "filter" flagCaseSensitive = "case-sensitive" flagRemoveTiFlash = "remove-tiflash" @@ -297,7 +297,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) { flags.Uint(flagChecksumConcurrency, variable.DefChecksumTableConcurrency, "The concurrency of checksumming in one table") flags.Uint64(flagRateLimit, unlimited, "The rate limit of the task, MB/s per node") - flags.Bool(flagChecksum, true, "Run checksum at end of task") + flags.Bool(FlagChecksum, true, "Run checksum at end of task") flags.Bool(flagRemoveTiFlash, true, "Remove TiFlash replicas before backup or restore, for unsupported versions of TiFlash") @@ -359,7 +359,7 @@ func DefineCommonFlags(flags *pflag.FlagSet) { // HiddenFlagsForStream temporary hidden flags that stream cmd not support. func HiddenFlagsForStream(flags *pflag.FlagSet) { - _ = flags.MarkHidden(flagChecksum) + _ = flags.MarkHidden(FlagChecksum) _ = flags.MarkHidden(flagLoadStats) _ = flags.MarkHidden(flagChecksumConcurrency) _ = flags.MarkHidden(flagRateLimit) @@ -609,7 +609,7 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error { return errors.Trace(err) } - if cfg.Checksum, err = flags.GetBool(flagChecksum); err != nil { + if cfg.Checksum, err = flags.GetBool(FlagChecksum); err != nil { return errors.Trace(err) } if cfg.ChecksumConcurrency, err = flags.GetUint(flagChecksumConcurrency); err != nil { From 95059bcee43165ae6d3455d33215e743ea35854b Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Tue, 29 Oct 2024 11:37:11 -0400 Subject: [PATCH 12/12] revert accidental change Signed-off-by: Wenqi Mou --- br/tests/br_encryption/run.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/br/tests/br_encryption/run.sh b/br/tests/br_encryption/run.sh index f55a0a1cee3bb..3934dd3b6103c 100755 --- a/br/tests/br_encryption/run.sh +++ b/br/tests/br_encryption/run.sh @@ -420,15 +420,15 @@ test_mixed_full_plain_log_encrypted() { echo "Operation,Encryption Mode,Duration (seconds)" > "$TEST_DIR/performance_results.csv" # Run tests -#test_backup_encrypted_restore_unencrypted -#test_plaintext +test_backup_encrypted_restore_unencrypted +test_plaintext test_plaintext_data_key -#test_local_master_key +test_local_master_key # localstack not working with older glibc version in our centos7 base image... #test_aws_kms #test_aws_kms_with_iam -#test_mixed_full_encrypted_log_plain -#test_mixed_full_plain_log_encrypted +test_mixed_full_encrypted_log_plain +test_mixed_full_plain_log_encrypted # uncomment for manual GCP KMS testing #test_gcp_kms