From d7ce2f2faa1da3177a0f0a7e825f6e8fccd13ec8 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 23 Aug 2023 16:23:04 +0800 Subject: [PATCH] br: wait tiflash replicas ready && fix unstable test (#46301) (#46342) close pingcap/tidb#46302 --- br/pkg/restore/client.go | 27 ++++++++++++++++++++++----- br/pkg/task/restore.go | 6 +----- br/tests/br_tiflash/run.sh | 15 +++++++++++++++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index 76ca0c672782a..422c4cce3f63e 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -1529,11 +1529,28 @@ func (rc *Client) GoWaitTiFlashReady(ctx context.Context, inCh <-chan *CreatedTa zap.Stringer("table", tbl.OldTable.Info.Name), zap.Stringer("db", tbl.OldTable.DB.Name)) for { - progress, err := infosync.CalculateTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, tiFlashStores) - if err != nil { - log.Warn("failed to get tiflash replica progress, wait for next retry", zap.Error(err)) - time.Sleep(time.Second) - continue + var progress float64 + if pi := tbl.Table.GetPartitionInfo(); pi != nil && len(pi.Definitions) > 0 { + for _, p := range pi.Definitions { + progressOfPartition, err := infosync.MustGetTiFlashProgress(p.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores) + if err != nil { + log.Warn("failed to get progress for tiflash partition replica, retry it", + zap.Int64("tableID", tbl.Table.ID), zap.Int64("partitionID", p.ID), zap.Error(err)) + time.Sleep(time.Second) + continue + } + progress += progressOfPartition + } + progress = progress / float64(len(pi.Definitions)) + } else { + var err error + progress, err = infosync.MustGetTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores) + if err != nil { + log.Warn("failed to get progress for tiflash replica, retry it", + zap.Int64("tableID", tbl.Table.ID), zap.Error(err)) + time.Sleep(time.Second) + continue + } } // check until progress is 1 if progress == 1 { diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 65b7c7c2520b7..9125d47d470fe 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -191,11 +191,7 @@ type RestoreConfig struct { PitrBatchSize uint32 `json:"pitr-batch-size" toml:"pitr-batch-size"` PitrConcurrency uint32 `json:"-" toml:"-"` - UseCheckpoint bool `json:"use-checkpoint" toml:"use-checkpoint"` - checkpointSnapshotRestoreTaskName string `json:"-" toml:"-"` - checkpointLogRestoreTaskName string `json:"-" toml:"-"` - checkpointTaskInfoClusterID uint64 `json:"-" toml:"-"` - WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"` + WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"` // for ebs-based restore FullBackupType FullBackupType `json:"full-backup-type" toml:"full-backup-type"` diff --git a/br/tests/br_tiflash/run.sh b/br/tests/br_tiflash/run.sh index b416b05a74f58..6161fed2eeec8 100644 --- a/br/tests/br_tiflash/run.sh +++ b/br/tests/br_tiflash/run.sh @@ -23,16 +23,29 @@ run_sql "CREATE DATABASE $DB" run_sql "CREATE TABLE $DB.kv(k varchar(256) primary key, v int)" +run_sql "CREATE TABLE $DB.partition_kv(\ + k INT, \ + v INT, \ + PRIMARY KEY(k) CLUSTERED \ +) PARTITION BY RANGE(k) (\ + PARTITION p0 VALUES LESS THAN (200), \ + PARTITION p1 VALUES LESS THAN (400), \ + PARTITION p2 VALUES LESS THAN MAXVALUE)" + stmt="INSERT INTO $DB.kv(k, v) VALUES ('1-record', 1)" +parition_stmt="INSERT INTO $DB.partition_kv(k, v) VALUES (1, 1)" for i in $(seq 2 $RECORD_COUNT); do stmt="$stmt,('$i-record', $i)" + parition_stmt="$parition_stmt,($i, $i)" done run_sql "$stmt" +run_sql "$parition_stmt" if ! run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"; then # 10s should be enough for tiflash-proxy get started sleep 10 run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1" + run_sql "ALTER TABLE $DB.partition_kv SET TIFLASH REPLICA 1" fi @@ -54,6 +67,8 @@ run_sql "DROP DATABASE $DB" run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --wait-tiflash-ready=true # check TiFlash sync +echo "wait 3 seconds for tiflash tick puller triggered" +sleep 3 if ! [ $(run_sql "select * from information_schema.tiflash_replica" | grep "PROGRESS" | sed "s/[^0-9]//g") -eq 1 ]; then echo "restore didn't wait tiflash synced after set --wait-tiflash-ready=true." exit 1