Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

br: wait tiflash replicas ready && fix unstable test (#46301) #46342

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions br/pkg/restore/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1529,11 +1529,28 @@ func (rc *Client) GoWaitTiFlashReady(ctx context.Context, inCh <-chan *CreatedTa
zap.Stringer("table", tbl.OldTable.Info.Name),
zap.Stringer("db", tbl.OldTable.DB.Name))
for {
progress, err := infosync.CalculateTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, tiFlashStores)
if err != nil {
log.Warn("failed to get tiflash replica progress, wait for next retry", zap.Error(err))
time.Sleep(time.Second)
continue
var progress float64
if pi := tbl.Table.GetPartitionInfo(); pi != nil && len(pi.Definitions) > 0 {
for _, p := range pi.Definitions {
progressOfPartition, err := infosync.MustGetTiFlashProgress(p.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash partition replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Int64("partitionID", p.ID), zap.Error(err))
time.Sleep(time.Second)
continue
}
progress += progressOfPartition
}
progress = progress / float64(len(pi.Definitions))
} else {
var err error
progress, err = infosync.MustGetTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Error(err))
time.Sleep(time.Second)
continue
}
}
// check until progress is 1
if progress == 1 {
Expand Down
6 changes: 1 addition & 5 deletions br/pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,7 @@ type RestoreConfig struct {
PitrBatchSize uint32 `json:"pitr-batch-size" toml:"pitr-batch-size"`
PitrConcurrency uint32 `json:"-" toml:"-"`

UseCheckpoint bool `json:"use-checkpoint" toml:"use-checkpoint"`
checkpointSnapshotRestoreTaskName string `json:"-" toml:"-"`
checkpointLogRestoreTaskName string `json:"-" toml:"-"`
checkpointTaskInfoClusterID uint64 `json:"-" toml:"-"`
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`

// for ebs-based restore
FullBackupType FullBackupType `json:"full-backup-type" toml:"full-backup-type"`
Expand Down
15 changes: 15 additions & 0 deletions br/tests/br_tiflash/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,29 @@ run_sql "CREATE DATABASE $DB"

# Plain key/value table (no partitioning).
run_sql "CREATE TABLE $DB.kv(k varchar(256) primary key, v int)"

# Range-partitioned table with three partitions: p0 (k < 200), p1 (k < 400), p2 (everything else).
run_sql "CREATE TABLE $DB.partition_kv(\
k INT, \
v INT, \
PRIMARY KEY(k) CLUSTERED \
) PARTITION BY RANGE(k) (\
PARTITION p0 VALUES LESS THAN (200), \
PARTITION p1 VALUES LESS THAN (400), \
PARTITION p2 VALUES LESS THAN MAXVALUE)"

# Build one multi-row INSERT per table: row 1 seeds each statement, the loop appends rows 2..$RECORD_COUNT.
stmt="INSERT INTO $DB.kv(k, v) VALUES ('1-record', 1)"
parition_stmt="INSERT INTO $DB.partition_kv(k, v) VALUES (1, 1)"
for i in $(seq 2 $RECORD_COUNT); do
# Append row $i to both statements so the two tables receive the same number of rows.
stmt="$stmt,('$i-record', $i)"
parition_stmt="$parition_stmt,($i, $i)"
done
# Execute each accumulated INSERT as a single statement.
run_sql "$stmt"
run_sql "$parition_stmt"

# Request one TiFlash replica for kv; if the first attempt fails, wait and try once more.
if ! run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"; then
# 10s should be enough for tiflash-proxy to get started
sleep 10
run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"
# NOTE(review): this ALTER on partition_kv sits inside the retry branch, so it only runs
# when the first ALTER on kv fails; when that first attempt succeeds, partition_kv gets
# no TiFlash replica from this block. Confirm whether that is intended.
run_sql "ALTER TABLE $DB.partition_kv SET TIFLASH REPLICA 1"
fi


Expand All @@ -54,6 +67,8 @@ run_sql "DROP DATABASE $DB"
run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --wait-tiflash-ready=true

# check TiFlash sync
echo "wait 3 seconds for tiflash tick puller triggered"
sleep 3
if ! [ $(run_sql "select * from information_schema.tiflash_replica" | grep "PROGRESS" | sed "s/[^0-9]//g") -eq 1 ]; then
echo "restore didn't wait tiflash synced after set --wait-tiflash-ready=true."
exit 1
Expand Down