From c6cd7ceb75cce14c56fa46abc05a8e5f59384769 Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Wed, 5 Jun 2024 18:21:37 +0800
Subject: [PATCH 1/7] fix br full ddl

Sleep 7 seconds after `analyze table` and before the /stats/dump
request, so the dumped stats are taken after the analyze update.

Signed-off-by: Jianjun Liao
---
 br/tests/br_full_ddl/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh
index 1572d0ef6fddd..0dae9611004ec 100755
--- a/br/tests/br_full_ddl/run.sh
+++ b/br/tests/br_full_ddl/run.sh
@@ -89,6 +89,7 @@ run_sql "analyze table $DB.$TABLE;"
 #   }
 # }
 
+sleep 7 # sleep until analyze update.
 run_curl https://$TIDB_STATUS_ADDR/stats/dump/$DB/$TABLE | jq '{columns,indices} | map_values(with_entries(del(.value.last_update_version, .value.correlation)))' > $BACKUP_STAT
 
 # ensure buckets in stats

From c984379f546062e6f27492dd0e3b5c61bf551df9 Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Thu, 6 Jun 2024 11:38:12 +0800
Subject: [PATCH 2/7] fix br full ddl

Drop the fixed `sleep 7`. Instead, after the DDL loop, poll
ADMIN SHOW DDL JOBS for jobs on the test table and only run
`analyze table` once the SQL result file no longer contains a first
result row. The poll sleeps one second between attempts (same shape as
the br_txn retry loop) and aborts the test after more than 50 retries.

Signed-off-by: Jianjun Liao
---
 br/tests/br_full_ddl/run.sh | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh
index 0dae9611004ec..90b5d8a8aadf0 100755
--- a/br/tests/br_full_ddl/run.sh
+++ b/br/tests/br_full_ddl/run.sh
@@ -23,6 +23,7 @@ RESTORE_LOG=LOG=/$TEST_DIR/restore.log
 BACKUP_STAT=/$TEST_DIR/backup_stat
 RESOTRE_STAT=/$TEST_DIR/restore_stat
 CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+res_file="$TEST_DIR/sql_res.$TEST_NAME.txt"
 
 run_sql "CREATE DATABASE $DB;"
 go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB
@@ -39,6 +40,24 @@ for i in $(seq $DDL_COUNT); do
     fi
 done
 
+# wait until the index creation/drop is done
+retry_cnt=0
+while true; do
+    run_sql "ADMIN SHOW DDL JOBS WHERE DB_NAME = '$DB' AND TABLE_NAME = '$TABLE' AND STATE != 'sync';"
+    if grep -Fq "1. row" $res_file; then
+        cat $res_file
+        retry_cnt=$((retry_cnt+1))
+        if [ "$retry_cnt" -gt 50 ]; then
+            echo 'the wait lag is too large'
+            exit 1
+        fi
+        sleep 1
+        continue
+    fi
+
+    break
+done
+
 # run analyze to generate stats
 run_sql "analyze table $DB.$TABLE;"
 # record the stats and remove last_update_version
@@ -89,7 +108,6 @@ run_sql "analyze table $DB.$TABLE;"
 #   }
 # }
 
-sleep 7 # sleep until analyze update.
 run_curl https://$TIDB_STATUS_ADDR/stats/dump/$DB/$TABLE | jq '{columns,indices} | map_values(with_entries(del(.value.last_update_version, .value.correlation)))' > $BACKUP_STAT
 
 # ensure buckets in stats

From 40d355eaabaa09a0910c7347620fb63fc8f00835 Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Thu, 6 Jun 2024 13:14:33 +0800
Subject: [PATCH 3/7] fix typos

Compare the DDL job state against 'synced' instead of 'sync' in the
poll query.

Signed-off-by: Jianjun Liao
---
 br/tests/br_full_ddl/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh
index 90b5d8a8aadf0..5f9d67184e7d6 100755
--- a/br/tests/br_full_ddl/run.sh
+++ b/br/tests/br_full_ddl/run.sh
@@ -43,7 +43,7 @@ done
 # wait until the index creation/drop is done
 retry_cnt=0
 while true; do
-    run_sql "ADMIN SHOW DDL JOBS WHERE DB_NAME = '$DB' AND TABLE_NAME = '$TABLE' AND STATE != 'sync';"
+    run_sql "ADMIN SHOW DDL JOBS WHERE DB_NAME = '$DB' AND TABLE_NAME = '$TABLE' AND STATE != 'synced';"
     if grep -Fq "1. row" $res_file; then
         cat $res_file
         retry_cnt=$((retry_cnt+1))

From b2195b9f0ed12dbd0a1b2863ea0ca87d6e8232d5 Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Thu, 22 Aug 2024 14:30:11 +0800
Subject: [PATCH 4/7] add test for corruption snapshot backup file

* backoff.go: add the `set-import-attempt-to-one` failpoint, which caps
  importerBackoffer.attempt at 1 inside NextBackoff.
* br_file_corruption: new case. Take a full backup, prepend the 11 bytes
  written by `echo "corruption"` to one SST file and truncate 11 bytes
  off its end (the size stays the same), then require `restore full` to
  fail with the failpoint enabled.
* br_txn: retry the "range is empty" checksum check up to 50 times, one
  second apart, instead of failing on the first mismatch.

Signed-off-by: Jianjun Liao
---
 br/pkg/utils/backoff.go              |  5 ++++
 br/tests/br_file_corruption/run.sh   | 41 ++++++++++++++++++++++++++++
 br/tests/br_file_corruption/workload | 12 ++++++++
 br/tests/br_txn/run.sh               | 22 +++++++++++----
 4 files changed, 74 insertions(+), 6 deletions(-)
 create mode 100644 br/tests/br_file_corruption/run.sh
 create mode 100644 br/tests/br_file_corruption/workload

diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go
index 385ed4319a06a..fda272606e5c7 100644
--- a/br/pkg/utils/backoff.go
+++ b/br/pkg/utils/backoff.go
@@ -207,6 +207,11 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration {
 			}
 		}
 	}
+	failpoint.Inject("set-import-attempt-to-one", func(_ failpoint.Value) {
+		if bo.attempt > 1 {
+			bo.attempt = 1
+		}
+	})
 	if bo.delayTime > bo.maxDelayTime {
 		return bo.maxDelayTime
 	}
diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh
new file mode 100644
index 0000000000000..6feb08f41e1f6
--- /dev/null
+++ b/br/tests/br_file_corruption/run.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Copyright 2024 PingCAP, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -eux
+
+DB="$TEST_NAME"
+TABLE="usertable"
+CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+run_sql "CREATE DATABASE $DB;"
+go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB
+run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB"
+
+filename=$(find $TEST_DIR/$DB -regex ".*.sst" | head -n 1)
+filename_temp=$filename"_temp"
+echo "corruption" > $filename_temp
+cat $filename >> $filename_temp
+mv $filename_temp $filename
+truncate --size=-11 $filename
+
+export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"
+restore_fail=0
+run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1
+export GO_FAILPOINTS=""
+if [ $restore_fail -ne 1 ]; then
+    echo 'restore success'
+    exit 1
+fi
diff --git a/br/tests/br_file_corruption/workload b/br/tests/br_file_corruption/workload
new file mode 100644
index 0000000000000..e3fadf9a3d068
--- /dev/null
+++ b/br/tests/br_file_corruption/workload
@@ -0,0 +1,12 @@
+recordcount=10000
+operationcount=0
+workload=core
+
+readallfields=true
+
+readproportion=0
+updateproportion=0
+scanproportion=0
+insertproportion=0
+
+requestdistribution=uniform
diff --git a/br/tests/br_txn/run.sh b/br/tests/br_txn/run.sh
index ff98bcc8fdb7d..567be9d76e263 100755
--- a/br/tests/br_txn/run.sh
+++ b/br/tests/br_txn/run.sh
@@ -97,12 +97,22 @@ run_test() {
     # delete data in range[start-key, end-key)
     clean "hello" "world"
     # Ensure the data is deleted
-    checksum_new=$(checksum "hello" "world")
-
-    if [ "$checksum_new" != "$checksum_empty" ];then
-        echo "failed to delete data in range after backup"
-        fail_and_exit
-    fi
+    retry_cnt=0
+    while true; do
+        checksum_new=$(checksum "hello" "world")
+
+        if [ "$checksum_new" != "$checksum_empty" ]; then
+            echo "failed to delete data in range after backup; retry_cnt = $retry_cnt"
+            retry_cnt=$((retry_cnt+1))
+            if [ "$retry_cnt" -gt 50 ]; then
+                fail_and_exit
+            fi
+            sleep 1
+            continue
+        fi
+
+        break
+    done
 
     # restore rawkv
     echo "restore start..."

From 7df22d7a27c851a5a17de7fc9660f42325ecc27b Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Thu, 22 Aug 2024 14:53:34 +0800
Subject: [PATCH 5/7] add test for corruption log backup file

* br_pitr: after the existing failure check, restart the services,
  corrupt one log backup file the same way as in br_file_corruption
  (prepend 11 bytes, truncate 11 bytes) and require `restore point` to
  fail with the `set-import-attempt-to-one` failpoint enabled.
* run_group_br_tests.sh: add br_file_corruption to group G08.

Signed-off-by: Jianjun Liao
---
 br/tests/br_pitr/run.sh        | 20 ++++++++++++++++++++
 br/tests/run_group_br_tests.sh |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh
index 24c084af4191c..859b690d089a2 100644
--- a/br/tests/br_pitr/run.sh
+++ b/br/tests/br_pitr/run.sh
@@ -157,3 +157,23 @@ if [ $restore_fail -ne 1 ]; then
     echo 'pitr success'
     exit 1
 fi
+
+# start a new cluster for corruption
+echo "restart a services"
+restart_services
+
+echo "corrupt a log file"
+filename=$(find $TEST_DIR/$PREFIX/log -regex ".*.log" | head -n 1)
+filename_temp=$filename"_temp"
+echo "corruption" > $filename_temp
+cat $filename >> $filename_temp
+mv $filename_temp $filename
+truncate --size=-11 $filename
+export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"
+restore_fail=0
+run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" || restore_fail=1
+export GO_FAILPOINTS=""
+if [ $restore_fail -ne 1 ]; then
+    echo 'pitr success'
+    exit 1
+fi
diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh
index 9fe7fd643a58b..340a9f3a53255 100755
--- a/br/tests/run_group_br_tests.sh
+++ b/br/tests/run_group_br_tests.sh
@@ -28,7 +28,7 @@ groups=(
 	["G05"]='br_skip_checksum br_small_batch_size br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter'
 	["G06"]='br_tikv_outage br_tikv_outage3'
 	["G07"]='br_pitr'
-	["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom'
+	["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption'
 )
 
 # Get other cases not in groups, to avoid missing any case

From 4f1c1bfc2261348a4708bdbd63a318c09d5c0d0a Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Thu, 22 Aug 2024 15:59:43 +0800
Subject: [PATCH 6/7] fix integration test

Pick the log file to corrupt more precisely: escape the dot in the
`find -regex` pattern, exclude paths containing "schema-meta", and take
the last match instead of the first.

Signed-off-by: Jianjun Liao
---
 br/tests/br_pitr/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh
index 859b690d089a2..1ae66f17a1a5a 100644
--- a/br/tests/br_pitr/run.sh
+++ b/br/tests/br_pitr/run.sh
@@ -163,7 +163,7 @@ echo "restart a services"
 restart_services
 
 echo "corrupt a log file"
-filename=$(find $TEST_DIR/$PREFIX/log -regex ".*.log" | head -n 1)
+filename=$(find $TEST_DIR/$PREFIX/log -regex ".*\.log" | grep -v "schema-meta" | tail -n 1)
 filename_temp=$filename"_temp"
 echo "corruption" > $filename_temp
 cat $filename >> $filename_temp

From e6efeb88676eb70cbb3a31b2ad9bd9c1fbb05aa9 Mon Sep 17 00:00:00 2001
From: Jianjun Liao
Date: Fri, 30 Aug 2024 10:58:20 +0800
Subject: [PATCH 7/7] add more test

In both br_file_corruption and br_pitr, add a "file lost" step before
the corruption step: move the selected backup file aside to
`<name>_bak` and require the restore to fail, then put the corrupted
copy in place and require the restore to fail again.

Signed-off-by: Jianjun Liao
---
 br/tests/br_file_corruption/run.sh | 15 ++++++++++++++-
 br/tests/br_pitr/run.sh            | 14 ++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh
index 6feb08f41e1f6..35a7698bb9fef 100644
--- a/br/tests/br_file_corruption/run.sh
+++ b/br/tests/br_file_corruption/run.sh
@@ -26,11 +26,24 @@ run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB"
 
 filename=$(find $TEST_DIR/$DB -regex ".*.sst" | head -n 1)
 filename_temp=$filename"_temp"
+filename_bak=$filename"_bak"
 echo "corruption" > $filename_temp
 cat $filename >> $filename_temp
+
+# file lost
+mv $filename $filename_bak
+export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"
+restore_fail=0
+run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1
+export GO_FAILPOINTS=""
+if [ $restore_fail -ne 1 ]; then
+    echo 'restore success'
+    exit 1
+fi
+
+# file corruption
 mv $filename_temp $filename
 truncate --size=-11 $filename
-
 export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"
 restore_fail=0
 run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1
diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh
index 1ae66f17a1a5a..35143c0afa0b3 100644
--- a/br/tests/br_pitr/run.sh
+++ b/br/tests/br_pitr/run.sh
@@ -165,8 +165,22 @@ restart_services
 echo "corrupt a log file"
 filename=$(find $TEST_DIR/$PREFIX/log -regex ".*\.log" | grep -v "schema-meta" | tail -n 1)
 filename_temp=$filename"_temp"
+filename_bak=$filename"_bak"
 echo "corruption" > $filename_temp
 cat $filename >> $filename_temp
+
+# file lost
+mv $filename $filename_bak
+export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"
+restore_fail=0
+run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" || restore_fail=1
+export GO_FAILPOINTS=""
+if [ $restore_fail -ne 1 ]; then
+    echo 'pitr success'
+    exit 1
+fi
+
+# file corruption
 mv $filename_temp $filename
 truncate --size=-11 $filename
 export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)"