Skip to content
This repository has been archived by the owner on Nov 24, 2023. It is now read-only.

Commit

Permalink
Merge branch 'release-2.0' into cherry-pick-1707-to-release-2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
lance6716 committed Jun 6, 2021
2 parents 44e51ce + 75d1e82 commit b636f87
Show file tree
Hide file tree
Showing 211 changed files with 8,889 additions and 3,562 deletions.
267 changes: 1 addition & 266 deletions tests/ha_cases/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,52 +38,6 @@ function test_running() {
echo "[$(date)] <<<<<< finish test_running >>>>>>"
}

# HA scenario with two tasks ("test" and "test2") running side by side.
# Resets the environment, prepares the SQL fixtures and starts the
# multi-task cluster, then checks both tasks through the third master,
# compares the full-dump result, rotates binlogs on both sources, applies
# the increment data to both schemas and compares again.
# Globals read: WORK_DIR, MASTER_PORT3, cur, MYSQL_HOST1/2, MYSQL_PORT1/2,
#               MYSQL_PASSWORD1/2, ha_test, ha_test2
function test_multi_task_running() {
  local ha_task_name
  local ha_diff_conf

  echo "[$(date)] <<<<<< start test_multi_task_running >>>>>>"
  cleanup
  prepare_sql_multi_task
  start_multi_tasks_cluster

  # make sure each task has stepped into the "Sync" stage
  for ha_task_name in test test2; do
    run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
      "query-status $ha_task_name" \
      '"stage": "Running"' 2 \
      '"unit": "Sync"' 2
  done

  echo "use sync_diff_inspector to check full dump loader"
  for ha_diff_conf in diff_config.toml diff_config_multi_task.toml; do
    check_sync_diff $WORK_DIR $cur/conf/$ha_diff_conf
  done

  echo "flush logs to force rotate binlog file"
  run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1
  run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2

  echo "apply increment data before restart dm-worker to ensure entering increment phase"
  # Same two increment files are replayed into each of the two schemas.
  run_sql_file_withdb $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
  run_sql_file_withdb $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
  run_sql_file_withdb $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test2
  run_sql_file_withdb $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test2

  sleep 5 # wait for flush checkpoint
  echo "use sync_diff_inspector to check increment data"
  # On a failed comparison, dump the task status before giving up.
  for ha_diff_conf in diff_config.toml diff_config_multi_task.toml; do
    check_sync_diff $WORK_DIR $cur/conf/$ha_diff_conf 50 || print_debug_status
  done
  echo "[$(date)] <<<<<< finish test_multi_task_running >>>>>>"
}

# Failure helper: queries the status of tasks "test" and "test2" through the
# third master, then terminates the script with status 1.
# The expected pattern "fail me!" is presumably chosen so that it never
# matches and run_dm_ctl reports the mismatch together with the command
# output -- TODO confirm against run_dm_ctl's implementation.
# If a query returns non-zero, the function returns that status right away
# without running the remaining query or the final exit.
function print_debug_status() {
  local ha_debug_task
  for ha_debug_task in test test2; do
    run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
      "query-status $ha_debug_task" \
      "fail me!" 1 || return
  done
  exit 1
}

function test_join_masters_and_worker {
echo "[$(date)] <<<<<< start test_join_masters_and_worker >>>>>>"
cleanup
Expand Down Expand Up @@ -134,221 +88,6 @@ function test_join_masters_and_worker {
echo "[$(date)] <<<<<< finish test_join_masters_and_worker >>>>>>"
}

# Kills dm-master1 after the basic running scenario and checks that task
# "test" is still reported as Running by master2 and master3, then applies
# the second increment data set and compares upstream and downstream.
# Globals read: WORK_DIR, MASTER_PORT1..3, API_VERSION, cur, MYSQL_HOST1/2,
#               MYSQL_PORT1/2, MYSQL_PASSWORD1/2, ha_test
function test_kill_master() {
  local ha_alive_master_port

  echo "[$(date)] <<<<<< start test_kill_master >>>>>>"
  test_running

  echo "kill dm-master1"
  # Best effort: the pipeline's failure (e.g. an already-gone PID) is ignored.
  ps aux | grep dm-master1 | awk '{print $2}' | xargs kill || true
  check_port_offline $MASTER_PORT1 20
  # Drop the killed master's data directory.
  rm -rf $WORK_DIR/master1/default.master1

  echo "waiting 5 seconds"
  sleep 5
  echo "check task is running"
  check_http_alive 127.0.0.1:$MASTER_PORT2/apis/${API_VERSION}/status/test '"stage": "Running"' 10

  echo "check master2,3 are running"
  for ha_alive_master_port in "$MASTER_PORT2" "$MASTER_PORT3"; do
    run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$ha_alive_master_port" \
      "query-status test" \
      '"stage": "Running"' 2
  done

  run_sql_file_withdb $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
  run_sql_file_withdb $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
  sleep 2

  echo "use sync_diff_inspector to check increment2 data now!"
  check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
  echo "[$(date)] <<<<<< finish test_kill_master >>>>>>"
}

# Exercises worker failover: kills dm-worker2, brings up worker3 and worker4,
# restarts worker3, isolates and un-isolates workers, pauses/resumes the task
# around a worker4 restart, and finally checks data consistency.
# Two failpoints are exported through GO_FAILPOINTS for the duration of the
# test and cleared at the end (the names suggest they override the keep-alive
# TTL defaults -- TODO confirm units/meaning in the worker package).
# Globals read: WORK_DIR, MASTER_PORT, MASTER_PORT1..3, WORKER2_PORT..WORKER4_PORT,
#               SOURCE_ID2, API_VERSION, cur, MYSQL_HOST1/2, MYSQL_PORT1/2,
#               MYSQL_PASSWORD1/2, ha_test
# Globals written: inject_points, GO_FAILPOINTS (exported)
function test_kill_and_isolate_worker() {
  inject_points=(
    "github.com/pingcap/dm/dm/worker/defaultKeepAliveTTL=return(1)"
    "github.com/pingcap/dm/dm/worker/defaultRelayKeepAliveTTL=return(2)"
  )
  # Quote the array expansion so each failpoint is passed as exactly one
  # argument, with no word splitting or globbing applied to it.
  export GO_FAILPOINTS="$(join_string \; "${inject_points[@]}")"
  echo "[$(date)] <<<<<< start test_kill_and_isolate_worker >>>>>>"
  test_running

  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker2" \
    "\"result\": true" 1

  echo "kill dm-worker2"
  # Best effort: a failing kill pipeline is deliberately ignored.
  ps aux | grep dm-worker2 | awk '{print $2}' | xargs kill || true
  check_port_offline $WORKER2_PORT 20
  rm -rf $WORK_DIR/worker2/relay_log
  # With worker2 gone, query-status is expected to report a failed result.
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"result\": false" 1

  run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT

  echo "wait and check task running"
  check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10

  run_dm_worker $WORK_DIR/worker4 $WORKER4_PORT $cur/conf/dm-worker4.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER4_PORT

  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker3 worker4" \
    "\"result\": true" 1

  echo "restart dm-worker3"
  ps aux | grep dm-worker3 | awk '{print $2}' | xargs kill || true
  check_port_offline $WORKER3_PORT 20
  rm -rf $WORK_DIR/worker3/relay_log

  echo "wait and check task running"
  check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10

  run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT

  echo "isolate dm-worker4"
  isolate_worker 4 "isolate"
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  echo "isolate dm-worker3"
  isolate_worker 3 "isolate"
  # With both worker3 and worker4 isolated, one Running stage and one failed
  # result are expected in the status output.
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 1 \
    "\"result\": false" 1

  echo "disable isolate dm-worker4"
  isolate_worker 4 "disable_isolate"
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  echo "query-status from all dm-master"
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  # Pause the task while worker4 is restarted, then resume it.
  run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    "pause-task test" \
    "\"result\": true" 3

  echo "restart worker4"
  ps aux | grep dm-worker4 | awk '{print $2}' | xargs kill || true
  check_port_offline $WORKER4_PORT 20
  rm -rf $WORK_DIR/worker4/relay_log
  run_dm_worker $WORK_DIR/worker4 $WORKER4_PORT $cur/conf/dm-worker4.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER4_PORT

  run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    "resume-task test" \
    "\"result\": true" 3

  run_sql_file_withdb $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
  run_sql_file_withdb $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
  sleep 2

  echo "use sync_diff_inspector to check increment2 data now!"
  check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
  echo "[$(date)] <<<<<< finish test_kill_and_isolate_worker >>>>>>"
  # Clear the failpoints so later tests start processes without them.
  export GO_FAILPOINTS=""
}

# Kills dm-master1 while data is being loaded into both sources in the
# background, then verifies that task "test" keeps running (checked through
# master2) and that the data ends up consistent.
# NOTE(review): an earlier usage note advertised a "leader" / "follower"
# argument, but this body never reads positional parameters; dm-master1 is
# always the process that is killed.
# Globals read: WORK_DIR, MASTER_PORT1, MASTER_PORT2, API_VERSION, cur,
#               MYSQL_PORT1/2, MYSQL_PASSWORD1/2
function test_kill_master_in_sync() {
  echo "[$(date)] <<<<<< start test_kill_master_in_sync >>>>>>"
  test_running

  echo "start dumping SQLs into source"
  # Both loaders run as background jobs; they are reaped by `wait` below.
  load_data $MYSQL_PORT1 $MYSQL_PASSWORD1 "a" &
  load_data $MYSQL_PORT2 $MYSQL_PASSWORD2 "b" &

  # Best effort: a failing kill pipeline is ignored.
  ps aux | grep dm-master1 | awk '{print $2}' | xargs kill || true
  check_port_offline $MASTER_PORT1 20

  echo "wait and check task running"
  sleep 1
  check_http_alive 127.0.0.1:$MASTER_PORT2/apis/${API_VERSION}/status/test '"stage": "Running"' 10
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \
    "query-status test" \
    '"stage": "Running"' 2

  # block until the background load_data jobs have finished
  wait

  echo "wait for dm to sync"
  sleep 1
  echo "use sync_diff_inspector to check data now!"
  check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
  echo "[$(date)] <<<<<< finish test_kill_master_in_sync >>>>>>"
}

# Kills dm-worker1 and dm-worker2 one after the other while data is being
# loaded into both sources in the background, starting worker3 and worker4
# as replacements, then verifies through every master that task "test" is
# Running and that the data ends up consistent.
# Globals read: WORK_DIR, MASTER_PORT, MASTER_PORT1..3, WORKER3_PORT,
#               WORKER4_PORT, SOURCE_ID1, API_VERSION, cur, MYSQL_PORT1/2,
#               MYSQL_PASSWORD1/2
function test_kill_worker_in_sync() {
  local ha_master_port

  echo "[$(date)] <<<<<< start test_kill_worker_in_sync >>>>>>"
  test_running

  echo "start dumping SQLs into source"
  # Both loaders run as background jobs; they are reaped by `wait` below.
  load_data $MYSQL_PORT1 $MYSQL_PASSWORD1 "a" &
  load_data $MYSQL_PORT2 $MYSQL_PASSWORD2 "b" &

  echo "kill dm-worker1"
  # Best effort: a failing kill pipeline is ignored.
  ps aux | grep dm-worker1 | awk '{print $2}' | xargs kill || true
  echo "start worker3"
  run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT

  # start-relay halfway
  run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID1 worker3" \
    '"result": true' 1

  echo "kill dm-worker2"
  ps aux | grep dm-worker2 | awk '{print $2}' | xargs kill || true
  echo "start worker4"
  run_dm_worker $WORK_DIR/worker4 $WORKER4_PORT $cur/conf/dm-worker4.toml
  check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER4_PORT

  echo "wait and check task running"
  check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10

  echo "query-status from all dm-master"
  for ha_master_port in "$MASTER_PORT1" "$MASTER_PORT2" "$MASTER_PORT3"; do
    run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$ha_master_port" \
      "query-status test" \
      '"stage": "Running"' 3
  done

  # block until the background load_data jobs have finished
  wait

  echo "wait for dm to sync"
  sleep 1

  echo "use sync_diff_inspector to check data now!"
  check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
  echo "[$(date)] <<<<<< finish test_kill_worker_in_sync >>>>>>"
}

function test_standalone_running() {
echo "[$(date)] <<<<<< start test_standalone_running >>>>>>"
cleanup
Expand Down Expand Up @@ -535,11 +274,7 @@ function run() {
test_last_bound
test_config_name # TICASE-915, 916, 954, 955
test_join_masters_and_worker # TICASE-928, 930, 931, 961, 932, 957
test_kill_master # TICASE-996, 958
test_kill_and_isolate_worker # TICASE-968, 973, 1002, 975, 969, 972, 974, 970, 971, 976, 978, 988
test_kill_master_in_sync
test_kill_worker_in_sync
test_standalone_running # TICASE-929, 959, 960, 967, 977, 980, 983
test_standalone_running # TICASE-929, 959, 960, 967, 977, 980, 983
}

cleanup_data $ha_test
Expand Down
Loading

0 comments on commit b636f87

Please sign in to comment.