From d378937319f8120f334b2984b7d2f3d18b0dde5d Mon Sep 17 00:00:00 2001 From: "przemyslaw.skibinski@percona.com" Date: Thu, 12 Sep 2019 13:37:55 +0200 Subject: [PATCH] PS-3345: LP #1527463: Waiting for binlog lock Fix 3-way deadlock that can be achieved with 2 slave threads working and parallel and with 1 slave client that executes LOCK BINLOG FOR BACKUP. And the deadlock is: worker0: applying INSERT INTO t1 VALUES(11, NULL); worker1: applying INSERT INTO t1 VALUES(12, NULL); worker1: calls backup_binlog_lock.acquire_protection() worker1: waits for worker0 in wait_for_its_turn() client: executes LOCK BINLOG FOR BACKUP client: waits in backup_binlog_lock.acquire(), but protection is acquired by worker1 worker0: calls backup_binlog_lock.acquire_protection(), but it's blocked by client --- .../percona_rpl_mts_backup_lock_ps3345.result | 37 ++++++++++ ...rcona_rpl_mts_backup_lock_ps3345-slave.opt | 6 ++ .../t/percona_rpl_mts_backup_lock_ps3345.test | 70 +++++++++++++++++++ sql/binlog.cc | 61 +++++++++++++--- sql/binlog.h | 3 +- sql/rpl_slave_commit_order_manager.cc | 8 +++ sql/sql_parse.cc | 8 +++ 7 files changed, 181 insertions(+), 12 deletions(-) create mode 100644 mysql-test/suite/rpl/r/percona_rpl_mts_backup_lock_ps3345.result create mode 100644 mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345-slave.opt create mode 100644 mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345.test diff --git a/mysql-test/suite/rpl/r/percona_rpl_mts_backup_lock_ps3345.result b/mysql-test/suite/rpl/r/percona_rpl_mts_backup_lock_ps3345.result new file mode 100644 index 000000000000..b73e02405be8 --- /dev/null +++ b/mysql-test/suite/rpl/r/percona_rpl_mts_backup_lock_ps3345.result @@ -0,0 +1,37 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +[connection slave] +include/stop_slave_sql.inc +# +# Case 1: 3-way deadlock with 2 slave threads and 1 slave client +# +[connection master] +CREATE TABLE t1(c1 INT PRIMARY KEY) ENGINE = InnoDB; +SET debug = "+d,set_commit_parent_100"; +INSERT INTO t1 VALUES(11); +INSERT INTO t1 VALUES(12); +INSERT INTO t1 VALUES(13); +INSERT INTO t1 VALUES(14); +[connection slave1] +SET GLOBAL debug = "+d,delay_slave_worker_0"; +include/start_slave_sql.inc +LOCK BINLOG FOR BACKUP; +[connection slave] +SET DEBUG_SYNC = 'now WAIT_FOR finished_delay_slave_worker_0'; +include/assert.inc [Throws error when deadlock is detected] +[connection slave1] +UNLOCK BINLOG; +[connection master] +include/sync_slave_sql_with_master.inc +# +# Deinitialize +# +[connection slave] +SET GLOBAL debug = ""; +SET DEBUG_SYNC = 'RESET'; +[connection master] +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345-slave.opt b/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345-slave.opt new file mode 100644 index 000000000000..73dcaf831146 --- /dev/null +++ b/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345-slave.opt @@ -0,0 +1,6 @@ +--slave-parallel-workers=2 +--slave_parallel_type='LOGICAL_CLOCK' +--slave-transaction-retries=0 +--slave-preserve-commit-order=ON +--innodb_lock_wait_timeout=1000 +--debug-sync-timeout=30 diff --git a/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345.test b/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345.test new file mode 100644 index 000000000000..363a4d23315b --- /dev/null +++ b/mysql-test/suite/rpl/t/percona_rpl_mts_backup_lock_ps3345.test @@ -0,0 +1,70 @@ +################################################################################ +# PS-3345: LP #1527463: Waiting for binlog lock +# +# 3-way deadlock can be achieved with 2 slave threads working in parallel and +# with 1 slave client that executes LOCK BINLOG FOR BACKUP. +# +# And the deadlock is: +# worker0: applying INSERT INTO t1 VALUES(11, NULL); +# worker1: applying INSERT INTO t1 VALUES(12, NULL); +# worker1: calls backup_binlog_lock.acquire_protection() +# worker1: waits for worker0 in wait_for_its_turn() +# client: executes LOCK BINLOG FOR BACKUP +# client: waits in backup_binlog_lock.acquire(), but protection is acquired by worker1 +# worker0: calls backup_binlog_lock.acquire_protection(), but it's blocked by client +# +# This test verifies the deadlock can be found and handled correctly. +# When it finds the deadlock, it throws error. +################################################################################ + +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--source include/rpl_connection_slave.inc +--source include/only_mts_slave_parallel_workers.inc +--source include/only_mts_slave_parallel_type_logical_clock.inc +--source include/stop_slave_sql.inc + +--echo # +--echo # Case 1: 3-way deadlock with 2 slave threads and 1 slave client +--echo # +--source include/rpl_connection_master.inc +CREATE TABLE t1(c1 INT PRIMARY KEY) ENGINE = InnoDB; +SET debug = "+d,set_commit_parent_100"; +INSERT INTO t1 VALUES(11); +INSERT INTO t1 VALUES(12); +INSERT INTO t1 VALUES(13); +INSERT INTO t1 VALUES(14); + +--source include/rpl_connection_slave1.inc +SET GLOBAL debug = "+d,delay_slave_worker_0"; +--source include/start_slave_sql.inc +--send LOCK BINLOG FOR BACKUP + +--source include/rpl_connection_slave.inc +SET DEBUG_SYNC = 'now WAIT_FOR finished_delay_slave_worker_0'; + +--let $assert_text= Throws error when deadlock is detected +--let $assert_cond= [ SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = "Waiting for binlog lock" AND INFO = "LOCK BINLOG FOR BACKUP" ] = 0 +--source include/assert.inc + +--source include/rpl_connection_slave1.inc +--reap +UNLOCK BINLOG; + +--source include/rpl_connection_master.inc +--source include/sync_slave_sql_with_master.inc + +--echo # +--echo # Deinitialize +--echo # +--source include/rpl_connection_slave.inc +SET GLOBAL debug = ""; +SET DEBUG_SYNC = 'RESET'; +--source include/rpl_connection_master.inc +DROP TABLE t1; + +--let $rpl_only_running_threads= 1 +--source include/rpl_end.inc diff --git a/sql/binlog.cc b/sql/binlog.cc index 58ea35fc9668..468155092b09 100644 --- a/sql/binlog.cc +++ b/sql/binlog.cc @@ -2700,7 +2700,9 @@ int MYSQL_BIN_LOG::rollback(THD *thd, bool all) DBUG_ASSERT(0); #endif - error= ordered_commit(thd, all, /* skip_commit */ true); + error= prepare_ordered_commit(thd, all, /* skip_commit */ true); + if (!error) + error= ordered_commit(thd); } if (check_write_error(thd)) @@ -9409,6 +9411,10 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) DBUG_RETURN(RESULT_ABORTED); } + int rc= prepare_ordered_commit(thd, all, skip_commit); + if (rc) + DBUG_RETURN(RESULT_INCONSISTENT); + /* Block binlog updates if there's an active BINLOG lock. @@ -9422,6 +9428,33 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) const ulong timeout= thd->variables.lock_wait_timeout; DBUG_PRINT("debug", ("Acquiring binlog protection lock")); + +#ifdef HAVE_REPLICATION + DBUG_EXECUTE_IF("delay_slave_worker_0", { + if (has_commit_order_manager(thd)) + { + Slave_worker *worker= dynamic_cast(thd->rli_slave); + + if (worker->id == 0) + { + static bool skip_first_query= true; + if (!skip_first_query) + { + static const char act[]= "now WAIT_FOR signal.lock_binlog_for_backup"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + + static const char act2[]= "now SIGNAL finished_delay_slave_worker_0"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act2))); + + DBUG_SET("-d,delay_slave_worker_0"); + } + skip_first_query= !skip_first_query; + } + } + }); +#endif + if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT, timeout)) { @@ -9434,7 +9467,7 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) binlog_prot_acquired= true; } - int rc= ordered_commit(thd, all, skip_commit); + rc= ordered_commit(thd); if (binlog_prot_acquired) { @@ -10101,12 +10134,10 @@ void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd, be skipped (it is handled by the caller somehow) and @c false otherwise (the normal case). */ -int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) +int MYSQL_BIN_LOG::prepare_ordered_commit(THD *thd, bool all, + bool skip_commit) { - DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit"); - int flush_error= 0, sync_error= 0; - my_off_t total_bytes= 0; - bool do_rotate= false; + DBUG_ENTER("MYSQL_BIN_LOG::prepare_ordered_commit"); /* These values are used while flushing a transaction, so clear @@ -10167,12 +10198,20 @@ int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) thd->commit_error= THD::CE_COMMIT_ERROR; DBUG_RETURN(thd->commit_error); } - - if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log)) - DBUG_RETURN(finish_commit(thd)); } - else #endif + + DBUG_RETURN(0); /* no error */ +} + + +int MYSQL_BIN_LOG::ordered_commit(THD *thd) +{ + DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit"); + int flush_error= 0, sync_error= 0; + my_off_t total_bytes= 0; + bool do_rotate= false; + if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log)) { DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d", diff --git a/sql/binlog.h b/sql/binlog.h index 4d77325d38d8..fcef62c502e3 100644 --- a/sql/binlog.h +++ b/sql/binlog.h @@ -672,7 +672,8 @@ class MYSQL_BIN_LOG: public TC_LOG void process_after_commit_stage_queue(THD *thd, THD *first); int process_flush_stage_queue(my_off_t *total_bytes_var, bool *rotate_var, THD **out_queue_var); - int ordered_commit(THD *thd, bool all, bool skip_commit = false); + int prepare_ordered_commit(THD *thd, bool all, bool skip_commit= false); + int ordered_commit(THD *thd); void handle_binlog_flush_or_sync_error(THD *thd, bool need_lock_log); public: int open_binlog(const char *opt_name); diff --git a/sql/rpl_slave_commit_order_manager.cc b/sql/rpl_slave_commit_order_manager.cc index 358a5acd841d..5f29f14be349 100644 --- a/sql/rpl_slave_commit_order_manager.cc +++ b/sql/rpl_slave_commit_order_manager.cc @@ -77,6 +77,14 @@ bool Commit_order_manager::wait_for_its_turn(Slave_worker *worker, DBUG_PRINT("info", ("Worker %lu is waiting for commit signal", worker->id)); + DBUG_EXECUTE_IF("delay_slave_worker_0", { + if (worker->id == 1) + { + static const char act[]= "now SIGNAL signal.w1.wait_for_its_turn"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + } + }); + mysql_mutex_lock(&m_mutex); thd->ENTER_COND(cond, &m_mutex, &stage_worker_waiting_for_its_turn_to_commit, diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 48467a39ecbb..79e45d2abaf2 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2512,6 +2512,14 @@ bool lock_binlog_for_backup(THD *thd) thd->global_read_lock.is_acquired()) DBUG_RETURN(false); + DBUG_EXECUTE_IF("delay_slave_worker_0", { + static const char act[]= "now WAIT_FOR signal.w1.wait_for_its_turn"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + + static const char act2[]= "now SIGNAL signal.lock_binlog_for_backup"; + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act2))); + }); + DBUG_RETURN(thd->backup_binlog_lock.acquire(thd)); }