Skip to content

Commit 7ba5159

Browse files
abhinav04sharma authored and inikep committed
FB8-131: Option to shutdown master if semi-sync is being switched off while there are active un-acked trxns (facebook#966) (facebook#966)
Summary:
Jira issue: https://jira.percona.com/browse/FB8-131
Reference Patch: facebook@2d47615
Reference Patch: facebook@d1ea741
Reference Patch: facebook@f5085f4
Reference Patch: facebook@9204c65

Option to shut down the master if semi-sync is being switched off while there are active un-acked transactions.

Semi-sync master can be switched off manually or because of a timeout while waiting for an ack. This change introduces a new variable, rpl_semi_sync_master_crash_if_active_trxs, which when set to true causes the master to shut down (after printing an error message) if semi-sync is being switched off while there are active un-acked transactions. This prevents any un-acked transaction from committing on the master.

Also delete the pid file when crashing after a timeout with un-acked transactions. If rpl_semi_sync_master_crash_if_active_trxs = true, the server crashes after a timeout if there are any active un-acked transactions. mysqld_safe expects the pid file to be deleted if the instance is not meant to be brought back up again.

Pull Request resolved: facebook#966
Differential Revision: D14308888
Pulled By: hermanlee
1 parent 559991f commit 7ba5159

21 files changed

+289
-10
lines changed

mysql-test/include/plugin.defs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ qa_auth_client plugin_output_directory no PLUGIN_AUTH_
3838
udf_example plugin_output_directory no UDF_EXAMPLE_LIB
3939
ha_example plugin_output_directory no EXAMPLE_PLUGIN EXAMPLE
4040
conflicting_variables plugin_output_directory no CONFLICTING_VARIABLES
41-
semisync_source plugin_output_directory no SEMISYNC_SOURCE_PLUGIN
42-
semisync_replica plugin_output_directory no SEMISYNC_REPLICA_PLUGIN
43-
semisync_master plugin_output_directory no SEMISYNC_MASTER_PLUGIN
44-
semisync_slave plugin_output_directory no SEMISYNC_SLAVE_PLUGIN
41+
semisync_source plugin_output_directory no SEMISYNC_SOURCE_PLUGIN rpl_semi_sync_source
42+
semisync_replica plugin_output_directory no SEMISYNC_REPLICA_PLUGIN rpl_semi_sync_replica
43+
semisync_master plugin_output_directory no SEMISYNC_MASTER_PLUGIN rpl_semi_sync_master
44+
semisync_slave plugin_output_directory no SEMISYNC_SLAVE_PLUGIN rpl_semi_sync_slave
4545
ha_archive plugin_output_directory no ARCHIVE_PLUGIN
4646
ha_blackhole plugin_output_directory no BLACKHOLE_PLUGIN
4747
ha_federated plugin_output_directory no FEDERATED_PLUGIN

mysql-test/suite/binlog_nogtid/r/binlog_persist_only_variables.result

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ VARIABLE_NAME LIKE '%source%') AND
2525
(VARIABLE_NAME NOT IN ('innodb_api_enable_binlog',
2626
'binlog_file_basedir', 'binlog_index_basedir',
2727
'skip_flush_master_info', 'skip_flush_relay_worker_info',
28-
'read_only_slave', 'reset_seconds_behind_master',
28+
'read_only_slave', 'rpl_semi_sync_source_crash_if_active_trxs',
29+
'reset_seconds_behind_master',
2930
'innodb_master_thread_disabled_debug', 'innodb_replication_delay'))
3031
AND (VARIABLE_NAME NOT LIKE 'rocksdb%')
3132
ORDER BY VARIABLE_NAME;

mysql-test/suite/binlog_nogtid/r/binlog_persist_variables.result

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ VARIABLE_NAME LIKE '%source%') AND
2525
(VARIABLE_NAME NOT IN ('innodb_api_enable_binlog',
2626
'binlog_file_basedir', 'binlog_index_basedir',
2727
'skip_flush_master_info', 'skip_flush_relay_worker_info',
28-
'read_only_slave', 'reset_seconds_behind_master',
28+
'read_only_slave', 'rpl_semi_sync_source_crash_if_active_trxs',
29+
'reset_seconds_behind_master',
2930
'innodb_master_thread_disabled_debug', 'innodb_replication_delay'))
3031
AND (VARIABLE_NAME NOT LIKE 'rocksdb%')
3132
ORDER BY VARIABLE_NAME;

mysql-test/suite/binlog_nogtid/t/binlog_persist_only_variables.test

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ INSERT INTO rplvars (varname, varvalue)
5555
(VARIABLE_NAME NOT IN ('innodb_api_enable_binlog',
5656
'binlog_file_basedir', 'binlog_index_basedir',
5757
'skip_flush_master_info', 'skip_flush_relay_worker_info',
58-
'read_only_slave', 'reset_seconds_behind_master',
58+
'read_only_slave', 'rpl_semi_sync_source_crash_if_active_trxs',
59+
'reset_seconds_behind_master',
5960
'innodb_master_thread_disabled_debug', 'innodb_replication_delay'))
6061
AND (VARIABLE_NAME NOT LIKE 'rocksdb%')
6162
ORDER BY VARIABLE_NAME;

mysql-test/suite/binlog_nogtid/t/binlog_persist_variables.test

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ INSERT INTO rplvars (varname, varvalue)
5656
(VARIABLE_NAME NOT IN ('innodb_api_enable_binlog',
5757
'binlog_file_basedir', 'binlog_index_basedir',
5858
'skip_flush_master_info', 'skip_flush_relay_worker_info',
59-
'read_only_slave', 'reset_seconds_behind_master',
59+
'read_only_slave', 'rpl_semi_sync_source_crash_if_active_trxs',
60+
'reset_seconds_behind_master',
6061
'innodb_master_thread_disabled_debug', 'innodb_replication_delay'))
6162
AND (VARIABLE_NAME NOT LIKE 'rocksdb%')
6263
ORDER BY VARIABLE_NAME;
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
call mtr.add_suppression("Timeout waiting for reply of binlog");
7+
call mtr.add_suppression("Force shutdown: Semi-sync master is being switched off while there are active un-acked transactions");
8+
call mtr.add_suppression("A message intended for a client cannot be sent there as no client-session is attached");
9+
[connection master]
10+
create table t1(a int) engine=innodb;
11+
include/sync_slave_sql_with_master.inc
12+
include/stop_slave.inc
13+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
14+
CHANGE REPLICATION SOURCE TO SOURCE_RETRY_COUNT=0;
15+
include/start_slave.inc
16+
[connection master]
17+
insert into t1 values(1);
18+
ERROR HY000: Lost connection to MySQL server during query
19+
[connection slave]
20+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
21+
# Restart the master server
22+
include/rpl_start_server.inc [server_number=1]
23+
[connection master]
24+
include/sync_slave_sql_with_master.inc
25+
include/stop_slave.inc
26+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
27+
include/start_slave.inc
28+
[connection master]
29+
set @@global.rpl_semi_sync_source_timeout= 1000000;
30+
insert into t1 values(2);
31+
[connection slave]
32+
[connection master1]
33+
set @@global.rpl_semi_sync_source_timeout= 0;
34+
ERROR HY000: Lost connection to MySQL server during query
35+
[connection master]
36+
ERROR HY000: Lost connection to MySQL server during query
37+
[connection slave]
38+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
39+
# Restart the master server
40+
include/rpl_start_server.inc [server_number=1]
41+
[connection master]
42+
include/sync_slave_sql_with_master.inc
43+
"Table at the master"
44+
[connection master]
45+
select * from t1;
46+
a
47+
1
48+
2
49+
"Table at the slave"
50+
[connection slave]
51+
select * from t1;
52+
a
53+
1
54+
2
55+
[connection master]
56+
set @save.rpl_semi_sync_source_timeout = @@global.rpl_semi_sync_source_timeout;
57+
set @@global.rpl_semi_sync_source_timeout= 0;
58+
set @@global.rpl_semi_sync_source_timeout = @save.rpl_semi_sync_source_timeout;
59+
[connection master]
60+
drop table t1;
61+
include/rpl_end.inc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
$SEMISYNC_SOURCE_PLUGIN_OPT $SEMISYNC_SOURCE_PLUGIN_LOAD
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
$SEMISYNC_REPLICA_PLUGIN_OPT $SEMISYNC_REPLICA_PLUGIN_LOAD
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
!include ../my.cnf
2+
[mysqld.1]
3+
rpl_semi_sync_source_enabled=1
4+
rpl_semi_sync_source_timeout=1000 # 1 second
5+
rpl_semi_sync_source_crash_if_active_trxs=1
6+
[mysqld.2]
7+
rpl_semi_sync_replica_enabled=1
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
source include/not_valgrind.inc;
2+
source include/have_debug.inc;
3+
source include/master-slave.inc;
4+
source include/not_asan.inc;
5+
6+
7+
call mtr.add_suppression("Timeout waiting for reply of binlog");
8+
call mtr.add_suppression("Force shutdown: Semi-sync master is being switched off while there are active un-acked transactions");
9+
call mtr.add_suppression("A message intended for a client cannot be sent there as no client-session is attached");
10+
11+
--source include/rpl_connection_master.inc
12+
create table t1(a int) engine=innodb;
13+
--source include/sync_slave_sql_with_master.inc
14+
15+
# Disable ack on slave
16+
source include/stop_slave.inc;
17+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
18+
CHANGE REPLICATION SOURCE TO SOURCE_RETRY_COUNT=0;
19+
source include/start_slave.inc;
20+
21+
##
22+
## Ack timeout when there are active un-acked trxs should shutdown the master
23+
##
24+
--source include/rpl_connection_master.inc
25+
enable_reconnect;
26+
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
27+
# The master will die while executing this stmt because slave will not ack it
28+
error CR_SERVER_LOST;
29+
insert into t1 values(1);
30+
31+
# Wait for master to die
32+
source include/wait_until_disconnected.inc;
33+
34+
# Resume normal slave operations i.e start acking
35+
--source include/rpl_connection_slave.inc
36+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
37+
38+
# Restart master
39+
echo # Restart the master server;
40+
let $rpl_server_number= 1;
41+
source include/rpl_start_server.inc;
42+
disable_reconnect;
43+
44+
--source include/rpl_connection_master.inc
45+
--source include/sync_slave_sql_with_master.inc
46+
47+
##
48+
## Turning off semi-sync when there are active un-acked trxs should shutdown the
49+
## master
50+
##
51+
52+
# Disable ack on slave
53+
source include/stop_slave.inc;
54+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
55+
source include/start_slave.inc;
56+
57+
--source include/rpl_connection_master.inc
58+
# Set timeout to a large value so that we have a chance to switch semi-sync off
59+
# manually
60+
set @@global.rpl_semi_sync_source_timeout= 1000000;
61+
send insert into t1 values(2); # this stmt will hang
62+
63+
# Wait till the insert is processed on the slave
64+
--source include/rpl_connection_slave.inc
65+
let $wait_condition= SELECT count(*) = 1 from t1 where a = 2;
66+
source include/wait_condition.inc;
67+
68+
--source include/rpl_connection_master1.inc
69+
enable_reconnect;
70+
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
71+
--shutdown_server 0
72+
--error CR_SERVER_LOST
73+
set @@global.rpl_semi_sync_source_timeout= 0; # master will shutdown
74+
75+
# Wait for master to die
76+
source include/wait_until_disconnected.inc;
77+
78+
--source include/rpl_connection_master.inc
79+
error CR_SERVER_LOST;
80+
reap;
81+
82+
# Resume normal slave operations i.e start acking
83+
--source include/rpl_connection_slave.inc
84+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
85+
86+
# Restart master
87+
echo # Restart the master server;
88+
let $rpl_server_number= 1;
89+
source include/rpl_start_server.inc;
90+
disable_reconnect;
91+
92+
--source include/rpl_connection_master.inc
93+
--source include/sync_slave_sql_with_master.inc
94+
95+
# Will contain 2 rows because master recovery rolls trxs forward
96+
echo "Table at the master";
97+
--source include/rpl_connection_master.inc
98+
select * from t1;
99+
100+
# Will contain 2 rows because slaves received all trxs
101+
echo "Table at the slave";
102+
--source include/rpl_connection_slave.inc
103+
select * from t1;
104+
105+
##
106+
## Turning off semi-sync when there are no active un-acked trxs should succeed
107+
##
108+
--source include/rpl_connection_master.inc
109+
set @save.rpl_semi_sync_source_timeout = @@global.rpl_semi_sync_source_timeout;
110+
set @@global.rpl_semi_sync_source_timeout= 0; # should not fail
111+
set @@global.rpl_semi_sync_source_timeout = @save.rpl_semi_sync_source_timeout;
112+
113+
# Cleanup
114+
--source include/rpl_connection_master.inc
115+
drop table t1;
116+
117+
118+
source include/rpl_end.inc;

0 commit comments

Comments
 (0)