Skip to content

Commit 2d47615

Browse files
Option to shut down master if semi-sync is being switched off while there are active un-acked transactions
Summary: The semi-sync master can be switched off manually or because of a timeout while waiting for an ack. This change introduces a new variable, rpl_semi_sync_master_crash_if_active_trxs, which when set to true causes the master to shut down (after printing an error message) if semi-sync is being switched off while there are active un-acked transactions. This prevents any un-acked transaction from committing on the master. Reviewed By: hermanlee Differential Revision: D5841339 fbshipit-source-id: c378441
1 parent 3e4b33b commit 2d47615

9 files changed

+241
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
call mtr.add_suppression("Timeout waiting for reply of binlog*");
7+
call mtr.add_suppression("Force shutdown: Semi-sync master is being switched off while there are active un-acked transactions");
8+
create table t1(a int) engine=innodb;
9+
include/stop_slave.inc
10+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
11+
include/start_slave.inc
12+
insert into t1 values(1);
13+
ERROR HY000: Lost connection to MySQL server during query
14+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
15+
# Restart the master server
16+
include/rpl_start_server.inc [server_number=1]
17+
include/stop_slave.inc
18+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
19+
include/start_slave.inc
20+
set @@global.rpl_semi_sync_master_timeout= 1000000;
21+
insert into t1 values(2);
22+
set @@global.rpl_semi_sync_master_enabled= OFF;
23+
ERROR HY000: Lost connection to MySQL server during query
24+
ERROR HY000: Lost connection to MySQL server during query
25+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
26+
# Restart the master server
27+
include/rpl_start_server.inc [server_number=1]
28+
"Table at the master"
29+
select * from t1;
30+
a
31+
1
32+
2
33+
"Table at the slave"
34+
select * from t1;
35+
a
36+
1
37+
2
38+
set @@global.rpl_semi_sync_master_enabled= OFF;
39+
set @@global.rpl_semi_sync_master_enabled= ON;
40+
drop table t1;
41+
include/rpl_end.inc
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
!include ../my.cnf
2+
[mysqld.1]
3+
rpl_semi_sync_master_enabled=1
4+
rpl_semi_sync_master_timeout=1000 # 1 second
5+
rpl_semi_sync_master_crash_if_active_trxs=1
6+
[mysqld.2]
7+
rpl_semi_sync_slave_enabled=1
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
source include/master-slave.inc;
2+
source include/have_debug.inc;
3+
4+
5+
call mtr.add_suppression("Timeout waiting for reply of binlog*");
6+
call mtr.add_suppression("Force shutdown: Semi-sync master is being switched off while there are active un-acked transactions");
7+
8+
connection master;
9+
create table t1(a int) engine=innodb;
10+
sync_slave_with_master;
11+
12+
# Disable ack on slave
13+
connection slave;
14+
source include/stop_slave.inc;
15+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
16+
source include/start_slave.inc;
17+
18+
##
19+
## An ack timeout while there are active un-acked trxs should shut down the master
20+
##
21+
connection master;
22+
enable_reconnect;
23+
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
24+
# The master will die while executing this statement because the slave will not ack it
25+
error 2013;
26+
insert into t1 values(1);
27+
28+
# Wait for master to die
29+
source include/wait_until_disconnected.inc;
30+
31+
# Resume normal slave operations, i.e., start acking
32+
connection slave;
33+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
34+
35+
# Restart master
36+
echo # Restart the master server;
37+
let $rpl_server_number= 1;
38+
source include/rpl_start_server.inc;
39+
disable_reconnect;
40+
41+
connection master;
42+
sync_slave_with_master;
43+
44+
##
45+
## Turning off semi-sync when there are active un-acked trxs should shut down the
46+
## master
47+
##
48+
49+
# Disable ack on the slave
50+
connection slave;
51+
source include/stop_slave.inc;
52+
SET GLOBAL DEBUG='+d,dont_send_semi_sync_reply';
53+
source include/start_slave.inc;
54+
55+
connection master;
56+
# Set timeout to a large value so that we have a chance to switch semi-sync off
57+
# manually
58+
set @@global.rpl_semi_sync_master_timeout= 1000000;
59+
send insert into t1 values(2); # this stmt will hang
60+
61+
# Wait till the insert is processed on the slave
62+
connection slave;
63+
let $wait_condition= SELECT count(*) = 1 from t1 where a = 2;
64+
source include/wait_condition.inc;
65+
66+
connection master1;
67+
enable_reconnect;
68+
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
69+
error 2013;
70+
set @@global.rpl_semi_sync_master_enabled= OFF; # master will shutdown
71+
72+
# Wait for master to die
73+
source include/wait_until_disconnected.inc;
74+
75+
connection master;
76+
error 2013;
77+
reap;
78+
79+
# Resume normal slave operations, i.e., start acking
80+
connection slave;
81+
SET GLOBAL DEBUG='-d,dont_send_semi_sync_reply';
82+
83+
# Restart master
84+
echo # Restart the master server;
85+
let $rpl_server_number= 1;
86+
source include/rpl_start_server.inc;
87+
disable_reconnect;
88+
89+
connection master;
90+
sync_slave_with_master;
91+
92+
# Will contain 2 rows because master recovery rolls trxs forward
93+
echo "Table at the master";
94+
connection master;
95+
select * from t1;
96+
97+
# Will contain 2 rows because the slave received all trxs
98+
echo "Table at the slave";
99+
connection slave;
100+
select * from t1;
101+
102+
##
103+
## Turning off semi-sync when there are no active un-acked trxs should succeed
104+
##
105+
connection master;
106+
set @@global.rpl_semi_sync_master_enabled= OFF; # should not fail
107+
set @@global.rpl_semi_sync_master_enabled= ON;
108+
109+
# Cleanup
110+
connection master;
111+
drop table t1;
112+
sync_slave_with_master;
113+
114+
115+
source include/rpl_end.inc;
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
set @save.rpl_semi_sync_master_crash_if_active_trxs= @@global.rpl_semi_sync_master_crash_if_active_trxs;
2+
select @@session.rpl_semi_sync_master_crash_if_active_trxs;
3+
ERROR HY000: Variable 'rpl_semi_sync_master_crash_if_active_trxs' is a GLOBAL variable
4+
select variable_name from information_schema.global_variables where variable_name='$var';
5+
variable_name
6+
select variable_name from information_schema.session_variables where variable_name='$var';
7+
variable_name
8+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= false;
9+
select @@global.rpl_semi_sync_master_crash_if_active_trxs;
10+
@@global.rpl_semi_sync_master_crash_if_active_trxs
11+
0
12+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= 1.1;
13+
ERROR 42000: Incorrect argument type to variable 'rpl_semi_sync_master_crash_if_active_trxs'
14+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= "foo";
15+
ERROR 42000: Variable 'rpl_semi_sync_master_crash_if_active_trxs' can't be set to the value of 'foo'
16+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= false;
17+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= true;
18+
select @@global.rpl_semi_sync_master_crash_if_active_trxs as "truncated to the maximum";
19+
truncated to the maximum
20+
1
21+
set @@global.rpl_semi_sync_master_crash_if_active_trxs= @save.rpl_semi_sync_master_crash_if_active_trxs;
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
--source include/not_embedded.inc
2+
3+
let $var= rpl_semi_sync_master_crash_if_active_trxs;
4+
eval set @save.$var= @@global.$var;
5+
6+
#
7+
# exists as global only
8+
#
9+
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
10+
eval select @@session.$var;
11+
12+
select variable_name from information_schema.global_variables where variable_name='$var';
13+
select variable_name from information_schema.session_variables where variable_name='$var';
14+
15+
#
16+
# show that it's writable
17+
#
18+
let $value= false;
19+
eval set @@global.$var= $value;
20+
eval select @@global.$var;
21+
22+
#
23+
# incorrect value
24+
#
25+
--error ER_WRONG_TYPE_FOR_VAR
26+
eval set @@global.$var= 1.1;
27+
--error ER_WRONG_VALUE_FOR_VAR
28+
eval set @@global.$var= "foo";
29+
30+
#
31+
# min/max values
32+
#
33+
eval set @@global.$var= false;
34+
eval set @@global.$var= true;
35+
eval select @@global.$var as "truncated to the maximum";
36+
37+
# cleanup
38+
eval set @@global.$var= @save.$var;

plugin/semisync/semisync_master.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
/* This indicates whether semi-synchronous replication is enabled. */
2929
char rpl_semi_sync_master_enabled;
3030
unsigned long rpl_semi_sync_master_timeout;
31+
char rpl_semi_sync_master_crash_if_active_trxs;
3132
unsigned long rpl_semi_sync_master_trace_level;
3233
char rpl_semi_sync_master_status = 0;
3334
unsigned long rpl_semi_sync_master_yes_transactions = 0;
@@ -908,6 +909,13 @@ int ReplSemiSyncMaster::switch_off()
908909
{
909910
const char *kWho = "ReplSemiSyncMaster::switch_off";
910911

912+
if (rpl_semi_sync_master_crash_if_active_trxs && !active_tranxs_->is_empty())
913+
{
914+
sql_print_error("Force shutdown: Semi-sync master is being switched off "
915+
"while there are active un-acked transactions");
916+
exit(0);
917+
}
918+
911919
function_enter(kWho);
912920
state_ = false;
913921

plugin/semisync/semisync_master.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ extern char rpl_semi_sync_master_enabled;
645645
extern char rpl_semi_sync_master_status;
646646
extern unsigned long rpl_semi_sync_master_clients;
647647
extern unsigned long rpl_semi_sync_master_timeout;
648+
extern char rpl_semi_sync_master_crash_if_active_trxs;
648649
extern unsigned long rpl_semi_sync_master_trace_level;
649650
extern unsigned long rpl_semi_sync_master_yes_transactions;
650651
extern unsigned long rpl_semi_sync_master_no_transactions;

plugin/semisync/semisync_master_plugin.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,13 @@ static MYSQL_SYSVAR_ULONG(timeout, rpl_semi_sync_master_timeout,
254254
fix_rpl_semi_sync_master_timeout, // update
255255
10000, 0, ~0UL, 1);
256256

257+
static MYSQL_SYSVAR_BOOL(crash_if_active_trxs,
258+
rpl_semi_sync_master_crash_if_active_trxs,
259+
PLUGIN_VAR_OPCMDARG,
260+
"Crash if there is an attempt to switch off semi-sync master while there are "
261+
"active un-acked transactions",
262+
NULL, NULL, 0);
263+
257264
static MYSQL_SYSVAR_BOOL(wait_no_slave, rpl_semi_sync_master_wait_no_slave,
258265
PLUGIN_VAR_OPCMDARG,
259266
"Wait until timeout when no semi-synchronous replication slave available (enabled by default). ",
@@ -277,6 +284,7 @@ static MYSQL_SYSVAR_STR(histogram_trx_wait_step_size,
277284
static SYS_VAR* semi_sync_master_system_vars[]= {
278285
MYSQL_SYSVAR(enabled),
279286
MYSQL_SYSVAR(timeout),
287+
MYSQL_SYSVAR(crash_if_active_trxs),
280288
MYSQL_SYSVAR(wait_no_slave),
281289
MYSQL_SYSVAR(trace_level),
282290
MYSQL_SYSVAR(histogram_trx_wait_step_size),

plugin/semisync/semisync_slave_plugin.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "semisync_slave.h"
2020
#include <mysql.h>
21+
#include <debug_sync.h>
2122

2223
static ReplSemiSyncSlave repl_semisync;
2324

@@ -64,6 +65,7 @@ int repl_semi_slave_queue_event(Binlog_relay_IO_param *param,
6465
{
6566
if (rpl_semi_sync_slave_status && semi_sync_need_reply)
6667
{
68+
DBUG_EXECUTE_IF("dont_send_semi_sync_reply", { return 0; });
6769
/*
6870
We deliberately ignore the error in slaveReply, such error
6971
should not cause the slave IO thread to stop, and the error

0 commit comments

Comments
 (0)