Skip to content

Commit

Permalink
ensure that promotion is a sync point on new leader
Browse files Browse the repository at this point in the history
Summary:
A newly elected raft leader makes sure that all trxs from the previous
leader are committed by the sql appliers. It then switches the server's trx
logs from apply-log-* to binary-log-*. To other parts of the system this
looks like a rotation, but the necessary sync calls are not made here.
So, if the server (or os) restarts, then the storage engine could lose
the commit markers of the last batch of trxs. This will result in silent
data drift.

This diff fixes the problem by making an explicit call to
ha_flush_logs() before switching the server's trx logs.

Reviewed By: anirbanr-fb

Differential Revision: D27582002

fbshipit-source-id: dc932ed247c
  • Loading branch information
bhatvinay authored and facebook-github-bot committed Apr 6, 2021
1 parent f12eea3 commit 39c70ca
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 0 deletions.
72 changes: 72 additions & 0 deletions mysql-test/suite/rpl_raft/r/rpl_raft_leader_election_crash.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
include/raft_3_node.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
"server_1 is the initial leader"
select variable_value from information_schema.global_status where variable_name = 'Rpl_raft_role';
variable_value
LEADER
"Stopping slave applier on all peers"
stop slave sql_thread;
Warnings:
Note 1255 Slave already has been stopped
stop slave sql_thread;
stop slave sql_thread;
"Writing data on leader"
create table t1 (a int primary key auto_increment) engine = innodb;
insert into t1 values();
insert into t1 values();
insert into t1 values();
select * from t1;
a
1
2
3
"Setting debug symbol so that server_2 crashed on becoming a leader after switching logs"
set global debug="+d,crash_after_point_binlog_to_binlog";
"Stopping sql appliers on server_2"
stop slave sql_thread
insert into t1 values();
insert into t1 values();
insert into t1 values();
select * from t1;
a
1
2
3
4
5
6
"Transfering leadership: server_1 -> server_2"
set @@global.rpl_raft_new_leader_uuid = 'uuid2';
select sleep(1);
sleep(1)
0
"Restarting server_2"
include/rpl_start_server.inc [server_number=2]
"Checking table values in server_2"
connection server_2
select * from t1;
a
1
2
3
4
5
6
select sleep(20);
sleep(20)
0
"Make server_1 the leader"
connection server_1
sleep(10)
0
Warnings:
Note 1254 Slave is already running
include/sync_slave_sql_with_master.inc
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
80 changes: 80 additions & 0 deletions mysql-test/suite/rpl_raft/t/rpl_raft_leader_election_crash.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

# Test outline: write rows on the raft leader (server_1), arm a debug crash
# point on server_2, transfer leadership to server_2 so that it crashes after
# switching its trx logs, restart it, and read back t1.
#
# NOTE(review): three echo commands in this file have no terminating
# semicolon. In each case the recorded .result prints the following line as
# part of the echo text, which means that following command is NOT executed.
# Each spot is marked below. Fixing them changes the test output, so the
# .result file has to be re-recorded together with the fix.
source ../include/raft_3_node.inc;

# Capture the raft peer uuid of every server. Within this file only $uuid2 is
# referenced again (for the leadership transfer).
connection server_1;
let $uuid1= `select variable_value from information_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;

connection server_2;
let $uuid2= `select variable_value from information_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;

connection server_3;
let $uuid3= `select variable_value from information_schema.global_status where variable_name = 'Rpl_raft_peer_uuid'`;

# Sanity check: the recorded result expects LEADER here.
echo "server_1 is the initial leader";
connection server_1;
select variable_value from information_schema.global_status where variable_name = 'Rpl_raft_role';

# Stop the sql applier thread on all three servers.
# Presumably this keeps the followers from applying the writes below until
# promotion - TODO confirm against the raft plugin behaviour.
echo "Stopping slave applier on all peers";
connection server_1;
stop slave sql_thread;
connection server_2;
stop slave sql_thread;
connection server_3;
stop slave sql_thread;

# First batch of rows (a = 1..3) written through the leader.
echo "Writing data on leader";
connection server_1;
create table t1 (a int primary key auto_increment) engine = innodb;
insert into t1 values();
insert into t1 values();
insert into t1 values();
select * from t1;

# Arm the debug keyword on server_2. binlog_change_to_binlog() in
# sql/binlog.cc calls DBUG_SUICIDE() when this keyword is set, after it has
# released the binlog locks.
echo "Setting debug symbol so that server_2 crashed on becoming a leader after switching logs";
connection server_2;
set global debug="+d,crash_after_point_binlog_to_binlog";

# NOTE(review): the echo below is missing its terminating semicolon, so the
# "stop slave sql_thread" line after it is consumed as echo text (the .result
# shows it printed without a trailing semicolon) and is not executed here.
# server_2's applier was already stopped earlier in this file.
echo "Stopping sql appliers on server_2"
stop slave sql_thread;

# Second batch of rows (a = 4..6) written through the leader.
connection server_1;
insert into t1 values();
insert into t1 values();
insert into t1 values();
select * from t1;
echo "Transfering leadership: server_1 -> server_2";
# Write "restart" into server 2's expect file before triggering the crash
# (mysql-test-run convention for an anticipated server exit - verify).
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
# Mask the run-specific uuid so the recorded output stays stable; the .result
# shows the literal 'uuid2'.
replace_result $uuid2 uuid2;
eval set @@global.rpl_raft_new_leader_uuid = '$uuid2';

# Fixed wait. Presumably gives server_2 time to get promoted and hit the
# crash point - TODO consider a wait condition instead of a sleep.
select sleep(1);
echo "Restarting server_2";
let $rpl_server_number = 2;
--source include/rpl_start_server.inc
# NOTE(review): the echo below is missing its terminating semicolon, so the
# "connection server_2" line after it is consumed as echo text (see the
# .result) and the connection is never switched. The select that follows
# therefore runs on the current connection - server_1 as last set in this
# file, unless rpl_start_server.inc changes it - and not on the restarted
# server_2, so the post-crash state of server_2 is not actually verified.
echo "Checking table values in server_2"
connection server_2;
select * from t1;

# Fixed wait; its purpose is not visible from this file - TODO confirm.
select sleep(20);

# Cleanup
# From here on statements are no longer echoed into the result; only result
# sets and warnings appear.
--disable_query_log

# NOTE(review): the echo below is missing its terminating semicolon, so the
# "connection server_1" line after it is consumed as echo text (see the
# .result) and is not executed.
echo "Make server_1 the leader"
connection server_1;
set global rpl_raft_start_election = 1;
select sleep(10);
drop table t1;

# Restart the appliers that were stopped at the beginning of the test.
connection server_2;
start slave sql_thread;

connection server_3;
start slave sql_thread;

# Wait for both followers to catch up before the standard teardown.
let $sync_slave_connection= server_2;
source include/sync_slave_sql_with_master.inc;
let $sync_slave_connection= server_3;
source include/sync_slave_sql_with_master.inc;

source include/rpl_end.inc;
8 changes: 8 additions & 0 deletions sql/binlog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8066,6 +8066,12 @@ int binlog_change_to_binlog()
int error= 0;
uint64_t prev_hlc= 0;

// Flush logs to ensure that storage engine has flushed and fsynced the last
// batch of transactions. This is important because the act of switching trx
// logs from "apply-logs-*" to "binary-logs-*" looks like a rotation to other
// parts of the system and rotation is always a 'sync' point
ha_flush_logs(NULL);

mysql_mutex_lock(mysql_bin_log.get_log_lock());
dump_log.lock();
mysql_bin_log.lock_index();
Expand Down Expand Up @@ -8185,6 +8191,8 @@ int binlog_change_to_binlog()
dump_log.unlock();
mysql_mutex_unlock(mysql_bin_log.get_log_lock());

DBUG_EXECUTE_IF("crash_after_point_binlog_to_binlog", DBUG_SUICIDE(););

DBUG_RETURN(error);
}

Expand Down

0 comments on commit 39c70ca

Please sign in to comment.