Skip to content

Commit

Permalink
[raft] recover raft logs by removing partial trxs
Browse files Browse the repository at this point in the history
Summary:
Port D24628821

mysqld removes partial trxs in the tail of the trx log (named binary-logs on
primaries and apply-logs on secondaries) during startup. Relay logs, however,
were not of much importance since they were discarded anyway and a new one
would be created.
However, with raft, this is not ideal. Relay logs are raft logs on secondaries
and have to be kept around (and kept sane and consistent). This diff adds the
ability to remove partial trxs from raft/relay logs.
Much of the code to open the last relay log (based on relay log index) and
identify partial trxs is borrowed from existing logic in
MYSQL_BIN_LOG::open_binlog() and binlog_recover()

Reviewed By: Pushapgl

Differential Revision: D26447448

---------------------------------------------------------------------------------

Fix load_mi_and_rli_from_repositories for raft

Summary:
The load_mi_and_rli_from_repositories() function was updated in 8.0.28,
but the raft call was still using the old definition and triggered a
deadlock. Fix the call.

Reviewed By: yichenshen

Differential Revision: D38872610

---------------------------------------------------------------------------------

always release data_lock mutex to avoid deadlock

Summary: In a stage-1 replicaset, killing a secondary instance sometimes causes the instance to deadlock because the process_raft_queue thread does not release the mutex it acquired in raft_change_master.

Reviewed By: Pushapgl, bhatvinay

Differential Revision: D27602667
  • Loading branch information
luqun authored and inikep committed May 8, 2024
1 parent 173357c commit 8ebcac1
Show file tree
Hide file tree
Showing 12 changed files with 387 additions and 2 deletions.
4 changes: 4 additions & 0 deletions mysql-test/r/mysqld--help-notwin.result
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,9 @@ The following options may be given as the first argument:
--read-rnd-buffer-size=#
When reading rows in sorted order after a sort, the rows
are read through this buffer to avoid a disk seeks
--recover-raft-log Temprary variable to control recovery of raft log by
removing partial trxs. This should be removed later.
(Defaults to on; use --skip-recover-raft-log to disable.)
--regexp-stack-limit=#
Stack size limit for regular expressions matches
--regexp-time-limit=#
Expand Down Expand Up @@ -3144,6 +3147,7 @@ read-only FALSE
read-only-error-msg-extra
read-only-slave TRUE
read-rnd-buffer-size 262144
recover-raft-log TRUE
regexp-stack-limit 8000000
regexp-time-limit 32
relay-log relaylog
Expand Down
51 changes: 51 additions & 0 deletions mysql-test/suite/rpl_raft/r/rpl_raft_recover_raft_log.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
include/raft_3_node.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
call mtr.add_suppression(".*");
include/rpl_connect.inc [creating server_4]
include/rpl_connect.inc [creating server_5]
create table t1 (a int primary key) engine = innodb;
insert into t1 values(1);
insert into t1 values(2);
include/sync_slave_sql_with_master.inc
include/sync_slave_sql_with_master.inc
select * from t1;
a
1
2
select * from t1;
a
1
2
select sleep(10);
sleep(10)
0
"raft file: binary-logs-13001.000002"
"raft file pos: 1471"
"Restarting server_2"
include/rpl_restart_server.inc [server_number=2]
start slave sql_thread;
Warnings:
Note 3083 Replication thread(s) for channel '' are already runnning.
insert into t1 values(3);
include/sync_slave_sql_with_master.inc
include/sync_slave_sql_with_master.inc
select * from t1;
a
1
2
3
select * from t1;
a
1
2
3
drop table t1;
include/sync_slave_sql_with_master.inc
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--initialize --innodb_page_size=16k
82 changes: 82 additions & 0 deletions mysql-test/suite/rpl_raft/t/rpl_raft_recover_raft_log.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Test: truncate the raft (relay) log of server_2 in the middle, restart the
# server and check that replication continues and all rows are present on
# server_2 and server_3 afterwards.
source ../include/raft_3_node.inc;

# Suppress every error-log pattern for this test
call mtr.add_suppression(".*");

# Create connections to server 4 and 5 (these are not in the ring)
let $rpl_server_number= 4;
let $rpl_connection_name= server_4;
source include/rpl_connect.inc;
--disable_query_log
connection server_4;
call mtr.add_suppression(".*using --replicate-same-server-id in conjunction with --log-slave-updates.*");
--enable_query_log

let $rpl_server_number= 5;
let $rpl_connection_name= server_5;
source include/rpl_connect.inc;
--disable_query_log
connection server_5;
call mtr.add_suppression(".*using --replicate-same-server-id in conjunction with --log-slave-updates.*");
--enable_query_log

# Generate some transactions on server_1 and wait until server_2 and server_3
# have applied them
connection server_1;
create table t1 (a int primary key) engine = innodb;
insert into t1 values(1);
insert into t1 values(2);

let $sync_slave_connection= server_2;
source include/sync_slave_sql_with_master.inc;
let $sync_slave_connection= server_3;
source include/sync_slave_sql_with_master.inc;

connection server_2;
select * from t1;

connection server_3;
select * from t1;

# NOTE(review): presumably gives the log position time to settle before it is
# read below - confirm
connection server_2;
select sleep(10);

# Expand this test later to cover multiple scenarios
# Read the current relay (raft) log file and position of server_2, then cut
# the file down to half of that position to leave a damaged tail behind
let $server2_datadir = `select @@datadir`;
let $server2_raft_file = query_get_value("SHOW SLAVE STATUS", "Relay_Log_File", 1);
let $server2_raft_file_pos = query_get_value("SHOW SLAVE STATUS", "Relay_Log_Pos", 1);
echo "raft file: $server2_raft_file";
echo "raft file pos: $server2_raft_file_pos";
let $half = `select ROUND($server2_raft_file_pos / 2)`;
exec truncate -s $half $server2_datadir/$server2_raft_file;

# Restart server_2 so that startup runs against the truncated file
echo "Restarting server_2";
let $rpl_server_number = 2;
source include/rpl_restart_server.inc;

connection server_2;
# wait for the raft plugin to be initialized
sleep 10;
start slave sql_thread;

# New writes on server_1 must still reach server_2 and server_3
connection server_1;
insert into t1 values(3);

let $sync_slave_connection= server_2;
source include/sync_slave_sql_with_master.inc;
let $sync_slave_connection= server_3;
source include/sync_slave_sql_with_master.inc;

connection server_2;
select * from t1;

connection server_3;
select * from t1;

# cleanup
connection server_1;
drop table t1;

let $sync_slave_connection= server_2;
source include/sync_slave_sql_with_master.inc;
let $sync_slave_connection= server_3;
source include/sync_slave_sql_with_master.inc;

source include/rpl_end.inc;
42 changes: 42 additions & 0 deletions mysql-test/suite/sys_vars/r/recover_raft_log_basic.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
SET @start_recover_raft_log = @@global.recover_raft_log;
SELECT @start_recover_raft_log;
@start_recover_raft_log
1
SET @@global.recover_raft_log = DEFAULT;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
1
SET @@global.recover_raft_log = false;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
0
SET @@global.recover_raft_log = true;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
1
SET @@global.recover_raft_log = 1;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
1
SET @@global.recover_raft_log = 0;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
0
SET @@global.recover_raft_log = -1;
ERROR 42000: Variable 'recover_raft_log' can't be set to the value of '-1'
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
0
SET @@global.recover_raft_log = 100;
ERROR 42000: Variable 'recover_raft_log' can't be set to the value of '100'
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
0
SET @@session.recover_raft_log = 10;
ERROR HY000: Variable 'recover_raft_log' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@session.recover_raft_log;
ERROR HY000: Variable 'recover_raft_log' is a GLOBAL variable
SET @@global.recover_raft_log = @start_recover_raft_log;
SELECT @@global.recover_raft_log;
@@global.recover_raft_log
1
32 changes: 32 additions & 0 deletions mysql-test/suite/sys_vars/t/recover_raft_log_basic.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Basic checks for the global system variable recover_raft_log: default value,
# accepted boolean values, rejected values and session-scope access.

# Remember the initial value so it can be restored at the end
SET @start_recover_raft_log = @@global.recover_raft_log;
SELECT @start_recover_raft_log;

# Reset to the default value
SET @@global.recover_raft_log = DEFAULT;
SELECT @@global.recover_raft_log;

# Accepted values: false/true and 0/1
SET @@global.recover_raft_log = false;
SELECT @@global.recover_raft_log;

SET @@global.recover_raft_log = true;
SELECT @@global.recover_raft_log;

SET @@global.recover_raft_log = 1;
SELECT @@global.recover_raft_log;

SET @@global.recover_raft_log = 0;
SELECT @@global.recover_raft_log;

# Values other than 0/1 are rejected and leave the variable unchanged
--Error ER_WRONG_VALUE_FOR_VAR
SET @@global.recover_raft_log = -1;
SELECT @@global.recover_raft_log;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@global.recover_raft_log = 100;
SELECT @@global.recover_raft_log;

# The variable is global only: session-scope writes and reads both fail
--ERROR ER_GLOBAL_VARIABLE
SET @@session.recover_raft_log = 10;
--ERROR ER_INCORRECT_GLOBAL_LOCAL_VAR
SELECT @@session.recover_raft_log;

# Restore the initial value
SET @@global.recover_raft_log = @start_recover_raft_log;
SELECT @@global.recover_raft_log;
135 changes: 135 additions & 0 deletions sql/binlog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12115,6 +12115,141 @@ void MYSQL_BIN_LOG::signal_update() {
return;
}

/**
* Recover raft log. This is primarily for relay logs in the raft world since
* trx logs (binary logs or apply logs) are already recovered by mysqld as part
* of trx log recovery. This method tries to get rid of partial trxs in the tal
* of the raft log. Much has been borrowed from
* MYSQL_BIN_LOG::open_binlog(const char *opt_name) and
* binlog_recover(). Refactoring the components is rather hard and
* adds unnecessary complexity with additional params and if() {} else {}
* branches. Hence a separate method.
*/
int MYSQL_BIN_LOG::raft_log_recover() {
int error = 0;
Log_event *ev = 0;
char log_name[FN_REFLEN];
my_off_t valid_pos = 0;
my_off_t binlog_size = 0;
LOG_INFO log_info;
bool pending_gtid = false;
std::string error_message;
int status = 0;
bool in_transaction = false;
if (!mysql_bin_log.is_apply_log)
goto err; // raft log already recovered as part of trx log recovery

if (!my_b_inited(&index_file)) {
error_message = "Index file is not inited in recover_raft_log";
error = 1;
goto err;
}

if ((status =
find_log_pos(&log_info, NullS, true /*need_lock_index=true*/))) {
if (status != LOG_INFO_EOF) {
error_message = "find_log_pos() failed in recover_raft_log with error: " +
std::to_string(error);
error = 1;
}
goto err;
}

do {
strmake(log_name, log_info.log_file_name, sizeof(log_name) - 1);
} while (!(status = find_next_log(&log_info, true /*need_lock_index=true*/)));

if (status != LOG_INFO_EOF) {
error_message = "find_log_pos() failed in recover_raft_log with error: " +
std::to_string(error);
error = 1;
goto err;
}

{
Binlog_file_reader binlog_file_reader(opt_source_verify_checksum);
if (binlog_file_reader.open(log_name)) {
error = 1;
error_message = "open_binlog_file() failed in recover_raft_log with ";
goto err;
}
binlog_size = binlog_file_reader.ifile()->length();
// This logic is borrowed from binlog_recover() which has to do
// additional things and refactoring it will simply add more branches. Hence
// the code duplication
while ((ev = binlog_file_reader.read_event_object())) {
if (ev->get_type_code() == binary_log::QUERY_EVENT &&
!strcmp(static_cast<Query_log_event *>(ev)->query, "BEGIN")) {
in_transaction = true;
} else if (ev->get_type_code() == binary_log::QUERY_EVENT &&
!strcmp(static_cast<Query_log_event *>(ev)->query, "COMMIT")) {
assert(in_transaction == true);
in_transaction = false;
} else if (is_gtid_event(ev)) {
pending_gtid = true;
} else if (ev->get_type_code() == binary_log::XID_EVENT ||
(ev->get_type_code() == binary_log::QUERY_EVENT &&
!strcmp(static_cast<Query_log_event *>(ev)->query,
"COMMIT"))) {
if (!in_transaction) {
// When we see a commit message, we should already be parsing a valid
// transaction
error_message =
"Saw a XID/COMMIT event without a begin. Corrupted log: " +
std::string(log_name);
error = 1;
delete ev;
break;
}
in_transaction = false;
}
if (!(ev->get_type_code() == binary_log::METADATA_EVENT &&
pending_gtid)) {
if (!in_transaction && !is_gtid_event(ev)) {
valid_pos = binlog_file_reader.position();
pending_gtid = false;
}
}

delete ev;
}
}

// No partial trxs found in the raft log or error parsing the log
if (error || (valid_pos == 0 || valid_pos >= binlog_size)) goto err;

// NO_LINT_DEBUG
sql_print_information(
"Raft log %s with a size of %llu will be trimmed to "
"%llu bytes based on valid transactions in the file",
log_name, binlog_size, valid_pos);

{
std::unique_ptr<Binlog_ofile> ofile(
Binlog_ofile::open_existing(key_file_binlog, log_name, MYF(MY_WME)));
if (!ofile) {
error_message =
"Failed to remove partial transactions from raft log file ";
error = 1;
goto err;
}
if (ofile->truncate(valid_pos)) {
error_message =
"Failed to remove partial transactions from raft log file " +
std::string(log_name);
error = 1;
goto err;
}
}

err:
if (error && !error_message.empty())
// NO_LINT_DEBUG
sql_print_error("%s", error_message.c_str());

return error;
}

void MYSQL_BIN_LOG::update_binlog_end_pos(bool need_lock) {
if (need_lock)
lock_binlog_end_pos();
Expand Down
2 changes: 2 additions & 0 deletions sql/binlog.h
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,8 @@ class MYSQL_BIN_LOG : public TC_LOG {
*/
int wait_for_update();

int raft_log_recover();

public:
/** register binlog/relay (its IO_CACHE) and mutexes to plugin.
Sharing the pointers with the plugin enables the plugin to
Expand Down
1 change: 1 addition & 0 deletions sql/mysqld.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,7 @@ ulonglong apply_log_retention_num = 0;
ulonglong apply_log_retention_duration = 0;
bool disable_raft_log_repointing = 0;
ulong opt_raft_signal_async_dump_threads = 0;
bool recover_raft_log = false;

// Apply log related variables for raft
char *opt_apply_logname = nullptr;
Expand Down
1 change: 1 addition & 0 deletions sql/mysqld.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ extern bool disallow_raft;
extern bool override_enable_raft_check;
extern ulonglong apply_log_retention_num;
extern ulonglong apply_log_retention_duration;
extern bool recover_raft_log;
/* Apply log related variables for raft */
extern char *opt_apply_logname;
extern char *opt_applylog_index_name;
Expand Down
Loading

0 comments on commit 8ebcac1

Please sign in to comment.