Skip to content

Commit

Permalink
[raft] Checking for inited bool to make sure global_init_info was suc…
Browse files Browse the repository at this point in the history
…cessful

Summary:
Port D25584004

A master.info and a relay.info file can be present,
but they need to be properly inited before use. We were bypassing the inited
check, which could lead to issues in Raft.
If there is an error in global_init_info, Raft will do a
raft_reset_slave and make another attempt at it. If both recourses
fail, the init of the plugin will fail.

Reviewed By: Pushapgl

Differential Revision: D26447457

--------------------------------------------------------------------------------

raft: skip mts_recovery_groups during start slave

Summary:
During MySQL8+Raft DMP, some instances fail to switch to Leader or to start the slave:

```
2021-06-24T17:56:38.627423-07:00 431 [Note] [MY-010574] [Repl] Slave: MTS group recovery relay log info group_master_log_name /data/mysql/3127/bls-unittestdb658.frc2-3305-mysql.replicaset.180021/binary-logs-3727.000033, event_master_log_pos 1129.
2021-06-24T17:56:38.627473-07:00 431 [ERROR] [MY-010575] [Repl] Error looking for file after /binlogs/binary-logs-3307.000120.
2021-06-24T17:56:38.627516-07:00 431 [ERROR] [MY-000000] [Repl] load_mi_and_rli_from_repositories: rli_init_info returned error
```

Similar to 5.6, we don't need to run mts_recovery_groups because GTID_MODE is always enabled.

Reviewed By: Pushapgl

Differential Revision: D29520066
  • Loading branch information
luqun authored and inikep committed Jun 28, 2024
1 parent 18b4d5e commit d4b5041
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 3 deletions.
79 changes: 77 additions & 2 deletions sql/rpl_replica.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1453,10 +1453,35 @@ int rli_relay_log_raft_reset(
mysql_mutex_lock(&mi->data_lock);
mysql_mutex_lock(&mi->rli->data_lock);

if (mi->rli->check_info() == REPOSITORY_DOES_NOT_EXIST) {
enum_return_check check_return_mi = mi->check_info();
enum_return_check check_return_rli = mi->rli->check_info();

// If the master.info file does not exist, or if it exists,
// but the inited has never happened (most likely due to an
// error), try mi_init_info
if (check_return_mi == REPOSITORY_DOES_NOT_EXIST || !mi->inited) {
// NO_LINT_DEBUG
sql_print_information(
"rli_relay_log_raft_reset: Master info "
"repository doesn't exist or not inited."
" Calling mi_init_info");
if (mi->mi_init_info()) {
// NO_LINT_DEBUG
sql_print_error(
"rli_relay_log_raft_reset: Failed to initialize "
"the master info structure");
error = 1;
goto end;
}
}

if (check_return_rli == REPOSITORY_DOES_NOT_EXIST) {
// NO_LINT_DEBUG
// NO_LINT_DEBUG
sql_print_information(
"Relay log info repository doesn't exist, creating one now");
"rli_relay_log_raft_reset: Relay log info repository"
" doesn't exist or not inited. Calling"
" load_mi_and_rli_from_repositories ");
// TODO: Check these additional params (skip_received_gtid_set_recovery)
if (load_mi_and_rli_from_repositories(
mi,
Expand Down Expand Up @@ -1606,14 +1631,31 @@ int load_mi_and_rli_from_repositories(Master_info *mi, bool ignore_if_no_info,
*/
check_return = mi->check_info();
if (check_return == ERROR_CHECKING_REPOSITORY) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error(
"load_mi_and_rli_from_repositories: mi repository "
"check returns ERROR_CHECKING_REPOSITORY");
}
init_error = 1;
goto end;
}

if (!ignore_if_no_info || check_return != REPOSITORY_DOES_NOT_EXIST) {
if ((thread_mask & SLAVE_IO) != 0) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_information(
"load_mi_and_rli_from_repositories: mi_init_info called");
}
if (!mi->inited || force_load) {
if (mi->mi_init_info()) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error(
"load_mi_and_rli_from_repositories: mi_init_info returned "
"error");
}
init_error = 1;
}
}
Expand All @@ -1622,13 +1664,30 @@ int load_mi_and_rli_from_repositories(Master_info *mi, bool ignore_if_no_info,

check_return = mi->rli->check_info();
if (check_return == ERROR_CHECKING_REPOSITORY) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error(
"load_mi_and_rli_from_repositories: rli repository check returns"
" ERROR_CHECKING_REPOSITORY");
}
init_error = 1;
goto end;
}
if (!ignore_if_no_info || check_return != REPOSITORY_DOES_NOT_EXIST) {
if ((thread_mask & SLAVE_SQL) != 0 || !(mi->rli->inited)) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_information(
"load_mi_and_rli_from_repositories: rli_init_info called");
}
if (!mi->rli->inited || force_load) {
if (mi->rli->rli_init_info(skip_received_gtid_set_recovery)) {
if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error(
"load_mi_and_rli_from_repositories: rli_init_info returned "
"error");
}
init_error = 1;
} else {
/*
Expand Down Expand Up @@ -2447,6 +2506,12 @@ bool start_slave_threads(bool need_lock_slave, bool wait_for_start,
if (!mi->inited || !mi->rli->inited) {
int error = (!mi->inited ? ER_REPLICA_CM_INIT_REPOSITORY
: ER_REPLICA_AM_INIT_REPOSITORY);

if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error("start_slave_threads: error: %d mi_inited: %d", error,
mi->inited);
}
Rpl_info *info = (!mi->inited ? mi : static_cast<Rpl_info *>(mi->rli));
const char *prefix = current_thd ? ER_THD_NONCONST(current_thd, error)
: ER_DEFAULT_NONCONST(error);
Expand Down Expand Up @@ -6753,6 +6818,10 @@ bool mts_recovery_groups(Relay_log_info *rli) {
return false;
}

// raft replication always have GTID_MODE=ON, thus ignore positions
if (enable_raft_plugin) {
return false;
}
/*
Save relay log position to compare with worker's position.
*/
Expand Down Expand Up @@ -9598,6 +9667,12 @@ bool start_slave(THD *thd, LEX_SLAVE_CONNECTION *connection_param,
if (load_mi_and_rli_from_repositories(mi, false, thread_mask)) {
is_error = true;
my_error(ER_CONNECTION_METADATA, MYF(0));

if (enable_raft_plugin) {
// NO_LINT_DEBUG
sql_print_error(
"start_slave: error as load_mi_and_rli_from_repositories failed");
}
} else if (*mi->host || !(thread_mask & SLAVE_IO)) {
/*
If we will start IO thread we need to take care of possible
Expand Down
7 changes: 6 additions & 1 deletion sql/rpl_rli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1565,7 +1565,12 @@ int Relay_log_info::rli_init_info(bool skip_received_gtid_set_recovery) {
stopped when there were replication initialization errors, now it is
not and so init_info() must be aware of previous failures.
*/
if (error_on_rli_init_info) goto err;
if (error_on_rli_init_info) {
// In raft mode, these error codes are critical. Hence we should
// not chew them.
if (enable_raft_plugin) error = 1;
goto err;
}

if (inited) {
return recovery_parallel_workers ? mts_recovery_groups(this) : 0;
Expand Down

0 comments on commit d4b5041

Please sign in to comment.