Skip to content

Commit

Permalink
Fixing a bug in stop slave
Browse files Browse the repository at this point in the history
Summary:
The last event start time was not reset properly, causing a partial
transaction that cannot be rolled back to give up before the 1-minute time
limit. Also, the rollback decision should only be taken when the slave is
aborting normally (not when the thread/process is killed).

Squash with: D5130797

Reviewed By: tianx

Differential Revision: D5195196

fbshipit-source-id: 3b43634
  • Loading branch information
abhinav04sharma authored and facebook-github-bot committed Jun 6, 2017
1 parent 528673c commit 0623a07
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 38 deletions.
44 changes: 18 additions & 26 deletions mysql-test/suite/rpl/r/rpl_stop_slave_partial_trx.result
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ create database d1;
create database d2;
create database d3;
create table d1.t1 (a int) engine=innodb;
insert into d1.t1 values(1);
create table d2.t2 (a int) engine=myisam;
create table d3.t3 (a int) engine=innodb;
lock tables d1.t1 read;
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
set @@global.debug= '+d,after_executed_write_rows_event';
update d1.t1 set a= 5;
update d1.t1 set a= 4;
update d1.t1 set a= 3;
update d1.t1 set a= 2;
update d1.t1 set a= 1;
set global debug= '+d,dump_thread_wait_after_send_write_rows';
insert into d2.t2 values(1);
unlock tables;
SET DEBUG_SYNC= 'now WAIT_FOR executed';
set @@global.debug= '-d,after_executed_write_rows_event';
set @start=now();
stop slave;
select timestampdiff(SECOND, @start, now()) >= 60;
Expand All @@ -33,10 +37,6 @@ connection master
select * from d1.t1;
a
1
1
1
1
1
select * from d2.t2;
a
1
Expand All @@ -47,27 +47,27 @@ connection slave
select * from d1.t1;
a
1
1
1
1
1
select * from d2.t2;
a
1
select * from d3.t3;
a
delete from d1.t1;
insert into d1.t1 values(1);
delete from d2.t2;
delete from d3.t3;
lock tables d1.t1 read;
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
insert into d1.t1 values(1);
set @@global.debug= '+d,after_executed_write_rows_event';
update d1.t1 set a= 5;
update d1.t1 set a= 4;
update d1.t1 set a= 3;
update d1.t1 set a= 2;
update d1.t1 set a= 1;
set global debug= '+d,dump_thread_wait_after_send_write_rows';
insert into d3.t3 values(1);
unlock tables;
SET DEBUG_SYNC= 'now WAIT_FOR executed';
set @@global.debug= '-d,after_executed_write_rows_event';
set @start=now();
stop slave;
select timestampdiff(SECOND, @start, now()) < 60;
Expand All @@ -82,10 +82,6 @@ connection master
select * from d1.t1;
a
1
1
1
1
1
select * from d2.t2;
a
select * from d3.t3;
Expand All @@ -96,10 +92,6 @@ connection slave
select * from d1.t1;
a
1
1
1
1
1
select * from d2.t2;
a
select * from d3.t3;
Expand Down
24 changes: 16 additions & 8 deletions mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ create database d1;
create database d2;
create database d3;
create table d1.t1 (a int) engine=innodb;
insert into d1.t1 values(1); # add some data for update statements
create table d2.t2 (a int) engine=myisam; # non-transactional engine
create table d3.t3 (a int) engine=innodb; # transactional engine
sync_slave_with_master;
Expand All @@ -24,14 +25,15 @@ sync_slave_with_master;
# Block all d1.t1 transactions on the slave
connection slave;
lock tables d1.t1 read;
set @@global.debug= '+d,after_executed_write_rows_event';

# Generate some load, all of these will be blocked in the slave worker queue
connection master;
let $num_inserts=5;
while ($num_inserts)
let $num_updates=5;
while ($num_updates)
{
insert into d1.t1 values(1);
dec $num_inserts;
eval update d1.t1 set a= $num_updates;
dec $num_updates;
}

# This will stop the dump thread before sending the entire group
Expand All @@ -47,6 +49,8 @@ connection slave;
unlock tables;

# This should take at least a minute because the trx on t2 is not completely downloaded and cannot be rolled back safely
SET DEBUG_SYNC= 'now WAIT_FOR executed';
set @@global.debug= '-d,after_executed_write_rows_event';
set @start=now();
stop slave;
select timestampdiff(SECOND, @start, now()) >= 60;
Expand Down Expand Up @@ -79,6 +83,7 @@ select * from d3.t3;
# cleanup
connection master;
delete from d1.t1;
insert into d1.t1 values(1); # add some data for update statements
delete from d2.t2;
delete from d3.t3;
sync_slave_with_master;
Expand All @@ -92,14 +97,15 @@ sync_slave_with_master;
# Block all d1.t1 transactions on the slave
connection slave;
lock tables d1.t1 read;
set @@global.debug= '+d,after_executed_write_rows_event';

# Generate some load, all of these will be blocked in the slave worker queue
connection master;
let $num_inserts=5;
while ($num_inserts)
let $num_updates=5;
while ($num_updates)
{
insert into d1.t1 values(1);
dec $num_inserts;
eval update d1.t1 set a= $num_updates;
dec $num_updates;
}

# This will stop the dump thread before sending the entire group
Expand All @@ -116,6 +122,8 @@ unlock tables;

# Since the partial transaction is on a transactional table, the slave should
# stop as soon as it completes all pending full transactions
SET DEBUG_SYNC= 'now WAIT_FOR executed';
set @@global.debug= '-d,after_executed_write_rows_event';
set @start=now();
stop slave;
select timestampdiff(SECOND, @start, now()) < 60;
Expand Down
11 changes: 11 additions & 0 deletions sql/rpl_rli_pdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2565,6 +2565,17 @@ int slave_worker_exec_job(Slave_worker *worker, Relay_log_info *rli)
if (!skip_event || ev->get_type_code() == TABLE_MAP_EVENT)
error= ev->do_apply_event_worker(worker);

DBUG_EXECUTE_IF("after_executed_write_rows_event",
{
if (ev->get_type_code() == WRITE_ROWS_EVENT)
{
const char act[]= "now signal executed";
DBUG_ASSERT(opt_debug_sync_timeout > 0);
DBUG_ASSERT(!debug_sync_set_action(thd,
STRING_WITH_LEN(act)));
}
};);

if (is_gtid_event(ev))
{
reset_dynamic(&worker->worker_gtid_infos);
Expand Down
12 changes: 8 additions & 4 deletions sql/rpl_slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1840,9 +1840,10 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
rli->sql_thread_kill_accepted= true;
/* NOTE: In MTS mode if all workers are done and if the partial trx
(if any) can be rolled back safely we can accept the kill */
bool can_rollback= !rli->is_mts_in_group() ||
(rli->mts_workers_queue_empty() &&
!rli->cannot_safely_rollback());
bool can_rollback= rli->abort_slave &&
(!rli->is_mts_in_group() ||
(rli->mts_workers_queue_empty() &&
!rli->cannot_safely_rollback()));
is_parallel_warn= (rli->is_parallel_exec() &&
(!can_rollback || thd->killed));
/*
Expand Down Expand Up @@ -1915,7 +1916,6 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
}
if (rli->sql_thread_kill_accepted)
{
rli->last_event_start_time= 0;
if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
{
rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
Expand All @@ -1932,6 +1932,10 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
}
}
}

if (rli->sql_thread_kill_accepted)
rli->last_event_start_time= 0;

DBUG_RETURN(rli->sql_thread_kill_accepted);
}

Expand Down

0 comments on commit 0623a07

Please sign in to comment.