Skip to content

Commit 0623a07

Browse files
Fixing a bug in stop slave
Summary: The last event start time was not reset properly, causing partial trx that cannot be rolled back to give up before the 1 minute time limit. Also, the rollback decision should only be taken when the slave is aborting normally (not when the thread/process is killed). Squash with: D5130797 Reviewed By: tianx Differential Revision: D5195196 fbshipit-source-id: 3b43634
1 parent 528673c commit 0623a07

File tree

4 files changed

+53
-38
lines changed

4 files changed

+53
-38
lines changed

mysql-test/suite/rpl/r/rpl_stop_slave_partial_trx.result

+18-26
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,21 @@ create database d1;
88
create database d2;
99
create database d3;
1010
create table d1.t1 (a int) engine=innodb;
11+
insert into d1.t1 values(1);
1112
create table d2.t2 (a int) engine=myisam;
1213
create table d3.t3 (a int) engine=innodb;
1314
lock tables d1.t1 read;
14-
insert into d1.t1 values(1);
15-
insert into d1.t1 values(1);
16-
insert into d1.t1 values(1);
17-
insert into d1.t1 values(1);
18-
insert into d1.t1 values(1);
15+
set @@global.debug= '+d,after_executed_write_rows_event';
16+
update d1.t1 set a= 5;
17+
update d1.t1 set a= 4;
18+
update d1.t1 set a= 3;
19+
update d1.t1 set a= 2;
20+
update d1.t1 set a= 1;
1921
set global debug= '+d,dump_thread_wait_after_send_write_rows';
2022
insert into d2.t2 values(1);
2123
unlock tables;
24+
SET DEBUG_SYNC= 'now WAIT_FOR executed';
25+
set @@global.debug= '-d,after_executed_write_rows_event';
2226
set @start=now();
2327
stop slave;
2428
select timestampdiff(SECOND, @start, now()) >= 60;
@@ -33,10 +37,6 @@ connection master
3337
select * from d1.t1;
3438
a
3539
1
36-
1
37-
1
38-
1
39-
1
4040
select * from d2.t2;
4141
a
4242
1
@@ -47,27 +47,27 @@ connection slave
4747
select * from d1.t1;
4848
a
4949
1
50-
1
51-
1
52-
1
53-
1
5450
select * from d2.t2;
5551
a
5652
1
5753
select * from d3.t3;
5854
a
5955
delete from d1.t1;
56+
insert into d1.t1 values(1);
6057
delete from d2.t2;
6158
delete from d3.t3;
6259
lock tables d1.t1 read;
63-
insert into d1.t1 values(1);
64-
insert into d1.t1 values(1);
65-
insert into d1.t1 values(1);
66-
insert into d1.t1 values(1);
67-
insert into d1.t1 values(1);
60+
set @@global.debug= '+d,after_executed_write_rows_event';
61+
update d1.t1 set a= 5;
62+
update d1.t1 set a= 4;
63+
update d1.t1 set a= 3;
64+
update d1.t1 set a= 2;
65+
update d1.t1 set a= 1;
6866
set global debug= '+d,dump_thread_wait_after_send_write_rows';
6967
insert into d3.t3 values(1);
7068
unlock tables;
69+
SET DEBUG_SYNC= 'now WAIT_FOR executed';
70+
set @@global.debug= '-d,after_executed_write_rows_event';
7171
set @start=now();
7272
stop slave;
7373
select timestampdiff(SECOND, @start, now()) < 60;
@@ -82,10 +82,6 @@ connection master
8282
select * from d1.t1;
8383
a
8484
1
85-
1
86-
1
87-
1
88-
1
8985
select * from d2.t2;
9086
a
9187
select * from d3.t3;
@@ -96,10 +92,6 @@ connection slave
9692
select * from d1.t1;
9793
a
9894
1
99-
1
100-
1
101-
1
102-
1
10395
select * from d2.t2;
10496
a
10597
select * from d3.t3;

mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx.test

+16-8
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ create database d1;
1111
create database d2;
1212
create database d3;
1313
create table d1.t1 (a int) engine=innodb;
14+
insert into d1.t1 values(1); # add some data for update statements
1415
create table d2.t2 (a int) engine=myisam; # non-transactional engine
1516
create table d3.t3 (a int) engine=innodb; # transactional engine
1617
sync_slave_with_master;
@@ -24,14 +25,15 @@ sync_slave_with_master;
2425
# Block all d1.t1 transactions on the slave
2526
connection slave;
2627
lock tables d1.t1 read;
28+
set @@global.debug= '+d,after_executed_write_rows_event';
2729

2830
# Generate some load, all of these will be blocked in the slave worker queue
2931
connection master;
30-
let $num_inserts=5;
31-
while ($num_inserts)
32+
let $num_updates=5;
33+
while ($num_updates)
3234
{
33-
insert into d1.t1 values(1);
34-
dec $num_inserts;
35+
eval update d1.t1 set a= $num_updates;
36+
dec $num_updates;
3537
}
3638

3739
# This will stop the dump thread before sending the entire group
@@ -47,6 +49,8 @@ connection slave;
4749
unlock tables;
4850

4951
# This should take at least a minute because the trx on t2 is not completely downloaded and cannot be rolled back safely
52+
SET DEBUG_SYNC= 'now WAIT_FOR executed';
53+
set @@global.debug= '-d,after_executed_write_rows_event';
5054
set @start=now();
5155
stop slave;
5256
select timestampdiff(SECOND, @start, now()) >= 60;
@@ -79,6 +83,7 @@ select * from d3.t3;
7983
# cleanup
8084
connection master;
8185
delete from d1.t1;
86+
insert into d1.t1 values(1); # add some data for update statements
8287
delete from d2.t2;
8388
delete from d3.t3;
8489
sync_slave_with_master;
@@ -92,14 +97,15 @@ sync_slave_with_master;
9297
# Block all d1.t1 transactions on the slave
9398
connection slave;
9499
lock tables d1.t1 read;
100+
set @@global.debug= '+d,after_executed_write_rows_event';
95101

96102
# Generate some load, all of these will be blocked in the slave worker queue
97103
connection master;
98-
let $num_inserts=5;
99-
while ($num_inserts)
104+
let $num_updates=5;
105+
while ($num_updates)
100106
{
101-
insert into d1.t1 values(1);
102-
dec $num_inserts;
107+
eval update d1.t1 set a= $num_updates;
108+
dec $num_updates;
103109
}
104110

105111
# This will stop the dump thread before sending the entire group
@@ -116,6 +122,8 @@ unlock tables;
116122

117123
# Since the partial transaction is on a transactional table the slave should
118124
# stop as soon as it completes all pending full transactions
125+
SET DEBUG_SYNC= 'now WAIT_FOR executed';
126+
set @@global.debug= '-d,after_executed_write_rows_event';
119127
set @start=now();
120128
stop slave;
121129
select timestampdiff(SECOND, @start, now()) < 60;

sql/rpl_rli_pdb.cc

+11
Original file line numberDiff line numberDiff line change
@@ -2565,6 +2565,17 @@ int slave_worker_exec_job(Slave_worker *worker, Relay_log_info *rli)
25652565
if (!skip_event || ev->get_type_code() == TABLE_MAP_EVENT)
25662566
error= ev->do_apply_event_worker(worker);
25672567

2568+
DBUG_EXECUTE_IF("after_executed_write_rows_event",
2569+
{
2570+
if (ev->get_type_code() == WRITE_ROWS_EVENT)
2571+
{
2572+
const char act[]= "now signal executed";
2573+
DBUG_ASSERT(opt_debug_sync_timeout > 0);
2574+
DBUG_ASSERT(!debug_sync_set_action(thd,
2575+
STRING_WITH_LEN(act)));
2576+
}
2577+
};);
2578+
25682579
if (is_gtid_event(ev))
25692580
{
25702581
reset_dynamic(&worker->worker_gtid_infos);

sql/rpl_slave.cc

+8-4
Original file line numberDiff line numberDiff line change
@@ -1840,9 +1840,10 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
18401840
rli->sql_thread_kill_accepted= true;
18411841
/* NOTE: In MTS mode if all workers are done and if the partial trx
18421842
(if any) can be rolled back safely we can accept the kill */
1843-
bool can_rollback= !rli->is_mts_in_group() ||
1844-
(rli->mts_workers_queue_empty() &&
1845-
!rli->cannot_safely_rollback());
1843+
bool can_rollback= rli->abort_slave &&
1844+
(!rli->is_mts_in_group() ||
1845+
(rli->mts_workers_queue_empty() &&
1846+
!rli->cannot_safely_rollback()));
18461847
is_parallel_warn= (rli->is_parallel_exec() &&
18471848
(!can_rollback || thd->killed));
18481849
/*
@@ -1915,7 +1916,6 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
19151916
}
19161917
if (rli->sql_thread_kill_accepted)
19171918
{
1918-
rli->last_event_start_time= 0;
19191919
if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
19201920
{
19211921
rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
@@ -1932,6 +1932,10 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
19321932
}
19331933
}
19341934
}
1935+
1936+
if (rli->sql_thread_kill_accepted)
1937+
rli->last_event_start_time= 0;
1938+
19351939
DBUG_RETURN(rli->sql_thread_kill_accepted);
19361940
}
19371941

0 commit comments

Comments
 (0)