Let Master Only Send Timestamped Heartbeat Event when waiting for new trxs

teng-li · facebook-github-bot · commit 746c217bba99 · 2017-08-10T12:26:34.000-07:00
Summary: Let master only send HB timestamp while waiting for new trx, this will fix the SBM being inaccurate when the slave is stopped and trying to catchup from the master. Squash with: D5006919 Reviewed By: abhinav04sharma Differential Revision: D5590444 fbshipit-source-id: 620dd48
diff --git a/mysql-test/suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc b/mysql-test/suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc
@@ -0,0 +1,38 @@
+################################################################################
+# The workload generator on the master for rpl_heartbeat_zero_timestamp
+################################################################################
+
+# Create the databases and tables
+
+--disable_query_log
+--disable_result_log
+
+let $i = $databases;
+
+while ($i)
+{
+  eval drop database if exists test$i;
+  eval create database test$i;
+  eval use test$i;
+  eval drop table if exists t$i;
+  eval create table t$i (a int) engine=InnoDB;
+  dec $i;
+}
+
+# Run the queries
+
+while ($iter)
+{
+  let $i=$databases;
+
+  while ($i)
+  {
+    eval use test$i;
+    eval insert into t$i values ($iter);
+    dec $i;
+  }
+  dec $iter;
+}
+
+--enable_result_log
+--enable_query_log
diff --git a/mysql-test/suite/rpl/r/rpl_heartbeat_zero_timestamp.result b/mysql-test/suite/rpl/r/rpl_heartbeat_zero_timestamp.result
@@ -0,0 +1,38 @@
+include/master-slave.inc
+[connection master]
+include/rpl_set_gtid_mode.inc
+include/stop_slave.inc
+set @save.slave_parallel_workers= @@global.slave_parallel_workers;
+SET @@global.slave_parallel_workers= 4;
+CHANGE MASTER TO MASTER_AUTO_POSITION=1;
+include/start_slave.inc
+SET GLOBAL DEBUG="+d, send_zero_hb_event";
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=0.1;
+include/start_slave.inc
+include/assert.inc [Seconds behind master should be bigger than zero after creating the lag for MTS]
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+SET @@global.slave_parallel_workers= 0;
+include/start_slave.inc
+include/assert.inc [Seconds behind master should be bigger than zero after creating the lag for non-MTS]
+use test4;
+drop table if exists t4;
+drop database if exists test4;
+use test3;
+drop table if exists t3;
+drop database if exists test3;
+use test2;
+drop table if exists t2;
+drop database if exists test2;
+use test1;
+drop table if exists t1;
+drop database if exists test1;
+SET GLOBAL DEBUG="-d, send_zero_hb_event";
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=60.000;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_heartbeat_zero_timestamp.test b/mysql-test/suite/rpl/t/rpl_heartbeat_zero_timestamp.test
@@ -0,0 +1,119 @@
+################################################################################
+# Testing the second behind master is correct with timestamp in HB event
+################################################################################
+
+source include/not_valgrind.inc;
+source include/not_parallel.inc;
+
+# Start a new master-slave
+--disable_warnings
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+--source include/rpl_set_gtid_mode.inc
+--enable_warnings
+
+--source include/have_gtid.inc
+
+# Enable MTS
+--connection slave
+--source include/stop_slave.inc
+set @save.slave_parallel_workers= @@global.slave_parallel_workers;
+SET @@global.slave_parallel_workers= 4;
+CHANGE MASTER TO MASTER_AUTO_POSITION=1;
+--source include/start_slave.inc
+
+# Run a few queries on the master to create skip scneario
+--connection master
+SET GLOBAL DEBUG="+d, send_zero_hb_event";
+let $databases = 4;
+let $iter = 10;
+--source suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc
+# Make sure that the slave has caught up to the master
+--source include/sync_slave_sql_with_master.inc
+
+
+# Stop the slave
+--connection slave
+--source include/stop_slave.inc
+
+
+# Create a lag on the master by running many queries
+--connection master
+let $databases = 4;
+let $iter = 10000;
+--source suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc
+
+
+# Now start slave again so that we can get some HB during skipping
+--connection slave
+# Create some lag
+let $lagging_sec = 5;
+sleep $lagging_sec;
+# Set HB event interval small enough
+let $old_slave_heartbeat_period= query_get_value(SHOW GLOBAL STATUS LIKE 'slave_heartbeat_period', Value, 1);
+let $new_slave_heartbeat_period= 0.1;
+# Since the skiping is really small, after this, for sure we should get the event
+let $skipping_guarantee_sleep= 1;
+eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$new_slave_heartbeat_period;
+--source include/start_slave.inc
+
+# Sleep so that we for sure get the binlog events
+sleep $skipping_guarantee_sleep;
+let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
+# Assertion
+--let $assert_text = Seconds behind master should be bigger than zero after creating the lag for MTS
+--let $assert_cond = $sbm > $lagging_sec;
+--source include/assert.inc
+
+
+# Catch up
+--connection master
+--source include/sync_slave_sql_with_master.inc
+
+
+# Now test by disabling MTS
+--connection slave
+--source include/stop_slave.inc
+SET @@global.slave_parallel_workers= 0;
+--source include/not_mts_slave_parallel_workers.inc
+
+
+# Create a lag on the master
+--connection master
+let $databases = 4;
+let $iter = 10000;
+--source suite/rpl/include/rpl_heartbeat_zero_timestamp_input.inc
+
+
+--connection slave
+# Create some lag
+sleep $lagging_sec;
+--source include/start_slave.inc
+# Sleep so that we for sure get the HB events
+sleep $skipping_guarantee_sleep;
+let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
+# Assertion
+--let $assert_text = Seconds behind master should be bigger than zero after creating the lag for non-MTS
+--let $assert_cond = $sbm > $lagging_sec
+--source include/assert.inc
+
+# clean up
+--connection master
+let $i = $databases;
+while ($i)
+{
+  eval use test$i;
+  eval drop table if exists t$i;
+  eval drop database if exists test$i;
+  dec $i;
+}
+SET GLOBAL DEBUG="-d, send_zero_hb_event";
+--source include/sync_slave_sql_with_master.inc
+
+--connection slave
+--source include/stop_slave.inc
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+eval CHANGE MASTER TO MASTER_HEARTBEAT_PERIOD=$old_slave_heartbeat_period;
+--source include/start_slave.inc
+
+--source include/rpl_end.inc
diff --git a/sql/rpl_master.cc b/sql/rpl_master.cc
@@ -808,8 +808,10 @@ static ulonglong get_heartbeat_period(THD * thd)
   @param packet             buffer to store the heartbeat instance
   @param event_coordinates  binlog file name and position of the last
                             real event master sent from binlog
+  @param send_timestamp     flag enables sending the HB event with
+                                the current timestamp: time().
 
-  @note 
+  @note
     Among three essential pieces of heartbeat data Log_event::when
     is computed locally.
     The  error to send is serious and should force terminating
@@ -818,17 +820,22 @@ static ulonglong get_heartbeat_period(THD * thd)
 static int send_heartbeat_event(NET* net, String* packet,
                                 const struct event_coordinates *coord,
                                 bool is_semi_sync_slave,
-                                uint8 checksum_alg_arg)
+                                uint8 checksum_alg_arg,
+                                bool send_timestamp)
 {
   DBUG_ENTER("send_heartbeat_event");
   char header[LOG_EVENT_HEADER_LEN];
   my_bool do_checksum= checksum_alg_arg != BINLOG_CHECKSUM_ALG_OFF &&
     checksum_alg_arg != BINLOG_CHECKSUM_ALG_UNDEF;
 
-  // NOTE: if @last_master_timestamp is provided we're a slave and we use this
-  // value in the HB event otherwise we use now()
-  time_t ts= mysql_bin_log.last_master_timestamp.load();
-  if (!ts) ts= time(0);
+  time_t ts= 0;
+
+  if (send_timestamp) {
+    // NOTE: if @last_master_timestamp is provided we're a slave and we use this
+    // value in the HB event otherwise we use now()
+    ts= mysql_bin_log.last_master_timestamp.load();
+    if (!ts) ts= time(0);
+  }
   memcpy(header, &ts, 4);
 
   header[EVENT_TYPE_OFFSET] = HEARTBEAT_LOG_EVENT;
@@ -880,6 +887,15 @@ static int send_heartbeat_event(NET* net, String* packet,
    later. Note that, the caller has to send the last skipped coordinates
    to this function.
 
+   Note that this function will not send HeartBeat events that carry the current
+   timestamp. This this because this function is ONLY used to send HB update
+   when the previous transaction has been skipped. Note that HB events'
+   timestamps will be used by the slave to calculate the seconds behind master.
+
+   The only place that makes sense for the event to carry a timestamp is when
+   the master is waiting on the update and keeping sending HB events to keep
+   the connection.
+
    @param[in]      net               This master-slave network handler
    @param[in]      packet            packet that is to be sent to the slave.
    @param[in]      last_skip_coord   coordinates for last skipped transaction
@@ -904,17 +920,24 @@ static int send_last_skip_group_heartbeat(THD *thd, NET* net, String *packet,
   String save_packet;
   int save_offset= *ev_offset;
 
- /* Save the current read packet */ 
+ /* Save the current read packet */
   save_packet.swap(*packet);
 
   if (reset_transmit_packet(thd, 0, ev_offset, errmsg, observe_transmission,
                             packet, packet_buffer, packet_buffer_size,
                             semi_sync_slave))
     DBUG_RETURN(-1);
 
-  /* Send heart beat event to the slave to update slave  threads coordinates */
-  if (send_heartbeat_event(net, packet, last_skip_coord,
-                           semi_sync_slave, checksum_alg_arg))
+  /**
+   * Send heart beat event to the slave to update slave  threads coordinates
+   * Note that we will not send timestamp in these heart beat events
+   */
+  if (send_heartbeat_event(net,
+                           packet,
+                           last_skip_coord,
+                           semi_sync_slave,
+                           checksum_alg_arg,
+                           false))
   {
     *errmsg= "Failed on my_net_write()";
     my_errno= ER_UNKNOWN_ERROR;
@@ -1922,6 +1945,10 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
         // Both diff_timespec() and heartbeat_period are in nano seconds.
         time_for_hb_event= (diff_timespec(cur_clock, last_event_sent_ts) >=
                             heartbeat_period);
+        DBUG_EXECUTE_IF("send_zero_hb_event",
+                        {
+                          time_for_hb_event= true;
+                        });
       }
 
       if ((!skip_group && last_skip_group
@@ -1938,6 +1965,10 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
           is the first event to be sent to the slave. In this case, it is
           no need to send a HB event (which might have coordinates
           of previous binlog file).
+
+          This heartbeat event sending only happens when the master is in the
+          middle or has just completed skipping all the trxs. Therefore, HB
+          shouldn't carry Timestamp.
         */
 
         if (send_last_skip_group_heartbeat(thd, net, packet, p_last_skip_coord,
@@ -2210,6 +2241,10 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
               If the heartbeat is on, it is better to send a heartbeat
               event as the time_out of certain functions (Ex: master_pos_wait()
               or semi sync ack timeout) might be less than heartbeat period.
+
+              This can happen when the slave has exact same GTID as the master.
+              In this case, we still send the first HB with 0 timestamp to be
+              safe since the follow up HB with carry the right timestamp
             */
             if (skip_group)
             {
@@ -2254,8 +2289,19 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
                 thd->EXIT_COND(&old_stage);
                 GOTO_ERR;
               }
-              if (send_heartbeat_event(net, packet, p_coord,
-                                       semi_sync_slave, current_checksum_alg))
+              /**
+               * HB events sent here are essentially when the master is waiting
+               * on updates and send HB to keep the connection connected.
+               *
+               * This is the only place that sending HB with Timestamp makes
+               * sense and won't break monotonic TS calculated SBM
+               */
+              if (send_heartbeat_event(net,
+                                       packet,
+                                       p_coord,
+                                       semi_sync_slave,
+                                       current_checksum_alg,
+                                       true))
               {
                 errmsg = "Failed on my_net_write()";
                 my_errno= ER_UNKNOWN_ERROR;
@@ -2357,7 +2403,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
 
           if (!skip_group && !goto_next_binlog)
           {
-            /* If the last group was skipped, send a HB event */
+            /**
+             * If the last group was skipped, send a HB event,
+             * similarly, HB sent here should not carry TS since the last
+             * trx is skipped and we are not sure if we are waiting on update
+             */
             if (last_skip_group &&
                 send_last_skip_group_heartbeat(thd, net, packet,
                                                p_last_skip_coord, &ev_offset,