Skip to content

Commit 21edb26

Browse files
Correctly calculating milli-seconds behind master
Summary: Milli-seconds behind master was not calculated correctly for MTS. With this change we calculate it just like seconds behind master. One difference between these is that sec timestamp is available for every event but msec timestamp is only available once per query/rows event in form of trx metadata inside a Rows_query_event. Squash with: D5220355 Reviewed By: teng-li Differential Revision: D5694630 fbshipit-source-id: 94c95bb
1 parent 5913425 commit 21edb26

7 files changed

+117
-43
lines changed

sql/binlog.cc

-6
Original file line numberDiff line numberDiff line change
@@ -1293,8 +1293,6 @@ binlog_cache_data::add_time_metadata(THD *thd, ptree &meta_data_root)
12931293

12941294
// get existing timestamps
12951295
ptree timestamps= meta_data_root.get_child("timestamps", ptree());
1296-
ulonglong prev_ts= timestamps.empty() ? 0 :
1297-
timestamps.back().second.get_value<ulonglong>();
12981296

12991297
// add our timestamp to the array
13001298
ptree timestamp;
@@ -1308,10 +1306,6 @@ binlog_cache_data::add_time_metadata(THD *thd, ptree &meta_data_root)
13081306
meta_data_root.erase("timestamps");
13091307
meta_data_root.add_child("timestamps", timestamps);
13101308

1311-
// milliseconds behind master related
1312-
if (thd->rli_slave && prev_ts > 0)
1313-
thd->rli_slave->last_master_timestamp_millis.store(prev_ts);
1314-
13151309
DBUG_VOID_RETURN;
13161310
}
13171311

sql/log_event.cc

+52-3
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ slave_ignored_err_throttle(window_size,
6363

6464
#include "sql_digest.h"
6565

66+
#include <boost/property_tree/ptree.hpp>
67+
#include <boost/property_tree/json_parser.hpp>
68+
#include <boost/algorithm/string.hpp>
69+
6670
using std::min;
6771
using std::max;
6872

@@ -3153,8 +3157,15 @@ Slave_worker *Log_event::get_slave_worker(Relay_log_info *rli)
31533157
if (is_relay_log_event())
31543158
ptr_group->ts= 0;
31553159
else
3160+
{
31563161
ptr_group->ts=
31573162
when.tv_sec + (time_t) exec_time; // Seconds_behind_master related
3163+
ptr_group->ts_millis= (rli->group_timestamp_millis ?
3164+
rli->group_timestamp_millis : when.tv_sec * 1000)
3165+
+ exec_time * 1000;
3166+
// reset for next group
3167+
rli->group_timestamp_millis= 0;
3168+
}
31583169
rli->checkpoint_seqno++;
31593170
/*
31603171
Coordinator should not use the main memroot however its not
@@ -3405,8 +3416,17 @@ void Log_event::do_post_end_event(Relay_log_info *rli, Log_event_wrapper *ev)
34053416
rli->checkpoint_seqno++;
34063417

34073418
// seconds_behind_master related
3408-
if (is_relay_log_event()) ptr_group->ts= 0;
3409-
else ptr_group->ts= when.tv_sec + (time_t) exec_time;
3419+
if (is_relay_log_event())
3420+
ptr_group->ts= 0;
3421+
else
3422+
{
3423+
ptr_group->ts= when.tv_sec + (time_t) exec_time;
3424+
ptr_group->ts_millis= (rli->group_timestamp_millis ?
3425+
rli->group_timestamp_millis : when.tv_sec * 1000)
3426+
+ exec_time * 1000;
3427+
// reset for next group
3428+
rli->group_timestamp_millis= 0;
3429+
}
34103430
}
34113431

34123432
/**
@@ -3700,6 +3720,16 @@ int Log_event::apply_event(Relay_log_info *rli)
37003720
worker= NULL;
37013721
rli->mts_group_status= Relay_log_info::MTS_IN_GROUP;
37023722

3723+
// milli-sec behind master related for MTS
3724+
// case: this event contains trx metadata, so save the ts in msec for the grp
3725+
if (opt_binlog_trx_meta_data &&
3726+
get_type_code() == ROWS_QUERY_LOG_EVENT &&
3727+
static_cast<Rows_query_log_event*>(this)->has_trx_meta_data())
3728+
{
3729+
rli->group_timestamp_millis=
3730+
static_cast<Rows_query_log_event*>(this)->extract_last_timestamp();
3731+
}
3732+
37033733
if (!opt_mts_dependency_replication)
37043734
worker= (Relay_log_info*)
37053735
(rli->last_assigned_worker= get_slave_worker(rli));
@@ -7636,7 +7666,7 @@ int Rotate_log_event::do_update_pos(Relay_log_info *rli)
76367666
(ulong) rli->get_group_master_log_pos()));
76377667
mysql_mutex_unlock(&rli->data_lock);
76387668
if (rli->is_parallel_exec())
7639-
rli->reset_notified_checkpoint(0, 0,
7669+
rli->reset_notified_checkpoint(0, 0, 0,
76407670
true/*need_data_lock=true*/);
76417671

76427672
/*
@@ -14793,6 +14823,25 @@ Rows_query_log_event::write_data_body(IO_CACHE *file)
1479314823
(uint) strlen(m_rows_query)));
1479414824
}
1479514825

14826+
inline ulonglong Rows_query_log_event::extract_last_timestamp() const
14827+
{
14828+
boost::property_tree::ptree pt;
14829+
std::string json= extract_trx_meta_data();
14830+
if (json.empty())
14831+
return 0;
14832+
std::istringstream is(json);
14833+
try {
14834+
read_json(is, pt);
14835+
} catch (const std::exception& e) {
14836+
sql_print_error("Error while parsing metadata JSON for timestamps");
14837+
return 0;
14838+
}
14839+
14840+
auto timestamps= pt.get_child("timestamps", boost::property_tree::ptree());
14841+
return timestamps.empty() ? 0 :
14842+
timestamps.back().second.get_value<ulonglong>();
14843+
}
14844+
1479614845
#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
1479714846
int Rows_query_log_event::do_apply_event(Relay_log_info const *rli)
1479814847
{

sql/log_event.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -5044,15 +5044,16 @@ class Rows_query_log_event : public Ignorable_log_event {
50445044

50455045
std::string extract_trx_meta_data() const
50465046
{
5047-
if (!has_trx_meta_data())
5048-
return std::string();
5047+
DBUG_ASSERT(has_trx_meta_data());
50495048
char *json_start= strchr(m_rows_query, '{');
50505049
char *json_end= strrchr(m_rows_query, '}');
50515050
DBUG_ASSERT(json_start < json_end);
50525051
size_t json_len= json_end - json_start + 1;
50535052
return std::string(json_start, json_len);
50545053
}
50555054

5055+
ulonglong extract_last_timestamp() const;
5056+
50565057
#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
50575058
virtual int do_apply_event(Relay_log_info const *rli);
50585059
#endif

sql/rpl_rli.cc

+35-12
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "rpl_info_factory.h"
3232
#include <mysql/plugin.h>
3333
#include <mysql/service_thd_wait.h>
34+
#include <chrono>
3435

3536
using std::min;
3637
using std::max;
@@ -192,8 +193,6 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery
192193
mysql_cond_init(0, &dag_empty_cond, NULL);
193194
mysql_mutex_init(0, &dag_empty_mutex, MY_MUTEX_INIT_FAST);
194195

195-
last_master_timestamp_millis.store(0);
196-
197196
DBUG_VOID_RETURN;
198197
}
199198

@@ -307,9 +306,11 @@ void Relay_log_info::reset_notified_relay_log_change()
307306
current checkpoint change.
308307
@param new_ts new seconds_behind_master timestamp value
309308
unless zero. Zero could be due to FD event.
309+
@param new_ts_millis new milli_seconds_behind_master timestamp value
310310
@param need_data_lock False if caller has locked @c data_lock
311311
*/
312312
void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts,
313+
ulonglong new_ts_millis,
313314
bool need_data_lock)
314315
{
315316
/*
@@ -364,16 +365,8 @@ void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts,
364365

365366
// Set the flag to say that "the slave has not yet caught up"
366367
slave_has_caughtup= Enum_slave_caughtup::NO;
367-
/*
368-
Note that we only skip assigning new_ts to last_master_timestamp when
369-
new_ts is smaller than last_master_timestamp to avoid a sudden spike on
370-
second behind master. If new_ts is very big, say bigger than time(0), we
371-
will assign the current time to last_master_timestamp instead.
372-
*/
373-
if (new_ts > last_master_timestamp)
374-
{
375-
set_last_master_timestamp(std::min(time(nullptr), new_ts));
376-
}
368+
369+
set_last_master_timestamp(new_ts, new_ts_millis);
377370

378371
if (need_data_lock)
379372
mysql_mutex_unlock(&data_lock);
@@ -528,6 +521,36 @@ void Relay_log_info::clear_until_condition()
528521
DBUG_VOID_RETURN;
529522
}
530523

524+
/**
525+
* Update the last master timestamp seen by the slave
526+
* Last master timestamp is used to calculate lag (seconds/milli-seconds behind
527+
* master).
528+
*/
529+
void Relay_log_info::set_last_master_timestamp(time_t ts, ulonglong ts_millis)
530+
{
531+
auto now= std::chrono::system_clock::now().time_since_epoch();
532+
auto now_sec= std::chrono::duration_cast<std::chrono::seconds>(now).count();
533+
auto now_msec= std::chrono::duration_cast<std::chrono::milliseconds>(now)
534+
.count();
535+
536+
/*
537+
Note that we only skip assigning ts to last_master_timestamp when
538+
ts is smaller than last_master_timestamp to avoid a sudden spike on
539+
second behind master. If ts is very big, say bigger than now(), we
540+
will assign the current time to last_master_timestamp instead.
541+
Same for last_master_timestamp_millis
542+
*/
543+
if (ts > last_master_timestamp)
544+
{
545+
penultimate_master_timestamp= last_master_timestamp;
546+
last_master_timestamp= std::min(ts, now_sec);
547+
mysql_bin_log.last_master_timestamp.store(last_master_timestamp);
548+
}
549+
550+
if (ts_millis > last_master_timestamp_millis)
551+
last_master_timestamp_millis= std::min(ts_millis, (ulonglong) now_msec);
552+
}
553+
531554
/**
532555
Update the peak lag with the latest event.
533556

sql/rpl_rli.h

+7-9
Original file line numberDiff line numberDiff line change
@@ -331,15 +331,13 @@ class Relay_log_info : public Rpl_info
331331
// cache value for sql thread
332332
time_t penultimate_master_timestamp;
333333

334-
void set_last_master_timestamp(time_t ts)
335-
{
336-
penultimate_master_timestamp= last_master_timestamp;
337-
last_master_timestamp= ts;
338-
mysql_bin_log.last_master_timestamp.store(last_master_timestamp);
339-
}
340-
341334
// last master timestamp in milli seconds from trx meta data
342-
std::atomic<ulonglong> last_master_timestamp_millis;
335+
ulonglong last_master_timestamp_millis= 0;
336+
337+
// milli ts for the current group
338+
ulonglong group_timestamp_millis= 0;
339+
340+
void set_last_master_timestamp(time_t ts, ulonglong ts_millis);
343341

344342
#define PEAK_LAG_MAX_SECS 512
345343
time_t peak_lag_last[PEAK_LAG_MAX_SECS];
@@ -733,7 +731,7 @@ class Relay_log_info : public Rpl_info
733731
Coordinator notifies Workers about this event. Coordinator and Workers
734732
maintain a bitmap of executed group that is reset with a new checkpoint.
735733
*/
736-
void reset_notified_checkpoint(ulong, time_t, bool);
734+
void reset_notified_checkpoint(ulong, time_t, ulonglong, bool);
737735

738736
/**
739737
Called when gaps execution is ended so it is crash-safe

sql/rpl_rli_pdb.h

+1
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ typedef struct st_slave_job_group
224224
volatile uchar done; // Flag raised by W, read and reset by Coordinator
225225
ulong shifted; // shift the last CP bitmap at receiving a new CP
226226
time_t ts; // Group's timestampt to update Seconds_behind_master
227+
ulonglong ts_millis= 0;
227228
#ifndef DBUG_OFF
228229
bool notified; // to debug group_master_log_name change notification
229230
#endif

sql/rpl_slave.cc

+19-11
Original file line numberDiff line numberDiff line change
@@ -3449,8 +3449,7 @@ bool show_slave_status(THD* thd, Master_info* mi)
34493449
.count();
34503450
// adjust for clock mismatch
34513451
now_millis-= mi->clock_diff_with_master * 1000;
3452-
protocol->store(now_millis -
3453-
mi->rli->last_master_timestamp_millis.load());
3452+
protocol->store(now_millis - mi->rli->last_master_timestamp_millis);
34543453
}
34553454
}
34563455

@@ -4649,15 +4648,27 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
46494648
last_master_timestamp if the tentative one is bigger. If the tentative
46504649
is too big so that it's beyond current slave time, we assign the
46514650
current time of the slave to the last_master_timestamp.
4651+
see @Relay_log_info::set_last_master_timestamp()
46524652
*/
46534653
time_t tentative_last_master_timestamp=
46544654
ev->when.tv_sec + (time_t) ev->exec_time;
46554655

4656-
if (tentative_last_master_timestamp > rli->last_master_timestamp)
4656+
// milli seconds behind master related for non-MTS
4657+
ulonglong tentative_last_master_ts_millis= 0;
4658+
4659+
// case: if trx meta data is enabled and this is a rows query event with
4660+
// trx meta data attempt to get the ts in milli secs
4661+
if (opt_binlog_trx_meta_data &&
4662+
ev->get_type_code() == ROWS_QUERY_LOG_EVENT &&
4663+
static_cast<Rows_query_log_event*>(ev)->has_trx_meta_data())
46574664
{
4658-
rli->set_last_master_timestamp(std::min(tentative_last_master_timestamp,
4659-
time(nullptr)));
4665+
tentative_last_master_ts_millis=
4666+
static_cast<Rows_query_log_event*>(ev)->extract_last_timestamp();
46604667
}
4668+
4669+
rli->set_last_master_timestamp(tentative_last_master_timestamp,
4670+
tentative_last_master_ts_millis);
4671+
46614672
DBUG_ASSERT(rli->last_master_timestamp >= 0);
46624673
}
46634674

@@ -6164,7 +6175,8 @@ bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
61646175
cnt is zero. This value means that the checkpoint information
61656176
will be completely reset.
61666177
*/
6167-
rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, need_data_lock);
6178+
rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, rli->gaq->lwm.ts_millis,
6179+
need_data_lock);
61686180

61696181
/* end-of "Coordinator::"commit_positions" */
61706182

@@ -7647,11 +7659,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
76477659
is always monotonically increasing
76487660
*/
76497661
mysql_mutex_lock(&rli->data_lock);
7650-
if (hb.when.tv_sec > rli->last_master_timestamp)
7651-
{
7652-
rli->set_last_master_timestamp(hb.when.tv_sec);
7653-
rli->last_master_timestamp_millis.store(hb.when.tv_sec * 1000);
7654-
}
7662+
rli->set_last_master_timestamp(hb.when.tv_sec, hb.when.tv_sec * 1000);
76557663
mysql_mutex_unlock(&rli->data_lock);
76567664

76577665
/*

0 commit comments

Comments
 (0)