Skip to content

Commit

Permalink
Correctly calculating milli-seconds behind master
Browse files Browse the repository at this point in the history
Summary:
Milliseconds behind master was not calculated correctly for MTS (the
multi-threaded slave). With this change we calculate it the same way as seconds
behind master. One difference between the two is that the second-resolution
timestamp is available for every event, whereas the millisecond timestamp is
only available once per query/rows event, in the form of trx metadata inside a
Rows_query_log_event.

Squash with: D5220355

Reviewed By: teng-li

Differential Revision: D5694630

fbshipit-source-id: 94c95bb
  • Loading branch information
abhinav04sharma authored and facebook-github-bot committed Aug 30, 2017
1 parent 5913425 commit 21edb26
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 43 deletions.
6 changes: 0 additions & 6 deletions sql/binlog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1293,8 +1293,6 @@ binlog_cache_data::add_time_metadata(THD *thd, ptree &meta_data_root)

// get existing timestamps
ptree timestamps= meta_data_root.get_child("timestamps", ptree());
ulonglong prev_ts= timestamps.empty() ? 0 :
timestamps.back().second.get_value<ulonglong>();

// add our timestamp to the array
ptree timestamp;
Expand All @@ -1308,10 +1306,6 @@ binlog_cache_data::add_time_metadata(THD *thd, ptree &meta_data_root)
meta_data_root.erase("timestamps");
meta_data_root.add_child("timestamps", timestamps);

// milliseconds behind master related
if (thd->rli_slave && prev_ts > 0)
thd->rli_slave->last_master_timestamp_millis.store(prev_ts);

DBUG_VOID_RETURN;
}

Expand Down
55 changes: 52 additions & 3 deletions sql/log_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ slave_ignored_err_throttle(window_size,

#include "sql_digest.h"

#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/algorithm/string.hpp>

using std::min;
using std::max;

Expand Down Expand Up @@ -3153,8 +3157,15 @@ Slave_worker *Log_event::get_slave_worker(Relay_log_info *rli)
if (is_relay_log_event())
ptr_group->ts= 0;
else
{
ptr_group->ts=
when.tv_sec + (time_t) exec_time; // Seconds_behind_master related
ptr_group->ts_millis= (rli->group_timestamp_millis ?
rli->group_timestamp_millis : when.tv_sec * 1000)
+ exec_time * 1000;
// reset for next group
rli->group_timestamp_millis= 0;
}
rli->checkpoint_seqno++;
/*
Coordinator should not use the main memroot however its not
Expand Down Expand Up @@ -3405,8 +3416,17 @@ void Log_event::do_post_end_event(Relay_log_info *rli, Log_event_wrapper *ev)
rli->checkpoint_seqno++;

// seconds_behind_master related
if (is_relay_log_event()) ptr_group->ts= 0;
else ptr_group->ts= when.tv_sec + (time_t) exec_time;
if (is_relay_log_event())
ptr_group->ts= 0;
else
{
ptr_group->ts= when.tv_sec + (time_t) exec_time;
ptr_group->ts_millis= (rli->group_timestamp_millis ?
rli->group_timestamp_millis : when.tv_sec * 1000)
+ exec_time * 1000;
// reset for next group
rli->group_timestamp_millis= 0;
}
}

/**
Expand Down Expand Up @@ -3700,6 +3720,16 @@ int Log_event::apply_event(Relay_log_info *rli)
worker= NULL;
rli->mts_group_status= Relay_log_info::MTS_IN_GROUP;

// milli-sec behind master related for MTS
// case: this event contains trx metadata, so save the ts in msec for the grp
if (opt_binlog_trx_meta_data &&
get_type_code() == ROWS_QUERY_LOG_EVENT &&
static_cast<Rows_query_log_event*>(this)->has_trx_meta_data())
{
rli->group_timestamp_millis=
static_cast<Rows_query_log_event*>(this)->extract_last_timestamp();
}

if (!opt_mts_dependency_replication)
worker= (Relay_log_info*)
(rli->last_assigned_worker= get_slave_worker(rli));
Expand Down Expand Up @@ -7636,7 +7666,7 @@ int Rotate_log_event::do_update_pos(Relay_log_info *rli)
(ulong) rli->get_group_master_log_pos()));
mysql_mutex_unlock(&rli->data_lock);
if (rli->is_parallel_exec())
rli->reset_notified_checkpoint(0, 0,
rli->reset_notified_checkpoint(0, 0, 0,
true/*need_data_lock=true*/);

/*
Expand Down Expand Up @@ -14793,6 +14823,25 @@ Rows_query_log_event::write_data_body(IO_CACHE *file)
(uint) strlen(m_rows_query)));
}

/**
  Return the most recent timestamp stored in this event's trx metadata.

  The metadata JSON is obtained from extract_trx_meta_data(), parsed into a
  property tree, and the value of the last child of its "timestamps" node is
  returned.

  Deliberately not declared `inline`: the function is declared in the class
  and defined only in this file, so callers in other files need an
  out-of-line definition to link against.

  @return the last "timestamps" entry as ulonglong, or 0 when the metadata is
          empty, is not valid JSON, has no "timestamps" entries, or the last
          entry cannot be converted to ulonglong.
*/
ulonglong Rows_query_log_event::extract_last_timestamp() const
{
  const std::string json= extract_trx_meta_data();
  if (json.empty())
    return 0;

  try {
    boost::property_tree::ptree pt;
    std::istringstream is(json);
    read_json(is, pt);

    // Copy the node: when "timestamps" is absent get_child() hands back a
    // reference to the temporary default, which must not outlive this line.
    const auto timestamps=
      pt.get_child("timestamps", boost::property_tree::ptree());

    // get_value<T>() throws when the entry is not convertible to T, so the
    // lookup stays inside the try block together with the parse.
    return timestamps.empty() ? 0 :
      timestamps.back().second.get_value<ulonglong>();
  } catch (const std::exception&) {
    sql_print_error("Error while parsing metadata JSON for timestamps");
    return 0;
  }
}

#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
int Rows_query_log_event::do_apply_event(Relay_log_info const *rli)
{
Expand Down
5 changes: 3 additions & 2 deletions sql/log_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -5044,15 +5044,16 @@ class Rows_query_log_event : public Ignorable_log_event {

/**
  Extract the JSON trx metadata embedded in the rows-query string.

  The JSON document is taken to be the span from the first '{' to the last
  '}' (both inclusive) of m_rows_query.

  @return the JSON text, or an empty string when the event carries no trx
          metadata or no well-formed '{' ... '}' span can be found.
*/
std::string extract_trx_meta_data() const
{
  if (!has_trx_meta_data())
    return std::string();

  const char *json_start= strchr(m_rows_query, '{');
  const char *json_end= strrchr(m_rows_query, '}');

  // Debug builds still flag a malformed payload loudly...
  DBUG_ASSERT(json_start != NULL && json_end != NULL && json_start < json_end);
  // ...while builds where the assert compiles out must not do pointer
  // arithmetic on NULL or build a string from a negative length.
  if (json_start == NULL || json_end == NULL || json_end < json_start)
    return std::string();

  const size_t json_len= json_end - json_start + 1;
  return std::string(json_start, json_len);
}

ulonglong extract_last_timestamp() const;

#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
virtual int do_apply_event(Relay_log_info const *rli);
#endif
Expand Down
47 changes: 35 additions & 12 deletions sql/rpl_rli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "rpl_info_factory.h"
#include <mysql/plugin.h>
#include <mysql/service_thd_wait.h>
#include <chrono>

using std::min;
using std::max;
Expand Down Expand Up @@ -192,8 +193,6 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery
mysql_cond_init(0, &dag_empty_cond, NULL);
mysql_mutex_init(0, &dag_empty_mutex, MY_MUTEX_INIT_FAST);

last_master_timestamp_millis.store(0);

DBUG_VOID_RETURN;
}

Expand Down Expand Up @@ -307,9 +306,11 @@ void Relay_log_info::reset_notified_relay_log_change()
current checkpoint change.
@param new_ts new seconds_behind_master timestamp value
unless zero. Zero could be due to FD event.
@param new_ts_millis new milli_seconds_behind_master timestamp value
@param need_data_lock False if caller has locked @c data_lock
*/
void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts,
ulonglong new_ts_millis,
bool need_data_lock)
{
/*
Expand Down Expand Up @@ -364,16 +365,8 @@ void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts,

// Set the flag to say that "the slave has not yet caught up"
slave_has_caughtup= Enum_slave_caughtup::NO;
/*
Note that we only skip assigning new_ts to last_master_timestamp when
new_ts is smaller than last_master_timestamp to avoid a sudden spike on
second behind master. If new_ts is very big, say bigger than time(0), we
will assign the current time to last_master_timestamp instead.
*/
if (new_ts > last_master_timestamp)
{
set_last_master_timestamp(std::min(time(nullptr), new_ts));
}

set_last_master_timestamp(new_ts, new_ts_millis);

if (need_data_lock)
mysql_mutex_unlock(&data_lock);
Expand Down Expand Up @@ -528,6 +521,36 @@ void Relay_log_info::clear_until_condition()
DBUG_VOID_RETURN;
}

/**
* Update the last master timestamp seen by the slave
* Last master timestamp is used to calculate lag (seconds/milli-seconds behind
* master).
*/
void Relay_log_info::set_last_master_timestamp(time_t ts, ulonglong ts_millis)
{
auto now= std::chrono::system_clock::now().time_since_epoch();
auto now_sec= std::chrono::duration_cast<std::chrono::seconds>(now).count();
auto now_msec= std::chrono::duration_cast<std::chrono::milliseconds>(now)
.count();

/*
Note that we only skip assigning ts to last_master_timestamp when
ts is smaller than last_master_timestamp to avoid a sudden spike on
second behind master. If ts is very big, say bigger than now(), we
will assign the current time to last_master_timestamp instead.
Same for last_master_timestamp_millis
*/
if (ts > last_master_timestamp)
{
penultimate_master_timestamp= last_master_timestamp;
last_master_timestamp= std::min(ts, now_sec);
mysql_bin_log.last_master_timestamp.store(last_master_timestamp);
}

if (ts_millis > last_master_timestamp_millis)
last_master_timestamp_millis= std::min(ts_millis, (ulonglong) now_msec);
}

/**
Update the peak lag with the latest event.

Expand Down
16 changes: 7 additions & 9 deletions sql/rpl_rli.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,15 +331,13 @@ class Relay_log_info : public Rpl_info
// cache value for sql thread
time_t penultimate_master_timestamp;

void set_last_master_timestamp(time_t ts)
{
penultimate_master_timestamp= last_master_timestamp;
last_master_timestamp= ts;
mysql_bin_log.last_master_timestamp.store(last_master_timestamp);
}

// last master timestamp in milli seconds from trx meta data
std::atomic<ulonglong> last_master_timestamp_millis;
ulonglong last_master_timestamp_millis= 0;

// milli ts for the current group
ulonglong group_timestamp_millis= 0;

void set_last_master_timestamp(time_t ts, ulonglong ts_millis);

#define PEAK_LAG_MAX_SECS 512
time_t peak_lag_last[PEAK_LAG_MAX_SECS];
Expand Down Expand Up @@ -733,7 +731,7 @@ class Relay_log_info : public Rpl_info
Coordinator notifies Workers about this event. Coordinator and Workers
maintain a bitmap of executed group that is reset with a new checkpoint.
*/
void reset_notified_checkpoint(ulong, time_t, bool);
void reset_notified_checkpoint(ulong, time_t, ulonglong, bool);

/**
Called when gaps execution is ended so it is crash-safe
Expand Down
1 change: 1 addition & 0 deletions sql/rpl_rli_pdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ typedef struct st_slave_job_group
volatile uchar done; // Flag raised by W, read and reset by Coordinator
ulong shifted; // shift the last CP bitmap at receiving a new CP
time_t ts; // Group's timestampt to update Seconds_behind_master
ulonglong ts_millis= 0;
#ifndef DBUG_OFF
bool notified; // to debug group_master_log_name change notification
#endif
Expand Down
30 changes: 19 additions & 11 deletions sql/rpl_slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3449,8 +3449,7 @@ bool show_slave_status(THD* thd, Master_info* mi)
.count();
// adjust for clock mismatch
now_millis-= mi->clock_diff_with_master * 1000;
protocol->store(now_millis -
mi->rli->last_master_timestamp_millis.load());
protocol->store(now_millis - mi->rli->last_master_timestamp_millis);
}
}

Expand Down Expand Up @@ -4649,15 +4648,27 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
last_master_timestamp if the tentative one is bigger. If the tentative
is too big so that it's beyond current slave time, we assign the
current time of the slave to the last_master_timestamp.
see @Relay_log_info::set_last_master_timestamp()
*/
time_t tentative_last_master_timestamp=
ev->when.tv_sec + (time_t) ev->exec_time;

if (tentative_last_master_timestamp > rli->last_master_timestamp)
// milli seconds behind master related for non-MTS
ulonglong tentative_last_master_ts_millis= 0;

// case: if trx meta data is enabled and this is a rows query event with
// trx meta data attempt to get the ts in milli secs
if (opt_binlog_trx_meta_data &&
ev->get_type_code() == ROWS_QUERY_LOG_EVENT &&
static_cast<Rows_query_log_event*>(ev)->has_trx_meta_data())
{
rli->set_last_master_timestamp(std::min(tentative_last_master_timestamp,
time(nullptr)));
tentative_last_master_ts_millis=
static_cast<Rows_query_log_event*>(ev)->extract_last_timestamp();
}

rli->set_last_master_timestamp(tentative_last_master_timestamp,
tentative_last_master_ts_millis);

DBUG_ASSERT(rli->last_master_timestamp >= 0);
}

Expand Down Expand Up @@ -6164,7 +6175,8 @@ bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
cnt is zero. This value means that the checkpoint information
will be completely reset.
*/
rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, need_data_lock);
rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, rli->gaq->lwm.ts_millis,
need_data_lock);

/* end-of "Coordinator::"commit_positions" */

Expand Down Expand Up @@ -7647,11 +7659,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
is always monotonically increasing
*/
mysql_mutex_lock(&rli->data_lock);
if (hb.when.tv_sec > rli->last_master_timestamp)
{
rli->set_last_master_timestamp(hb.when.tv_sec);
rli->last_master_timestamp_millis.store(hb.when.tv_sec * 1000);
}
rli->set_last_master_timestamp(hb.when.tv_sec, hb.when.tv_sec * 1000);
mysql_mutex_unlock(&rli->data_lock);

/*
Expand Down

0 comments on commit 21edb26

Please sign in to comment.