Skip to content

Commit

Permalink
[syncd] Use steady clock for TimerWatchdog (sonic-net#613)
Browse files Browse the repository at this point in the history
Clock can rollback in big jumps and this causes TimerWatchdog to through
and crashes syncd. This code uses steady clock instead. Steady clock is
guaranteed to be monotonically increasing clock.

signed-of-by: Tamer Ahmed <tamer.ahmed@microsoft.com>
  • Loading branch information
tahmed-dev authored and lguohan committed May 9, 2020
1 parent e50cf66 commit 88a1982
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
11 changes: 4 additions & 7 deletions syncd/TimerWatchdog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ TimerWatchdog::TimerWatchdog(
_In_ int64_t warnTimespan):
m_run(true),
m_warnTimespan(warnTimespan),
m_callback(0)
m_callback(nullptr)
{
SWSS_LOG_ENTER();

Expand Down Expand Up @@ -68,11 +68,8 @@ void TimerWatchdog::threadFunction()

SWSS_LOG_NOTICE("starting timer watchdog thread");

int id = 0;

while (m_run)
{
id++;
std::this_thread::sleep_for(std::chrono::seconds(1));

// we make local copies, since executing functions can be so fast that
Expand All @@ -94,11 +91,11 @@ void TimerWatchdog::threadFunction()
// executing, this negative span can be arbitrary long even hours,
// and that is fine, since we don't know when OA makes next
// function call

span = now - start; // this must be always non negative

SWSS_LOG_NOTICE(" new span = %ld", span);

if (span < 0)
SWSS_LOG_THROW("negative span 'now - start': %ld - %ld", now, start);

Expand Down Expand Up @@ -129,5 +126,5 @@ int64_t TimerWatchdog::getTimeSinceEpoch()
{
SWSS_LOG_ENTER();

return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
}
33 changes: 33 additions & 0 deletions syncd/tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>

extern "C" {
#include <sai.h>
Expand All @@ -10,6 +14,7 @@ extern "C" {
#include "MetadataLogger.h"
#include "sairedis.h"
#include "sairediscommon.h"
#include "TimerWatchdog.h"

#include "meta/sai_serialize.h"
#include "meta/OidRefCounter.h"
Expand Down Expand Up @@ -775,6 +780,32 @@ void test_bulk_route_create()
sleep(10000);
}

void test_watchdog_timer_clock_rollback()
{
SWSS_LOG_ENTER();

const int64_t WARN_TIMESPAN_USEC = 30 * 1000000;
const uint8_t ROLLBACK_TIME_SEC = 5;
const uint8_t LONG_RUNNING_API_TIME_SEC = 3;

// take note of current time
struct timeval currentTime;
gettimeofday(&currentTime, NULL);

// start watchdog timer
TimerWatchdog twd(WARN_TIMESPAN_USEC);
twd.setStartTime();

// roll back time by ROLLBACK_TIME_SEC
currentTime.tv_sec -= ROLLBACK_TIME_SEC;
assert(settimeofday(&currentTime, NULL) == 0);

// Simulate long running API
sleep(LONG_RUNNING_API_TIME_SEC);

twd.setEndTime();
}

int main()
{
swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG);
Expand All @@ -800,6 +831,8 @@ int main()
sai_api_uninitialize();

printf("\n[ %s ]\n\n", sai_serialize_status(SAI_STATUS_SUCCESS).c_str());

test_watchdog_timer_clock_rollback();
}
catch (const std::exception &e)
{
Expand Down

0 comments on commit 88a1982

Please sign in to comment.