From 1f9baf4f056c54423be1415d480750c5334a1e7a Mon Sep 17 00:00:00 2001 From: Kamil Cudnik Date: Tue, 3 May 2016 12:00:19 -0700 Subject: [PATCH] Add warm restart support (#13) * Add warm restart support * Prevent hard reinit when warm start --- syncd/Makefile.am | 9 ++- syncd/profile.ini | 2 + syncd/syncd.cpp | 129 +++++++++++++++++++++++++++---- syncd/syncd.h | 5 +- syncd/syncd_counters.cpp | 14 +++- syncd/syncd_reinit.cpp | 10 ++- syncd/syncd_request_shutdown.cpp | 79 +++++++++++++++++++ 7 files changed, 226 insertions(+), 22 deletions(-) create mode 100644 syncd/profile.ini create mode 100644 syncd/syncd_request_shutdown.cpp diff --git a/syncd/Makefile.am b/syncd/Makefile.am index 676d3267a6..51b0bd7133 100644 --- a/syncd/Makefile.am +++ b/syncd/Makefile.am @@ -1,7 +1,7 @@ AM_CPPFLAGS = AM_CPPFLAGS += -I$(top_srcdir)/common -bin_PROGRAMS = syncd +bin_PROGRAMS = syncd syncd_request_shutdown if DEBUG DBGFLAGS = -ggdb -DDEBUG @@ -33,3 +33,10 @@ syncd_CPPFLAGS = $(DBGFLAGS) $(AM_CPPFLAGS) $(CFLAGS_COMMON) \ -I/usr/include/sai $(SAIFLAGS) syncd_LDADD = -lhiredis -lswsscommon -lsai -lpthread + +syncd_request_shutdown_SOURCES = syncd_request_shutdown.cpp + +syncd_request_shutdown_CPPFLAGS = $(DBGFLAGS) $(AM_CPPFLAGS) $(CFLAGS_COMMON) + +syncd_request_shutdown_LDADD = -lhiredis -lswsscommon -lpthread + diff --git a/syncd/profile.ini b/syncd/profile.ini new file mode 100644 index 0000000000..b51f4c59b4 --- /dev/null +++ b/syncd/profile.ini @@ -0,0 +1,2 @@ +SAI_WARM_BOOT_READ_FILE=/var/cache/sai_warmboot.bin +SAI_WARM_BOOT_WRITE_FILE=/var/cache/sai_warmboot.bin diff --git a/syncd/syncd.cpp b/syncd/syncd.cpp index 72a6be3f28..7231960fb6 100644 --- a/syncd/syncd.cpp +++ b/syncd/syncd.cpp @@ -1,8 +1,5 @@ -#include #include "syncd.h" -#include - std::mutex g_mutex; swss::RedisClient *g_redisClient = NULL; @@ -858,7 +855,7 @@ void sendResponse(sai_status_t status) std::vector entry; - SWSS_LOG_INFO("sending response: %s", strStatus.c_str()); + SWSS_LOG_NOTICE("sending response: 
/**
 * Command line options collected by handleCmdLine().
 *
 * Every member carries a default so that a freshly created object is in a
 * well-defined state (everything off / empty) even when no option is given
 * on the command line. Reading an uninitialized bool is undefined behaviour.
 */
struct cmdOptions
{
    // polling interval passed to startCountersThread(); set from -i / --countersInterval
    int countersThreadIntervalInSeconds = 0;

    // -d / --diag: start the diag shell thread
    bool diagShell = false;

    // -w / --warmStart: warm start was requested
    bool warmStart = false;

    // -N / --nocounters (or a zero interval): do not start the counters thread
    bool disableCountersThread = false;

    // -p / --profile: path of the profile map file
    std::string profileMapFile;
};
handleProfileMap(const std::string& profileMapFile) while(getline(profile, line)) { + if (line.size() > 0 && (line[0] == '#' || line[0] == ';')) + continue; + size_t pos = line.find("="); if (pos == std::string::npos) @@ -1009,6 +1030,34 @@ void handleProfileMap(const std::string& profileMapFile) } } +bool handleRestartQuery(swss::NotificationConsumer &restartQuery) +{ + SWSS_LOG_ENTER(); + + std::string op; + std::string data; + std::vector values; + + restartQuery.pop(op, data, values); + + SWSS_LOG_DEBUG("op = %d", op.c_str()); + + if (op == "COLD") + { + SWSS_LOG_NOTICE("received COLD switch shutdown event"); + return false; + } + + if (op == "WARM") + { + SWSS_LOG_NOTICE("received WARM switch shutdown event"); + return true; + } + + SWSS_LOG_WARN("received '%s' unknown switch shutdown event, assuming COLD", op.c_str()); + return false; +} + int main(int argc, char **argv) { swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG); @@ -1028,6 +1077,7 @@ int main(int argc, char **argv) swss::ConsumerTable *asicState = new swss::ConsumerTable(db, "ASIC_STATE"); swss::NotificationConsumer *notifySyncdQuery = new swss::NotificationConsumer(db, "NOTIFYSYNCDREQUERY"); + swss::NotificationConsumer *restartQuery = new swss::NotificationConsumer(db, "RESTARTQUERY"); // at the end we cant use producer consumer concept since // if one proces will restart there may be something in the queue @@ -1042,6 +1092,22 @@ int main(int argc, char **argv) gProfileMap[SAI_KEY_INIT_CONFIG_FILE] = mlnx_config_file; #endif /* MLNX_SAI */ + if (options.warmStart) + { + const char *warmBootReadFile = profile_get_value(0, SAI_KEY_WARM_BOOT_READ_FILE); + + SWSS_LOG_NOTICE("using warmBootReadFile: '%s'", warmBootReadFile); + + if (warmBootReadFile == NULL || access(warmBootReadFile, F_OK) == -1) + { + SWSS_LOG_WARN("user requested warmStart but warmBootReadFile is not specified or not accesible, forcing cold start"); + + options.warmStart = false; + } + } + + 
gProfileMap[SAI_KEY_WARM_BOOT] = options.warmStart ? "1" : "0"; + sai_api_initialize(0, (service_method_table_t*)&test_services); populate_sai_apis(); @@ -1060,7 +1126,7 @@ int main(int argc, char **argv) if (options.diagShell) { - SWSS_LOG_INFO("starting bcm diag shell thread"); + SWSS_LOG_NOTICE("starting bcm diag shell thread"); std::thread bcm_diag_shell_thread = std::thread(sai_diag_shell); bcm_diag_shell_thread.detach(); @@ -1068,26 +1134,29 @@ int main(int argc, char **argv) #endif /* BRCMSAI */ - SWSS_LOG_INFO("syncd started"); + SWSS_LOG_NOTICE("syncd started"); + + bool warmRestartHint = false; try { - onSyncdStart(); - - SWSS_LOG_INFO("syncd listening for events"); + onSyncdStart(options.warmStart); if (options.disableCountersThread == false) { - SWSS_LOG_INFO("starting counters thread"); + SWSS_LOG_NOTICE("starting counters thread"); startCountersThread(options.countersThreadIntervalInSeconds); } + SWSS_LOG_NOTICE("syncd listening for events"); + swss::Select s; s.addSelectable(getRequest); s.addSelectable(asicState); s.addSelectable(notifySyncdQuery); + s.addSelectable(restartQuery); while(true) { @@ -1097,6 +1166,12 @@ int main(int argc, char **argv) int result = s.select(&sel, &fd); + if (sel == restartQuery) + { + warmRestartHint = handleRestartQuery(*restartQuery); + break; + } + if (sel == notifySyncdQuery) { notifySyncd(*notifySyncdQuery); @@ -1112,9 +1187,29 @@ int main(int argc, char **argv) catch(const std::exception &e) { SWSS_LOG_ERROR("Runtime error: %s", e.what()); + + exit(EXIT_FAILURE); } endCountersThread(); + if (warmRestartHint) + { + const char *warmBootWriteFile = profile_get_value(0, SAI_KEY_WARM_BOOT_WRITE_FILE); + + SWSS_LOG_NOTICE("using warmBootWriteFile: '%s'", warmBootWriteFile); + + if (warmBootWriteFile == NULL) + { + SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown"); + + warmRestartHint = false; + } + } + + sai_switch_api->shutdown_switch(warmRestartHint); + + 
SWSS_LOG_NOTICE("calling api uninitialize"); + sai_api_uninitialize(); } diff --git a/syncd/syncd.h b/syncd/syncd.h index 8b09fe94b5..8903856896 100644 --- a/syncd/syncd.h +++ b/syncd/syncd.h @@ -6,10 +6,12 @@ #include #include #include +#include #include #include #include +#include #include "string.h" extern "C" { @@ -24,6 +26,7 @@ extern "C" { #include "swss/consumertable.h" #include "swss/notificationconsumer.h" #include "swss/notificationproducer.h" +#include "swss/selectableevent.h" #include "swss/select.h" #include "swss/scheme.h" #include "swss/logger.h" @@ -44,7 +47,7 @@ extern "C" { extern std::mutex g_mutex; -void onSyncdStart(); +void onSyncdStart(bool warmStart); void hardReinit(); sai_object_id_t replaceVidToRid(const sai_object_id_t &virtual_object_id); diff --git a/syncd/syncd_counters.cpp b/syncd/syncd_counters.cpp index 5a77f222ce..a315e27d1e 100644 --- a/syncd/syncd_counters.cpp +++ b/syncd/syncd_counters.cpp @@ -1,4 +1,5 @@ #include "syncd.h" +#include void collectCounters(swss::Table &countersTable, const std::vector &supportedCounters) @@ -84,6 +85,9 @@ std::vector getSupportedCounters(sai_object_id_t portId static volatile bool g_runCountersThread = false; static std::shared_ptr g_countersThread = NULL; +static std::mutex mtx_sleep; +static std::condition_variable cv_sleep; + void collectCountersThread(int intervalInSeconds) { SWSS_LOG_ENTER(); @@ -103,8 +107,8 @@ void collectCountersThread(int intervalInSeconds) { collectCounters(countersTable, supportedCounters); - // collect counters every second - sleep(intervalInSeconds); + std::unique_lock lk(mtx_sleep); + cv_sleep.wait_for(lk, std::chrono::seconds(intervalInSeconds)); } } @@ -123,8 +127,14 @@ void endCountersThread() g_runCountersThread = false; + cv_sleep.notify_all(); + if (g_countersThread != NULL) { + SWSS_LOG_NOTICE("counters thread join"); + g_countersThread->join(); } + + SWSS_LOG_NOTICE("counters thread ended"); } diff --git a/syncd/syncd_reinit.cpp b/syncd/syncd_reinit.cpp 
index fae67d39a6..f83e1fb4ba 100644 --- a/syncd/syncd_reinit.cpp +++ b/syncd/syncd_reinit.cpp @@ -540,7 +540,7 @@ void helperCheckVlanId() g_redisClient->hset(strKey, "NULL", "NULL"); } -void onSyncdStart() +void onSyncdStart(bool warmStart) { SWSS_LOG_ENTER(); @@ -554,5 +554,13 @@ void onSyncdStart() helperCheckPortIds(); + if (warmStart) + { + SWSS_LOG_NOTICE("skipping hard reinit since WARM start was performed"); + return; + } + + SWSS_LOG_NOTICE("performing hard reinit since COLD start was performed"); + hardReinit(); } diff --git a/syncd/syncd_request_shutdown.cpp b/syncd/syncd_request_shutdown.cpp new file mode 100644 index 0000000000..ccbbe3b3fd --- /dev/null +++ b/syncd/syncd_request_shutdown.cpp @@ -0,0 +1,79 @@ +#include +#include + +#include +#include + +#include "swss/notificationproducer.h" +#include "swss/scheme.h" +#include "swss/logger.h" + +int main(int argc, char **argv) +{ + swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG); + + SWSS_LOG_ENTER(); + + static struct option long_options[] = + { + { "cold", no_argument, 0, 'c' }, + { "warm", no_argument, 0, 'w' } + }; + + bool warmRestartHint = false; + bool optionSpecified = false; + + while(true) + { + int option_index = 0; + + int c = getopt_long(argc, argv, "cw", long_options, &option_index); + + if (c == -1) + break; + + switch (c) + { + case 'c': + warmRestartHint = false; + optionSpecified = true; + break; + + case 'w': + warmRestartHint = true; + optionSpecified = true; + break; + + default: + SWSS_LOG_ERROR("getopt failure"); + exit(EXIT_FAILURE); + } + } + + if (!optionSpecified) + { + SWSS_LOG_ERROR("no shutdown option specified"); + + std::cerr << "Shutdown option must be specified" << std::endl; + std::cerr << "---------------------------------" << std::endl; + std::cerr << " --warm -w for warm restart" << std::endl; + std::cerr << " --cold -c for cold restart" << std::endl; + + exit(EXIT_FAILURE); + } + + swss::DBConnector db(ASIC_DB, "localhost", 6379, 0); + 
swss::NotificationProducer restartQuery(&db, "RESTARTQUERY"); + + std::vector values; + + std::string op = warmRestartHint ? "WARM" : "COLD"; + + SWSS_LOG_NOTICE("requested %s shutdown", op.c_str()); + + std::cerr << "requested " << op << " shutdown" << std::endl; + + restartQuery.send(op, op, values); + + return EXIT_SUCCESS; +}