Skip to content

Commit

Permalink
[syncd] add 2-stage shutdown support for warm reboot (sonic-net#391)
Browse files Browse the repository at this point in the history
* [syncd] add support for warm boot pre-shutdown operation

- issue warm pre-shutdown from syncd_request_shutdown
- update warm shutdown state transitions in state database
- stop notifications and counter polls before pre-shutdown

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* [syncd] log execution time of important operations

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* Replace do {} while (0) with {}

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* Expand operation 'PRE' to 'PRE-SHUTDOWN'

Signed-off-by: Ying Xie <ying.xie@microsoft.com>

* code style change

* [spell check test] allow word 'pre'

Signed-off-by: Ying Xie <ying.xie@microsoft.com>
  • Loading branch information
yxieca authored Nov 28, 2018
1 parent 06418bb commit 11fc262
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 10 deletions.
90 changes: 84 additions & 6 deletions syncd/syncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <limits.h>

#include "swss/warm_restart.h"
#include "swss/table.h"

extern "C" {
#include <sai.h>
Expand Down Expand Up @@ -1935,7 +1936,12 @@ void on_switch_create_in_init_view(

sai_object_id_t switch_rid;

sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
sai_status_t status;

{
SWSS_LOG_TIMER("cold boot: create switch");
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
}

if (status != SAI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -3170,6 +3176,8 @@ typedef enum _syncd_restart_type_t

SYNCD_RESTART_TYPE_FAST,

SYNCD_RESTART_TYPE_PRE_SHUTDOWN,

} syncd_restart_type_t;

syncd_restart_type_t handleRestartQuery(swss::NotificationConsumer &restartQuery)
Expand Down Expand Up @@ -3202,6 +3210,12 @@ syncd_restart_type_t handleRestartQuery(swss::NotificationConsumer &restartQuery
return SYNCD_RESTART_TYPE_FAST;
}

if (op == "PRE-SHUTDOWN")
{
SWSS_LOG_NOTICE("received PRE_SHUTDOWN switch event");
return SYNCD_RESTART_TYPE_PRE_SHUTDOWN;
}

SWSS_LOG_WARN("received '%s' unknown switch shutdown event, assuming COLD", op.c_str());
return SYNCD_RESTART_TYPE_COLD;
}
Expand Down Expand Up @@ -3452,6 +3466,8 @@ int syncd_main(int argc, char **argv)
std::shared_ptr<swss::DBConnector> dbAsic = std::make_shared<swss::DBConnector>(ASIC_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
std::shared_ptr<swss::DBConnector> dbNtf = std::make_shared<swss::DBConnector>(ASIC_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
std::shared_ptr<swss::DBConnector> dbFlexCounter = std::make_shared<swss::DBConnector>(FLEX_COUNTER_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
std::shared_ptr<swss::DBConnector> dbState = std::make_shared<swss::DBConnector>(STATE_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0);
std::unique_ptr<swss::Table> warmRestartTable = std::unique_ptr<swss::Table>(new swss::Table(dbState.get(), STATE_WARM_RESTART_TABLE_NAME));

g_redisClient = std::make_shared<swss::RedisClient>(dbAsic.get());

Expand Down Expand Up @@ -3539,6 +3555,9 @@ int syncd_main(int argc, char **argv)

syncd_restart_type_t shutdownType = SYNCD_RESTART_TYPE_COLD;

sai_switch_api_t *sai_switch_api = NULL;
sai_api_query(SAI_API_SWITCH, (void**)&sai_switch_api);

try
{
SWSS_LOG_NOTICE("before onSyncdStart");
Expand Down Expand Up @@ -3575,7 +3594,55 @@ int syncd_main(int argc, char **argv)
*/

shutdownType = handleRestartQuery(*restartQuery);
break;
if (shutdownType != SYNCD_RESTART_TYPE_PRE_SHUTDOWN)
{
// break out of the event handling loop to shut down syncd
break;
}

// Handle switch pre-shutdown and wait for the final shutdown
// event

SWSS_LOG_TIMER("warm pre-shutdown");

FlexCounter::removeAllCounters();
stopNotificationsProcessingThread();

sai_attribute_t attr;

attr.id = SAI_SWITCH_ATTR_RESTART_WARM;
attr.value.booldata = true;

status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s for pre-shutdown",
sai_serialize_status(status).c_str());
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable->hset("warm-shutdown", "state", "set-flag-failed");
continue;
}

attr.id = SAI_SWITCH_ATTR_PRE_SHUTDOWN;
attr.value.booldata = true;

status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
if (status == SAI_STATUS_SUCCESS)
{
warmRestartTable->hset("warm-shutdown", "state", "pre-shutdown-succeeded");
}
else
{
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_PRE_SHUTDOWN=true: %s",
sai_serialize_status(status).c_str());
warmRestartTable->hset("warm-shutdown", "state", "pre-shutdown-failed");

// Restore cold shutdown.
attr.id = SAI_SWITCH_ATTR_RESTART_WARM;
attr.value.booldata = false;
status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
}
}
else if (sel == flexCounter.get())
{
Expand All @@ -3598,9 +3665,6 @@ int syncd_main(int argc, char **argv)
exit_and_notify(EXIT_FAILURE);
}

sai_switch_api_t *sai_switch_api = NULL;
sai_api_query(SAI_API_SWITCH, (void**)&sai_switch_api);

if (shutdownType == SYNCD_RESTART_TYPE_WARM)
{
const char *warmBootWriteFile = profile_get_value(0, SAI_KEY_WARM_BOOT_WRITE_FILE);
Expand All @@ -3612,6 +3676,7 @@ int syncd_main(int argc, char **argv)
SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown");

shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable->hset("warm-shutdown", "state", "warm-shutdown-failed");
}
else
{
Expand All @@ -3629,6 +3694,7 @@ int syncd_main(int argc, char **argv)
SWSS_LOG_ERROR("Failed to set SAI_SWITCH_ATTR_RESTART_WARM=true: %s, fall back to cold restart",
sai_serialize_status(status).c_str());
shutdownType = SYNCD_RESTART_TYPE_COLD;
warmRestartTable->hset("warm-shutdown", "state", "set-flag-failed");
}
}
}
Expand Down Expand Up @@ -3662,13 +3728,25 @@ int syncd_main(int argc, char **argv)
// Stop notification thread before removing switch
stopNotificationsProcessingThread();

status = sai_switch_api->remove_switch(gSwitchId);
{
SWSS_LOG_TIMER("remove switch");
status = sai_switch_api->remove_switch(gSwitchId);
}

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_NOTICE("Can't delete a switch. gSwitchId=0x%lx status=%s", gSwitchId,
sai_serialize_status(status).c_str());
}

if (shutdownType == SYNCD_RESTART_TYPE_WARM)
{
warmRestartTable->hset("warm-shutdown", "state",
(status == SAI_STATUS_SUCCESS) ?
"warm-shutdown-succeeded":
"warm-shutdown-failed");
}

SWSS_LOG_NOTICE("calling api uninitialize");

status = sai_api_uninitialize();
Expand Down
14 changes: 12 additions & 2 deletions syncd/syncd_hard_reinit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,12 @@ void processSwitches()
SWSS_LOG_NOTICE("creating switch VID: %s",
sai_serialize_object_id(switch_vid).c_str());

sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
sai_status_t status;

{
SWSS_LOG_TIMER("Cold boot: create switch");
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, attr_count, attr_list);
}

gSwitchId = switch_rid;
SWSS_LOG_NOTICE("Initialize gSwitchId with ID = 0x%lx", gSwitchId);
Expand Down Expand Up @@ -1286,7 +1291,12 @@ void performWarmRestart()
switch_attrs[i+1].value.ptr = (void *)1; // any non-null pointer
}
check_notifications_pointers((uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, (uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
sai_status_t status;

{
SWSS_LOG_TIMER("Warm boot: create switch");
status = sai_metadata_sai_switch_api->create_switch(&switch_rid, (uint32_t)NELMS(switch_attrs), &switch_attrs[0]);
}

if (status != SAI_STATUS_SUCCESS)
{
Expand Down
12 changes: 10 additions & 2 deletions syncd/syncd_request_shutdown.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ int main(int argc, char **argv)
static struct option long_options[] =
{
{ "cold", no_argument, 0, 'c' },
{ "warm", no_argument, 0, 'w' }
{ "warm", no_argument, 0, 'w' },
{ "fast", no_argument, 0, 'f' },
{ "pre", no_argument, 0, 'p' }, // Requesting pre shutdown
};

std::string op;
Expand All @@ -26,7 +28,7 @@ int main(int argc, char **argv)
{
int option_index = 0;

int c = getopt_long(argc, argv, "cw", long_options, &option_index);
int c = getopt_long(argc, argv, "cwfp", long_options, &option_index);

if (c == -1)
break;
Expand All @@ -48,6 +50,11 @@ int main(int argc, char **argv)
optionSpecified = true;
break;

case 'p':
op = "PRE-SHUTDOWN";
optionSpecified = true;
break;

default:
SWSS_LOG_ERROR("getopt failure");
exit(EXIT_FAILURE);
Expand All @@ -61,6 +68,7 @@ int main(int argc, char **argv)
std::cerr << "Shutdown option must be specified" << std::endl;
std::cerr << "---------------------------------" << std::endl;
std::cerr << " --warm -w for warm restart" << std::endl;
std::cerr << " --pre -p for warm pre-shutdown" << std::endl;
std::cerr << " --cold -c for cold restart" << std::endl;
std::cerr << " --fast -f for fast restart" << std::endl;

Expand Down
1 change: 1 addition & 0 deletions tests/aspell.en.pws
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,4 @@ VXLAN
workaroung
xoff
xon
pre

0 comments on commit 11fc262

Please sign in to comment.