Skip to content

Commit

Permalink
[sai_failure_dump]Invoking dump during SAI failure (sonic-net#2644)
Browse files Browse the repository at this point in the history
* [sai_failure_dump]Invoking dump during SAI failure
  • Loading branch information
dgsudharsan authored Feb 8, 2023
1 parent 065a471 commit 44ea6a0
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 14 deletions.
10 changes: 5 additions & 5 deletions orchagent/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void syncd_apply_view()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
}

Expand Down Expand Up @@ -619,7 +619,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create a switch, rv:%d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId);

Expand Down Expand Up @@ -650,7 +650,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}
else
{
Expand All @@ -665,7 +665,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}

gVirtualRouterId = attr.value.oid;
Expand Down Expand Up @@ -707,7 +707,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create underlay router interface %d", status);
exit(EXIT_FAILURE);
handleSaiFailure(true);
}

SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId);
Expand Down
2 changes: 1 addition & 1 deletion orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ void OrchDaemon::flush()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to flush redis pipeline %d", status);
abort();
handleSaiFailure(true);
}
}

Expand Down
44 changes: 37 additions & 7 deletions orchagent/saihelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
case SAI_API_HOSTIF:
Expand All @@ -514,8 +515,10 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
default:
switch (status)
{
Expand All @@ -525,7 +528,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
}
return task_need_retry;
Expand Down Expand Up @@ -566,8 +570,10 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
case SAI_API_TUNNEL:
switch (status)
{
Expand All @@ -578,12 +584,15 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
break;
default:
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}

return task_need_retry;
Expand Down Expand Up @@ -611,7 +620,8 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo
default:
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
abort();
handleSaiFailure(true);
break;
}
return task_need_retry;
}
Expand Down Expand Up @@ -663,3 +673,23 @@ bool parseHandleSaiStatusFailure(task_process_status status)
}
return true;
}

/* Handling SAI failure. Request redis to invoke SAI failure dump and abort if set*/
void handleSaiFailure(bool abort_on_failure)
{
SWSS_LOG_ENTER();

sai_attribute_t attr;

attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to take sai failure dump %d", status);
}
if (abort_on_failure)
{
abort();
}
}
2 changes: 1 addition & 1 deletion orchagent/saihelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);

void handleSaiFailure(bool abort_on_failure);
1 change: 1 addition & 0 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ tests_SOURCES = aclorch_ut.cpp \
flowcounterrouteorch_ut.cpp \
orchdaemon_ut.cpp \
warmrestartassist_ut.cpp \
test_failure_handling.cpp \
$(top_srcdir)/lib/gearboxutils.cpp \
$(top_srcdir)/lib/subintf.cpp \
$(top_srcdir)/orchagent/orchdaemon.cpp \
Expand Down
81 changes: 81 additions & 0 deletions tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "notifier.h"
#define private public
#include "pfcactionhandler.h"
#include <sys/mman.h>
#undef private

#include <sstream>
Expand All @@ -21,6 +22,8 @@ namespace portsorch_test

sai_port_api_t ut_sai_port_api;
sai_port_api_t *pold_sai_port_api;
sai_switch_api_t ut_sai_switch_api;
sai_switch_api_t *pold_sai_switch_api;

bool not_support_fetching_fec;
vector<sai_port_fec_mode_t> mock_port_fec_modes = {SAI_PORT_FEC_MODE_RS, SAI_PORT_FEC_MODE_FC};
Expand Down Expand Up @@ -66,9 +69,28 @@ namespace portsorch_test
_sai_set_port_fec_count++;
_sai_port_fec_mode = attr[0].value.s32;
}
else if (attr[0].id == SAI_PORT_ATTR_AUTO_NEG_MODE)
{
/* Simulating failure case */
return SAI_STATUS_FAILURE;
}
return pold_sai_port_api->set_port_attribute(port_id, attr);
}

uint32_t *_sai_syncd_notifications_count;
int32_t *_sai_syncd_notification_event;
/*
 * Test stub installed in place of sai_switch_api->set_switch_attribute.
 *
 * When the attribute being set is SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD, the stub
 * bumps the notification counter and records the requested event through the
 * _sai_syncd_notifications_count / _sai_syncd_notification_event pointers.
 * Every call, matching or not, is then forwarded to the original switch API
 * and its status is returned unchanged.
 */
sai_status_t _ut_stub_sai_set_switch_attribute(
    _In_ sai_object_id_t switch_id,
    _In_ const sai_attribute_t *attr)
{
    if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD)
    {
        // Compound increment. `=+ 1` would parse as `= (+1)` and overwrite the
        // counter with 1 on every call instead of counting notifications.
        *_sai_syncd_notifications_count += 1;
        *_sai_syncd_notification_event = attr[0].value.s32;
    }
    return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
}

void _hook_sai_port_api()
{
ut_sai_port_api = *sai_port_api;
Expand All @@ -83,6 +105,19 @@ namespace portsorch_test
sai_port_api = pold_sai_port_api;
}

/*
 * Redirect sai_switch_api to a local copy of the switch API table whose
 * set_switch_attribute entry is replaced by the test stub. The original table
 * pointer is kept in pold_sai_switch_api so it can be restored later.
 */
void _hook_sai_switch_api()
{
    // Remember the real table first, then clone it into the test-owned copy.
    pold_sai_switch_api = sai_switch_api;
    ut_sai_switch_api = *pold_sai_switch_api;

    // Only set_switch_attribute is intercepted; all other entries stay as copied.
    ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;

    sai_switch_api = &ut_sai_switch_api;
}

void _unhook_sai_switch_api()
{
sai_switch_api = pold_sai_switch_api;
}

sai_queue_api_t ut_sai_queue_api;
sai_queue_api_t *pold_sai_queue_api;
int _sai_set_queue_attr_count = 0;
Expand Down Expand Up @@ -473,6 +508,52 @@ namespace portsorch_test
_unhook_sai_port_api();
}

/*
 * Checks that a failing port attribute set makes orchagent request a SAI
 * failure dump before it dies.
 *
 * Flow: populate the port table and create the ports, then push an
 * "autoneg on" update for Ethernet0 and run doTask() inside a death test.
 * The switch-attribute stub records the NOTIFY_SYNCD request in two
 * MAP_SHARED anonymous mappings, which are inspected afterwards.
 */
TEST_F(PortsOrchTest, PortTestSAIFailureHandling)
{
    // Map the counters before any hook is installed so the switch stub never
    // runs with unset pointers, and so a mapping failure can bail out without
    // leaving the SAI APIs hooked.
    // NOTE(review): MAP_SHARED is presumably needed because the death-test
    // statement runs in a child process whose writes must be visible here.
    void *count_mem = mmap(nullptr, sizeof(uint32_t), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    void *event_mem = mmap(nullptr, sizeof(int32_t), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    ASSERT_TRUE(count_mem != MAP_FAILED);
    ASSERT_TRUE(event_mem != MAP_FAILED);
    _sai_syncd_notifications_count = static_cast<uint32_t *>(count_mem);
    _sai_syncd_notification_event = static_cast<int32_t *>(event_mem);

    _hook_sai_port_api();
    _hook_sai_switch_api();
    Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
    std::deque<KeyOpFieldsValuesTuple> entries;

    not_support_fetching_fec = false;
    // Get SAI default ports to populate DB
    auto ports = ut_helper::getInitialSaiPorts();

    for (const auto &it : ports)
    {
        portTable.set(it.first, it.second);
    }

    // Set PortConfigDone
    portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });

    // refill consumer
    gPortsOrch->addExistingData(&portTable);

    // Apply configuration :
    //  create ports
    static_cast<Orch *>(gPortsOrch)->doTask();

    // Start counting only from here: just the failing update below is of interest.
    *_sai_syncd_notifications_count = 0;

    entries.push_back({"Ethernet0", "SET",
                       {
                           {"autoneg", "on"}
                       }});
    auto consumer = dynamic_cast<Consumer *>(gPortsOrch->getExecutor(APP_PORT_TABLE_NAME));
    consumer->addToSync(entries);

    // EXPECT_* rather than ASSERT_* from here on: a failed check must not
    // return early and skip the unhook/unmap cleanup below.
    EXPECT_DEATH({static_cast<Orch *>(gPortsOrch)->doTask();}, "");

    EXPECT_EQ(*_sai_syncd_notifications_count, 1u);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    _unhook_sai_port_api();
    _unhook_sai_switch_api();

    // Release the shared mappings and drop the now-invalid pointers.
    munmap(count_mem, sizeof(uint32_t));
    munmap(event_mem, sizeof(int32_t));
    _sai_syncd_notifications_count = nullptr;
    _sai_syncd_notification_event = nullptr;
}

TEST_F(PortsOrchTest, PortReadinessColdBoot)
{
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
Expand Down
82 changes: 82 additions & 0 deletions tests/mock_tests/test_failure_handling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include "saihelper.h"
#include "ut_helper.h"
#include <sys/mman.h>

extern sai_switch_api_t *sai_switch_api;

namespace saifailure_test
{
/* Fixture for the SAI failure handling tests; it adds nothing to ::testing::Test. */
struct SaiFailureTest : public ::testing::Test {};
uint32_t *_sai_syncd_notifications_count;
int32_t *_sai_syncd_notification_event;
sai_switch_api_t *pold_sai_switch_api;
sai_switch_api_t ut_sai_switch_api;

/*
 * Replacement for sai_switch_api->set_switch_attribute used by the tests.
 *
 * A SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD request increments the notification
 * counter and stores the requested event through the two global pointers.
 * The call is always passed on to the saved original API afterwards.
 */
sai_status_t _ut_stub_sai_set_switch_attribute(
    _In_ sai_object_id_t switch_id,
    _In_ const sai_attribute_t *attr)
{
    const bool is_syncd_notification = (attr->id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD);
    if (is_syncd_notification)
    {
        ++(*_sai_syncd_notifications_count);
        *_sai_syncd_notification_event = attr->value.s32;
    }

    return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
}

/*
 * Swap sai_switch_api for a test-owned copy of the switch API table in which
 * set_switch_attribute points at the stub. The previous table pointer is
 * saved in pold_sai_switch_api.
 */
void _hook_sai_switch_api()
{
    // Save the real table, then build the patched copy from it.
    pold_sai_switch_api = sai_switch_api;
    ut_sai_switch_api = *pold_sai_switch_api;

    // Intercept attribute sets only; every other entry is left as copied.
    ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;

    sai_switch_api = &ut_sai_switch_api;
}

void _unhook_sai_switch_api()
{
sai_switch_api = pold_sai_switch_api;
}

/*
 * Exercises the handleSai{Create,Set,Remove}Status helpers with failing
 * statuses and checks, for each one, that the process dies and that exactly
 * one more NOTIFY_SYNCD / INVOKE_DUMP request was recorded by the switch stub.
 *
 * The counters live in MAP_SHARED anonymous mappings.
 * NOTE(review): this is presumably so that writes made by the death-test child
 * process are visible to the checks that follow in this process.
 */
TEST_F(SaiFailureTest, handleSaiFailure)
{
    // Map and validate before hooking so a mapping failure cannot leave the
    // switch API hooked or make the stub write through an invalid pointer.
    void *count_mem = mmap(nullptr, sizeof(uint32_t), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    void *event_mem = mmap(nullptr, sizeof(int32_t), PROT_READ | PROT_WRITE,
                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    ASSERT_TRUE(count_mem != MAP_FAILED);
    ASSERT_TRUE(event_mem != MAP_FAILED);
    _sai_syncd_notifications_count = static_cast<uint32_t *>(count_mem);
    _sai_syncd_notification_event = static_cast<int32_t *>(event_mem);

    _hook_sai_switch_api();
    *_sai_syncd_notifications_count = 0;
    uint32_t notif_count = *_sai_syncd_notifications_count;

    // EXPECT_* rather than ASSERT_* below: a failed check must not return
    // early and skip the unhook/unmap cleanup at the end of the test.
    EXPECT_DEATH({handleSaiCreateStatus(SAI_API_FDB, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiCreateStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    EXPECT_DEATH({handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_FAILURE);}, "");
    EXPECT_EQ(*_sai_syncd_notifications_count, ++notif_count);
    EXPECT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);

    _unhook_sai_switch_api();

    // Release the shared mappings and drop the now-invalid pointers.
    munmap(count_mem, sizeof(uint32_t));
    munmap(event_mem, sizeof(int32_t));
    _sai_syncd_notifications_count = nullptr;
    _sai_syncd_notification_event = nullptr;
}
}

0 comments on commit 44ea6a0

Please sign in to comment.