Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Apr 18, 2024
1 parent 93acfe5 commit eedc5b6
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 59 deletions.
64 changes: 36 additions & 28 deletions orchagent/switchorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ void SwitchOrch::initAsicSdkHealthEventNotification()
SWSS_LOG_NOTICE("ASIC/SDK health event is not supported");
}

DBConnector cfgDb("CONFIG_DB", 0);
Table cfgSuppressASHETable(&cfgDb, CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME);
string suppressedCategories;
bool atLeastOneSupported = false;

if (!supported)
{
fvVector.emplace_back(SWITCH_CAPABILITY_TABLE_ASIC_SDK_HEALTH_EVENT_CAPABLE, "false");
Expand All @@ -188,44 +193,43 @@ void SwitchOrch::initAsicSdkHealthEventNotification()
supported = querySwitchCapability(SAI_OBJECT_TYPE_SWITCH, get<0>(c));
if (supported)
{
status = registerAsicSdkHealthEventCategories(get<0>(c), get<2>(c));
supported = (status == SAI_STATUS_SUCCESS);
}
else
{
SWSS_LOG_NOTICE("Unsupport to register ASIC/SDK health categories for severity %s", get<2>(c).c_str());
}
cfgSuppressASHETable.hget(get<2>(c), "categories", suppressedCategories);
registerAsicSdkHealthEventCategories(get<0>(c), get<2>(c), suppressedCategories, true);
suppressedCategories.clear();

if (supported)
{
m_supportedAsicSdkHealthEventAttributes.insert(get<0>(c));
fvVector.emplace_back(get<1>(c), "true");
}
else
{
SWSS_LOG_NOTICE("Unsupport to register ASIC/SDK health categories for severity %s", get<2>(c).c_str());
fvVector.emplace_back(get<1>(c), "false");
}
atLeastOneSupported = atLeastOneSupported || supported;
}

set_switch_capability(fvVector);

try
{
// Load the Lua script to eliminate oldest entries
string eliminateEventsLuaScript = swss::loadLuaScript("eliminate_events.lua");
m_eliminateEventsSha = swss::loadRedisScript(m_stateDb.get(), eliminateEventsLuaScript);

// Init timer
auto interv = timespec { .tv_sec = ASIC_SDK_HEALTH_EVENT_ELIMINATE_INTERVAL, .tv_nsec = 0 };
m_eliminateEventsTimer = new SelectableTimer(interv);
auto executor = new ExecutableTimer(m_eliminateEventsTimer, this, "ASIC_SDK_HEALTH_EVENT_ELIMINATE_TIMER");
Orch::addExecutor(executor);
m_eliminateEventsTimer->start();
}
catch (...)
if (atLeastOneSupported)
{
// This can happen only on mock test. If it happens on a real switch, we should log an error message
SWSS_LOG_ERROR("Unable to load the Lua script to eliminate events\n");
try
{
// Load the Lua script to eliminate oldest entries
string eliminateEventsLuaScript = swss::loadLuaScript("eliminate_events.lua");
m_eliminateEventsSha = swss::loadRedisScript(m_stateDb.get(), eliminateEventsLuaScript);

// Init timer
auto interv = timespec { .tv_sec = ASIC_SDK_HEALTH_EVENT_ELIMINATE_INTERVAL, .tv_nsec = 0 };
m_eliminateEventsTimer = new SelectableTimer(interv);
auto executor = new ExecutableTimer(m_eliminateEventsTimer, this, "ASIC_SDK_HEALTH_EVENT_ELIMINATE_TIMER");
Orch::addExecutor(executor);
m_eliminateEventsTimer->start();
}
catch (...)
{
// This can happen only on mock test. If it happens on a real switch, we should log an error message
SWSS_LOG_ERROR("Unable to load the Lua script to eliminate events\n");
}
}
}

Expand Down Expand Up @@ -862,7 +866,7 @@ void SwitchOrch::doCfgSwitchHashTableTask(Consumer &consumer)
}
}

sai_status_t SwitchOrch::registerAsicSdkHealthEventCategories(sai_switch_attr_t saiSeverity, const string &severityString, const string &suppressed_category_list)
void SwitchOrch::registerAsicSdkHealthEventCategories(sai_switch_attr_t saiSeverity, const string &severityString, const string &suppressed_category_list, bool isInitializing)
{
sai_status_t status;
set<sai_switch_asic_sdk_health_category_t> interested_categories_set = switch_asic_sdk_health_event_category_universal_set;
Expand All @@ -886,6 +890,12 @@ sai_status_t SwitchOrch::registerAsicSdkHealthEventCategories(sai_switch_attr_t
}
}

if (isInitializing && interested_categories_set.empty())
{
SWSS_LOG_INFO("All categories are suppressed for severity %s", severityString.c_str());
return;
}

vector<int32_t> sai_categories(interested_categories_set.begin(), interested_categories_set.end());
sai_attribute_t attr;

Expand All @@ -898,8 +908,6 @@ sai_status_t SwitchOrch::registerAsicSdkHealthEventCategories(sai_switch_attr_t
{
SWSS_LOG_ERROR("Failed to register ASIC/SDK health event categories for severity %s, status: %s", severityString.c_str(), sai_serialize_status(status).c_str());
}

return status;
}

void SwitchOrch::doCfgSuppressAsicSdkHealthEventTableTask(Consumer &consumer)
Expand Down
2 changes: 1 addition & 1 deletion orchagent/switchorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class SwitchOrch : public Orch
uint32_t m_fatalEventCount = 0;

void initAsicSdkHealthEventNotification();
sai_status_t registerAsicSdkHealthEventCategories(sai_switch_attr_t saiSeverity, const std::string &severityString, const std::string &suppressed_category_list="");
void registerAsicSdkHealthEventCategories(sai_switch_attr_t saiSeverity, const std::string &severityString, const std::string &suppressed_category_list="", bool isInitializing=false);

// Switch hash SAI defaults
struct {
Expand Down
96 changes: 66 additions & 30 deletions tests/mock_tests/switchorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ namespace switchorch_test
shared_ptr<swss::DBConnector> m_state_db;

sai_switch_attr_t _ut_stub_asic_sdk_health_event_attribute_to_check;
bool _ut_stub_asic_sdk_health_event_check_all;
uint32_t _ut_stub_asic_sdk_health_event_call_count;
map<sai_switch_attr_t, set<sai_switch_asic_sdk_health_category_t>> _ut_stub_asic_sdk_health_event_category_sets;
set<sai_switch_asic_sdk_health_category_t> _ut_stub_asic_sdk_health_event_passed_categories;

bool _ut_reg_event_unsupported;
Expand All @@ -47,10 +50,11 @@ namespace switchorch_test
case SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY:
case SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY:
case SAI_SWITCH_ATTR_REG_NOTICE_SWITCH_ASIC_SDK_HEALTH_CATEGORY:
if (_ut_stub_asic_sdk_health_event_attribute_to_check == attr[0].id)
if (_ut_stub_asic_sdk_health_event_check_all)
{
_ut_stub_asic_sdk_health_event_call_count++;
auto *passed_category_list = reinterpret_cast<sai_switch_asic_sdk_health_category_t*>(attr[0].value.s32list.list);
_ut_stub_asic_sdk_health_event_passed_categories = set<sai_switch_asic_sdk_health_category_t>(passed_category_list, passed_category_list + attr[0].value.s32list.count);
_ut_stub_asic_sdk_health_event_category_sets[(sai_switch_attr_t)attr[0].id] = set<sai_switch_asic_sdk_health_category_t>(passed_category_list, passed_category_list + attr[0].value.s32list.count);
}
return SAI_STATUS_SUCCESS;
default:
Expand Down Expand Up @@ -80,6 +84,11 @@ namespace switchorch_test

void SetUp() override
{
// Init switch and create dependencies
m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
m_config_db = make_shared<swss::DBConnector>("CONFIG_DB", 0);
m_state_db = make_shared<swss::DBConnector>("STATE_DB", 0);

_ut_reg_event_unsupported = false;

map<string, string> profile = {
Expand All @@ -100,11 +109,6 @@ namespace switchorch_test

void initSwitchOrch()
{
// Init switch and create dependencies
m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
m_config_db = make_shared<swss::DBConnector>("CONFIG_DB", 0);
m_state_db = make_shared<swss::DBConnector>("STATE_DB", 0);

TableConnector stateDbSwitchTable(m_state_db.get(), "SWITCH_CAPABILITY");
TableConnector conf_asic_sensors(m_config_db.get(), CFG_ASIC_SENSORS_TABLE_NAME);
TableConnector app_switch_table(m_app_db.get(), APP_SWITCH_TABLE_NAME);
Expand All @@ -122,6 +126,10 @@ namespace switchorch_test

void TearDown() override
{
::testing_db::reset();

_ut_stub_asic_sdk_health_event_category_sets.clear();

gDirectory.m_values.clear();

delete gSwitchOrch;
Expand All @@ -133,19 +141,40 @@ namespace switchorch_test

TEST_F(SwitchOrchTest, SwitchOrchTestSuppressCategories)
{
initSwitchOrch();
Table suppressAsicSdkHealthEventTable = Table(m_config_db.get(), CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME);

suppressAsicSdkHealthEventTable.set("fatal",
{
{"max_events", "1000"}
});
suppressAsicSdkHealthEventTable.set("warning",
{
{"categories", "software,firmware,cpu_hw,asic_hw"}
});

_ut_stub_asic_sdk_health_event_check_all = true;
auto call_count = _ut_stub_asic_sdk_health_event_call_count;

_hook_sai_apis();
initSwitchOrch();

ASSERT_TRUE(gSwitchOrch->m_eliminateEventsTimer != nullptr);

vector<string> ts;
std::deque<KeyOpFieldsValuesTuple> entries;
Table suppressAsicSdkHealthEventTable = Table(m_config_db.get(), CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME);
set<sai_switch_asic_sdk_health_category_t> all_categories({
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_SW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_CPU_HW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW});
set<sai_switch_asic_sdk_health_category_t> empty_category;

call_count += 2;
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY], all_categories);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY], empty_category);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_NOTICE_SWITCH_ASIC_SDK_HEALTH_CATEGORY], all_categories);

// case: severity: fatal, operation: suppress all categories
entries.push_back({"fatal", "SET",
{
Expand All @@ -154,9 +183,10 @@ namespace switchorch_test
auto consumer = dynamic_cast<Consumer *>(gSwitchOrch->getExecutor(CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME));
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, empty_category);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY], empty_category);
call_count++;
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);

// case: severity: warning, operation: suppress partial categories
entries.push_back({"warning", "SET",
Expand All @@ -165,11 +195,13 @@ namespace switchorch_test
}});
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, set<sai_switch_asic_sdk_health_category_t>({
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW}));
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY],
set<sai_switch_asic_sdk_health_category_t>({
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW}));
call_count++;
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);

// case: invalid severity, nothing changed (to satisfy coverate)
entries.push_back({"warninga", "SET",
Expand All @@ -178,11 +210,9 @@ namespace switchorch_test
}});
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, set<sai_switch_asic_sdk_health_category_t>({
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW}));
// No SAI API called
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);

// case: severity: warning, operation: set max_events only, which means to remove suppress list
entries.push_back({"warning", "SET",
Expand All @@ -191,21 +221,19 @@ namespace switchorch_test
}});
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, set<sai_switch_asic_sdk_health_category_t>({
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_SW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_CPU_HW,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_ASIC_HW}));
call_count++;
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_WARNING_SWITCH_ASIC_SDK_HEALTH_CATEGORY], all_categories);

// case: severity: notice, operation: suppress no category
entries.push_back({"notice", "DEL", {}});
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_NOTICE_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, all_categories);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_NOTICE_SWITCH_ASIC_SDK_HEALTH_CATEGORY], all_categories);
call_count++;
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);

_unhook_sai_apis();
}
Expand All @@ -214,6 +242,8 @@ namespace switchorch_test
{
initSwitchOrch();

ASSERT_TRUE(gSwitchOrch->m_eliminateEventsTimer != nullptr);

string value;
gSwitchOrch->m_switchTable.hget("switch", SWITCH_CAPABILITY_TABLE_ASIC_SDK_HEALTH_EVENT_CAPABLE, value);
ASSERT_EQ(value, "true");
Expand All @@ -228,9 +258,13 @@ namespace switchorch_test
TEST_F(SwitchOrchTest, SwitchOrchTestCheckCapabilityUnsupported)
{
_ut_reg_event_unsupported = true;
_ut_stub_asic_sdk_health_event_check_all = true;

_hook_sai_apis();
initSwitchOrch();

ASSERT_EQ(gSwitchOrch->m_eliminateEventsTimer, nullptr);

string value;
gSwitchOrch->m_switchTable.hget("switch", SWITCH_CAPABILITY_TABLE_ASIC_SDK_HEALTH_EVENT_CAPABLE, value);
ASSERT_EQ(value, "false");
Expand All @@ -253,16 +287,18 @@ namespace switchorch_test
auto consumer = dynamic_cast<Consumer *>(gSwitchOrch->getExecutor(CFG_SUPPRESS_ASIC_SDK_HEALTH_EVENT_NAME));
consumer->addToSync(entries);
entries.clear();
_ut_stub_asic_sdk_health_event_attribute_to_check = SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY;
_ut_stub_asic_sdk_health_event_passed_categories = empty_category;
auto call_count = _ut_stub_asic_sdk_health_event_call_count;
static_cast<Orch *>(gSwitchOrch)->doTask();
ASSERT_EQ(_ut_stub_asic_sdk_health_event_passed_categories, empty_category);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_category_sets[SAI_SWITCH_ATTR_REG_FATAL_SWITCH_ASIC_SDK_HEALTH_CATEGORY], empty_category);
ASSERT_EQ(_ut_stub_asic_sdk_health_event_call_count, call_count);
}

TEST_F(SwitchOrchTest, SwitchOrchTestHandleEvent)
{
initSwitchOrch();

ASSERT_TRUE(gSwitchOrch->m_eliminateEventsTimer != nullptr);

sai_timespec_t timestamp = {.tv_sec = 1701160447, .tv_nsec = 538710245};
sai_switch_health_data_t data = {.data_type = SAI_HEALTH_DATA_TYPE_GENERAL};
vector<uint8_t> data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100});
Expand Down

0 comments on commit eedc5b6

Please sign in to comment.