Skip to content

Commit

Permalink
Fix issue: OA isn't aborted on receiving shutdown after fatal event
Browse files Browse the repository at this point in the history
Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Mar 14, 2024
1 parent d18b7af commit 9291de7
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 0 deletions.
5 changes: 5 additions & 0 deletions orchagent/notifications.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ void on_switch_shutdown_request(sai_object_id_t switch_id)
/* TODO: Later a better restart story will be told here */
SWSS_LOG_ERROR("Syncd stopped");

if (gSwitchOrch->isFatalEventReceived())
{
abort();
}

/*
The quick_exit() is used instead of the exit() to avoid the following data race:
* the exit() calls the destructors for global static variables (e.g. BufferOrch::m_buffer_type_maps)
Expand Down
5 changes: 5 additions & 0 deletions orchagent/switchorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,11 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id,
m_asicSdkHealthEventTable->set(time_ss.str(),values);

event_publish(g_events_handle, "asic-sdk-health-event", &params);

if (severity == SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL)
{
m_fatalEventCount++;
}
}

bool SwitchOrch::setAgingFDB(uint32_t sec)
Expand Down
7 changes: 7 additions & 0 deletions orchagent/switchorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ class SwitchOrch : public Orch
sai_switch_asic_sdk_health_category_t category,
sai_switch_health_data_t data,
const sai_u8_list_t &description);

// Reports whether at least one fatal-severity ASIC/SDK health event has been counted so far.
inline bool isFatalEventReceived() const
{
    return m_fatalEventCount > 0;
}

private:
void doTask(Consumer &consumer);
void doTask(swss::SelectableTimer &timer);
Expand Down Expand Up @@ -120,6 +126,7 @@ class SwitchOrch : public Orch
std::set<sai_switch_attr_t> m_supportedAsicSdkHealthEventAttributes;
std::string m_eliminateEventsSha;
swss::SelectableTimer* m_eliminateEventsTimer = nullptr;
// Number of health events reported with SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL;
// incremented in onSwitchAsicSdkHealthEvent and queried via isFatalEventReceived().
uint32_t m_fatalEventCount = 0;

void initAsicSdkHealthEventNotification();
sai_status_t registerAsicSdkHealthEventCategories(sai_switch_attr_t saiSeverity, const std::string &severityString, const std::string &suppressed_category_list="");
Expand Down

0 comments on commit 9291de7

Please sign in to comment.