Skip to content

Commit

Permalink
Add failure handling for SAI get operations (#1768)
Browse files Browse the repository at this point in the history
What I did
Add failure handling for SAI get operations. The new function, handleSaiGetStatus, allows failures in SAI get operations to be handled according to the orch type, the SAI API type, and the SAI status.

Why I did it
Enable custom failure handling for SAI get operations.
  • Loading branch information
shi-su authored and Shi Su committed Aug 17, 2021
1 parent 47b4276 commit c9c1aa2
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 28 deletions.
6 changes: 5 additions & 1 deletion orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2311,7 +2311,11 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
else
{
SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status);
throw "AclOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "AclOrch initialization failure";
}
}

queryAclActionCapability();
Expand Down
6 changes: 5 additions & 1 deletion orchagent/copporch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status);
throw "CoppOrch initialization failure";
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw "CoppOrch initialization failure";
}
}

SWSS_LOG_INFO("Get default trap group");
Expand Down
12 changes: 10 additions & 2 deletions orchagent/crmorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,11 @@ void CrmOrch::getResAvailableCounters()
break;
}
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32;
Expand All @@ -500,7 +504,11 @@ void CrmOrch::getResAvailableCounters()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
break;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
break;
}
}

for (uint32_t i = 0; i < attr.value.aclresource.count; i++)
Expand Down
6 changes: 5 additions & 1 deletion orchagent/fdborch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port)
{
SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d",
mac.to_string().c_str(), status);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port))
Expand Down
10 changes: 7 additions & 3 deletions orchagent/fgnhgorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
{
SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d",
nextHops.to_string().c_str(), status);
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status);
if (handle_status != task_process_status::task_success)
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
{
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
}
return false;
}
return false;
}
fgNhgEntry->real_bucket_size = nhg_attr.value.u32;
}
Expand Down
29 changes: 29 additions & 0 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat
return task_need_retry;
}

task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context)
{
    /*
     * Coarse-grained handler for a failed sairedis get operation.
     *
     * Parameters:
     *   api     - SAI API the get operation belongs to; only used in log messages here.
     *   status  - status code returned by the SAI get call.
     *   context - optional caller data; not read by this base implementation.
     *
     * Contract shared with callers:
     *   task_success    - the status was handled, the SAI operation need not be retried.
     *   task_need_retry - the status could not be handled, the SAI operation should be retried
     *                     (this implementation never returns it).
     *   task_failed     - the status could not be handled and a retry is unlikely to help.
     *
     * Throws std::logic_error when the status is SAI_STATUS_NOT_IMPLEMENTED.
     *
     * TODO: 1. Add general handling logic for specific statuses
     *       2. Develop fine-grain failure handling mechanisms and replace this coarse handling
     *          in each orch.
     *       3. Take the type of sai api into consideration.
     */

    /* Callers are expected to invoke this only on failure, so success is merely flagged. */
    if (status == SAI_STATUS_SUCCESS)
    {
        SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
        return task_success;
    }

    /* A missing SAI implementation cannot be recovered from: log it and raise. */
    if (status == SAI_STATUS_NOT_IMPLEMENTED)
    {
        const auto api_name = sai_serialize_api(api);
        SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
                       api_name.c_str());
        throw std::logic_error("SAI get function not implemented");
    }

    /* Every other status is logged and reported back as a failure. */
    const auto api_name = sai_serialize_api(api);
    const auto status_name = sai_serialize_status(status);
    SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
                   api_name.c_str(), status_name.c_str());
    return task_failed;
}

bool Orch::parseHandleSaiStatusFailure(task_process_status status)
{
/*
Expand Down
1 change: 1 addition & 0 deletions orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ class Orch
virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
bool parseHandleSaiStatusFailure(task_process_status status);
private:
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);
Expand Down
102 changes: 86 additions & 16 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get CPU port, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

m_cpuPort = Port("CPU", Port::CPU);
Expand All @@ -343,7 +347,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get port number, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

m_portCount = attr.value.u32;
Expand All @@ -361,7 +369,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get port list, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

/* Get port hardware lane info */
Expand All @@ -376,7 +388,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get hardware lane list pid:%" PRIx64, port_list[i]);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

set<int> tmp_lane_set;
Expand Down Expand Up @@ -407,7 +423,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get default 1Q bridge and/or default VLAN, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

m_default1QBridge = attrs[0].value.oid;
Expand Down Expand Up @@ -437,7 +457,11 @@ void PortsOrch::removeDefaultVlanMembers()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get VLAN member list in default VLAN, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_VLAN, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

/* Remove VLAN members in default VLAN */
Expand Down Expand Up @@ -471,7 +495,11 @@ void PortsOrch::removeDefaultBridgePorts()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get bridge port list in default 1Q bridge, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}

auto bridge_port_count = attr.value.objlist.count;
Expand All @@ -486,7 +514,11 @@ void PortsOrch::removeDefaultBridgePorts()
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get bridge port type, rv:%d", status);
throw runtime_error("PortsOrch initialization failure");
task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure");
}
}
if (attr.value.s32 == SAI_BRIDGE_PORT_TYPE_PORT)
{
Expand Down Expand Up @@ -880,7 +912,11 @@ bool PortsOrch::getPortAdminStatus(sai_object_id_t id, bool &up)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get admin status for port pid:%" PRIx64, id);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

up = attr.value.booldata;
Expand Down Expand Up @@ -1805,9 +1841,19 @@ bool PortsOrch::getPortSpeed(sai_object_id_t id, sai_uint32_t &speed)
status = sai_port_api->get_port_attribute(id, 1, &attr);

if (status == SAI_STATUS_SUCCESS)
{
speed = attr.value.u32;
}
else
{
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

return status == SAI_STATUS_SUCCESS;
return true;
}

bool PortsOrch::setPortAdvSpeed(sai_object_id_t port_id, sai_uint32_t speed)
Expand Down Expand Up @@ -1847,7 +1893,11 @@ bool PortsOrch::getQueueTypeAndIndex(sai_object_id_t queue_id, string &type, uin
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get queue type and index for queue %" PRIu64 " rv:%d", queue_id, status);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_QUEUE, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

switch (attr[0].value.s32)
Expand Down Expand Up @@ -3473,7 +3523,11 @@ void PortsOrch::initializeQueues(Port &port)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get number of queues for port %s rv:%d", port.m_alias.c_str(), status);
throw runtime_error("PortsOrch initialization failure.");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure.");
}
}
SWSS_LOG_INFO("Get %d queues for port %s", attr.value.u32, port.m_alias.c_str());

Expand All @@ -3493,7 +3547,11 @@ void PortsOrch::initializeQueues(Port &port)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get queue list for port %s rv:%d", port.m_alias.c_str(), status);
throw runtime_error("PortsOrch initialization failure.");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure.");
}
}

SWSS_LOG_INFO("Get queues for port %s", port.m_alias.c_str());
Expand All @@ -3509,7 +3567,11 @@ void PortsOrch::initializePriorityGroups(Port &port)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to get number of priority groups for port %s rv:%d", port.m_alias.c_str(), status);
throw runtime_error("PortsOrch initialization failure.");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure.");
}
}
SWSS_LOG_INFO("Get %d priority groups for port %s", attr.value.u32, port.m_alias.c_str());

Expand All @@ -3530,7 +3592,11 @@ void PortsOrch::initializePriorityGroups(Port &port)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Fail to get priority group list for port %s rv:%d", port.m_alias.c_str(), status);
throw runtime_error("PortsOrch initialization failure.");
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
throw runtime_error("PortsOrch initialization failure.");
}
}
SWSS_LOG_INFO("Get priority groups for port %s", port.m_alias.c_str());
}
Expand Down Expand Up @@ -4844,7 +4910,11 @@ bool PortsOrch::setPortSerdesAttribute(sai_object_id_t port_id,
{
SWSS_LOG_ERROR("Failed to get port attr serdes id %d to port pid:0x%" PRIx64,
port_attr.id, port_id);
return false;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
if (handle_status != task_process_status::task_success)
{
return false;
}
}

if (port_attr.value.oid != SAI_NULL_OBJECT_ID)
Expand Down
24 changes: 20 additions & 4 deletions orchagent/qosorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to get number of scheduler groups for port:%s", port.m_alias.c_str());
return SAI_NULL_OBJECT_ID;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status);
if (handle_status != task_process_status::task_success)
{
return SAI_NULL_OBJECT_ID;
}
}

/* Get total groups list on the port */
Expand All @@ -947,7 +951,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to get scheduler group list for port:%s", port.m_alias.c_str());
return SAI_NULL_OBJECT_ID;
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status);
if (handle_status != task_process_status::task_success)
{
return SAI_NULL_OBJECT_ID;
}
}

m_scheduler_group_port_info[port.m_port_id] = {
Expand All @@ -969,7 +977,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to get child count for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str());
return SAI_NULL_OBJECT_ID;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status);
if (handle_status != task_process_status::task_success)
{
return SAI_NULL_OBJECT_ID;
}
}

uint32_t child_count = attr.value.u32;
Expand All @@ -988,7 +1000,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
if (SAI_STATUS_SUCCESS != sai_status)
{
SWSS_LOG_ERROR("Failed to get child list for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str());
return SAI_NULL_OBJECT_ID;
task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status);
if (handle_status != task_process_status::task_success)
{
return SAI_NULL_OBJECT_ID;
}
}

m_scheduler_group_port_info[port.m_port_id].child_groups[ii] = std::move(child_groups);
Expand Down

0 comments on commit c9c1aa2

Please sign in to comment.