Skip to content

Commit

Permalink
Handler Port oper down error status notification (#3350)
Browse files Browse the repository at this point in the history
Handler Port oper down error status notification as per sai_port_error_status_t flag as part of existing port oper status notification sai_port_oper_status_notification_t

What I did
Whenever Orchagent gets the notification for port_state_change for link down event, OA will update the error count for the error flags that are set and the timestamp of the error port down event in new STATE_DB table PORT_OPERR_TABLE

Why I did it
To correlate various error events like MAC local and remote fault with the link oper down status notification.
  • Loading branch information
prgeor authored Nov 8, 2024
1 parent ea3e76f commit 956ebd6
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 5 deletions.
4 changes: 3 additions & 1 deletion orchagent/p4orch/tests/fake_portorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ extern "C"

PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector<table_name_with_pri_t> &tableNames,
DBConnector *chassisAppDb)
: Orch(db, tableNames), m_portStateTable(stateDb, STATE_PORT_TABLE_NAME),
: Orch(db, tableNames),
m_portStateTable(stateDb, STATE_PORT_TABLE_NAME),
m_portOpErrTable(stateDb, STATE_PORT_OPER_ERR_TABLE_NAME),
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ,
Expand Down
44 changes: 43 additions & 1 deletion orchagent/port.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ extern "C" {
#include <vector>
#include <map>
#include <bitset>
#include <chrono>
#include <unordered_set>

#include <iomanip>
#include <sstream>
#include <macaddress.h>
#include <sairedis.h>

Expand Down Expand Up @@ -74,6 +76,42 @@ struct SystemLagInfo
int32_t spa_id = 0;
};

class PortOperErrorEvent
{
public:
PortOperErrorEvent() = default;
PortOperErrorEvent(const sai_port_error_status_t error, std::string key) : m_errorFlag(error), m_dbKeyError(key){}
~PortOperErrorEvent() = default;

inline void incrementErrorCount(void) { m_errorCount++; }

inline size_t getErrorCount(void) const { return m_errorCount; }

void recordEventTime(void) {
auto now = std::chrono::system_clock::now();
m_eventTime = std::chrono::system_clock::to_time_t(now);
}

std::string getEventTime(void) {
std::ostringstream oss;
oss << std::put_time(std::gmtime(&m_eventTime), "%Y-%m-%d %H:%M:%S");
return oss.str();
}

inline std::string getDbKey(void) const { return m_dbKeyError; }

// Returns true if port oper error flag in sai_port_error_status_t is set
bool isErrorSet(sai_port_error_status_t errstatus) const { return (m_errorFlag & errstatus);}

static const std::unordered_map<sai_port_error_status_t, std::string> db_key_errors;

private:
sai_port_error_status_t m_errorFlag = SAI_PORT_ERROR_STATUS_CLEAR;
size_t m_errorCount = 0;
std::string m_dbKeyError; // DB key for this port error
std::time_t m_eventTime = 0;
};

class Port
{
public:
Expand Down Expand Up @@ -155,6 +193,7 @@ class Port
sai_object_id_t m_parent_port_id = 0;
uint32_t m_dependency_bitmap = 0;
sai_port_oper_status_t m_oper_status = SAI_PORT_OPER_STATUS_UNKNOWN;
sai_port_error_status_t m_oper_error_status = SAI_PORT_ERROR_STATUS_CLEAR; //Bitmap of last port oper error status
std::set<std::string> m_members;
std::set<std::string> m_child_ports;
std::vector<sai_object_id_t> m_queue_ids;
Expand Down Expand Up @@ -193,6 +232,9 @@ class Port
sai_object_id_t m_system_side_id = 0;
sai_object_id_t m_line_side_id = 0;

/* Port oper error status to event map*/
std::unordered_map<sai_port_error_status_t, PortOperErrorEvent> m_portOperErrorToEvent;

/* pre-emphasis */
std::map<sai_port_serdes_attr_t, std::vector<uint32_t>> m_preemphasis;

Expand Down
122 changes: 119 additions & 3 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,25 @@ static char* hostif_vlan_tag[] = {
[SAI_HOSTIF_VLAN_TAG_ORIGINAL] = "SAI_HOSTIF_VLAN_TAG_ORIGINAL"
};

const std::unordered_map<sai_port_error_status_t, std::string> PortOperErrorEvent::db_key_errors =
{
// SAI port oper error status to error name mapping
{ SAI_PORT_ERROR_STATUS_MAC_LOCAL_FAULT, "mac_local_fault"},
{ SAI_PORT_ERROR_STATUS_MAC_REMOTE_FAULT, "mac_remote_fault"},
{ SAI_PORT_ERROR_STATUS_FEC_SYNC_LOSS, "fec_sync_loss"},
{ SAI_PORT_ERROR_STATUS_FEC_LOSS_ALIGNMENT_MARKER, "fec_alignment_loss"},
{ SAI_PORT_ERROR_STATUS_HIGH_SER, "high_ser_error"},
{ SAI_PORT_ERROR_STATUS_HIGH_BER, "high ber_error"},
{ SAI_PORT_ERROR_STATUS_CRC_RATE, "crc_rate"},
{ SAI_PORT_ERROR_STATUS_DATA_UNIT_CRC_ERROR, "data_unit_crc_error"},
{ SAI_PORT_ERROR_STATUS_DATA_UNIT_SIZE, "data_unit_size"},
{ SAI_PORT_ERROR_STATUS_DATA_UNIT_MISALIGNMENT_ERROR, "data_unit_misalignment_error"},
{ SAI_PORT_ERROR_STATUS_CODE_GROUP_ERROR, "code_group_error"},
{ SAI_PORT_ERROR_STATUS_SIGNAL_LOCAL_ERROR, "signal_local_error"},
{ SAI_PORT_ERROR_STATUS_NO_RX_REACHABILITY, "no_rx_reachability"}
};


// functions ----------------------------------------------------------------------------------------------------------

static bool isValidPortTypeForLagMember(const Port& port)
Expand Down Expand Up @@ -509,6 +528,7 @@ bool PortsOrch::checkPathTracingCapability()
PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector<table_name_with_pri_t> &tableNames, DBConnector *chassisAppDb) :
Orch(db, tableNames),
m_portStateTable(stateDb, STATE_PORT_TABLE_NAME),
m_portOpErrTable(stateDb, STATE_PORT_OPER_ERR_TABLE_NAME),
port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false),
gb_port_stat_manager(true,
PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
Expand Down Expand Up @@ -808,6 +828,26 @@ void PortsOrch::initializeCpuPort()
SWSS_LOG_NOTICE("Get CPU port pid:%" PRIx64, this->m_cpuPort.m_port_id);
}

// Creating mapping of various port oper errors for error handling
void PortsOrch::initializePortOperErrors(Port &port)
{
SWSS_LOG_ENTER();

SWSS_LOG_NOTICE("Initialize port oper errors for port %s", port.m_alias.c_str());

for (auto& error : PortOperErrorEvent::db_key_errors)
{
const sai_port_error_status_t error_status = error.first;
std::string error_name = error.second;

port.m_portOperErrorToEvent[error_status] = PortOperErrorEvent(error_status, error_name);
SWSS_LOG_NOTICE("Initialize port %s error %s flag=0x%" PRIx32,
port.m_alias.c_str(),
error_name.c_str(),
error_status);
}
}

void PortsOrch::initializePorts()
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -3351,6 +3391,26 @@ void PortsOrch::updateDbPortFlapCount(Port& port, sai_port_oper_status_t pstatus
m_portTable->set(port.m_alias, tuples);
}

void PortsOrch::updateDbPortOperError(Port& port, PortOperErrorEvent *pevent)
{
SWSS_LOG_ENTER();

auto key = pevent->getDbKey();
vector<FieldValueTuple> tuples;
FieldValueTuple tup1("oper_error_status", std::to_string(port.m_oper_error_status));
tuples.push_back(tup1);

size_t count = pevent->getErrorCount();
FieldValueTuple tup2(key + "_count", std::to_string(count));
tuples.push_back(tup2);

auto time = pevent->getEventTime();
FieldValueTuple tup3(key + "_time", time);
tuples.push_back(tup3);

m_portOpErrTable.set(port.m_alias, tuples);
}

void PortsOrch::updateDbPortOperStatus(const Port& port, sai_port_oper_status_t status) const
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -4613,6 +4673,8 @@ void PortsOrch::doPortTask(Consumer &consumer)
/* create host_tx_ready field in state-db */
initHostTxReadyState(p);

initializePortOperErrors(p);

// Restore admin status if the port was brought down
if (admin_status != p.m_admin_state_up)
{
Expand Down Expand Up @@ -8019,12 +8081,14 @@ void PortsOrch::doTask(NotificationConsumer &consumer)

for (uint32_t i = 0; i < count; i++)
{
Port port;
sai_object_id_t id = portoperstatus[i].port_id;
sai_port_oper_status_t status = portoperstatus[i].port_state;
sai_port_error_status_t port_oper_err = portoperstatus[i].port_error_status;

SWSS_LOG_NOTICE("Get port state change notification id:%" PRIx64 " status:%d", id, status);

Port port;
SWSS_LOG_NOTICE("Get port state change notification id:%" PRIx64 " status:%d "
"oper_error_status:0x%" PRIx32,
id, status, port_oper_err);

if (!getPort(id, port))
{
Expand Down Expand Up @@ -8061,6 +8125,11 @@ void PortsOrch::doTask(NotificationConsumer &consumer)
{
updateDbPortOperFec(port, "N/A");
}
} else {
if (port_oper_err)
{
updatePortErrorStatus(port, port_oper_err);
}
}

/* update m_portList */
Expand Down Expand Up @@ -8089,6 +8158,53 @@ void PortsOrch::doTask(NotificationConsumer &consumer)

}

void PortsOrch::updatePortErrorStatus(Port &port, sai_port_error_status_t errstatus)
{
size_t errors = 0;
string db_port_error_name;
PortOperErrorEvent *portOperErrorEvent = nullptr;
size_t error_count = PortOperErrorEvent::db_key_errors.size();

SWSS_LOG_NOTICE("Port %s error state set from 0x%" PRIx32 "-> 0x%" PRIx32,
port.m_alias.c_str(),
port.m_oper_error_status,
errstatus);

port.m_oper_error_status = errstatus;

// Iterate through all the port oper errors
while ((errstatus >> errors) && (errors < error_count))
{
sai_port_error_status_t error_status = static_cast<sai_port_error_status_t>(errstatus & (1 << errors));

if (port.m_portOperErrorToEvent.find(error_status) == port.m_portOperErrorToEvent.end())
{
++errors;
continue;
}

portOperErrorEvent = &port.m_portOperErrorToEvent[error_status];

if (portOperErrorEvent->isErrorSet(errstatus))
{
SWSS_LOG_NOTICE("Port %s oper error event: %s occurred",
port.m_alias.c_str(),
portOperErrorEvent->getDbKey().c_str());
portOperErrorEvent->recordEventTime();
portOperErrorEvent->incrementErrorCount();
updateDbPortOperError(port, portOperErrorEvent);
}
else
{
SWSS_LOG_WARN("Port %s port oper error %s not updated in DB",
port.m_alias.c_str(),
portOperErrorEvent->getDbKey().c_str());
}

++errors;
}
}

void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status)
{
SWSS_LOG_NOTICE("Port %s oper state set from %s to %s",
Expand Down
4 changes: 4 additions & 0 deletions orchagent/portsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,14 @@ class PortsOrch : public Orch, public Subject
void setPort(string alias, Port port);
void getCpuPort(Port &port);
void initHostTxReadyState(Port &port);
void initializePortOperErrors(Port &port);
bool getInbandPort(Port &port);
bool getVlanByVlanId(sai_vlan_id_t vlan_id, Port &vlan);

bool setHostIntfsOperStatus(const Port& port, bool up) const;
void updateDbPortOperStatus(const Port& port, sai_port_oper_status_t status) const;
void updateDbPortFlapCount(Port& port, sai_port_oper_status_t pstatus);
void updateDbPortOperError(Port& port, PortOperErrorEvent *pevent);

bool createVlanHostIntf(Port& vl, string hostif_name);
bool removeVlanHostIntf(Port vl);
Expand Down Expand Up @@ -263,6 +265,7 @@ class PortsOrch : public Orch, public Subject
unique_ptr<Table> m_pgIndexTable;
unique_ptr<Table> m_stateBufferMaximumValueTable;
Table m_portStateTable;
Table m_portOpErrTable;

std::string getQueueWatermarkFlexCounterTableKey(std::string s);
std::string getPriorityGroupWatermarkFlexCounterTableKey(std::string s);
Expand Down Expand Up @@ -502,6 +505,7 @@ class PortsOrch : public Orch, public Subject
bool initGearboxPort(Port &port);
bool getPortOperFec(const Port& port, sai_port_fec_mode_t &fec_mode) const;
void updateDbPortOperFec(Port &port, string fec_str);
void updatePortErrorStatus(Port &port, sai_port_error_status_t port_oper_eror);

map<string, Port::Role> m_recircPortRole;

Expand Down
Loading

0 comments on commit 956ebd6

Please sign in to comment.