diff --git a/orchagent/p4orch/tests/fake_portorch.cpp b/orchagent/p4orch/tests/fake_portorch.cpp index a34a30eb4b..c3340e0cf3 100644 --- a/orchagent/p4orch/tests/fake_portorch.cpp +++ b/orchagent/p4orch/tests/fake_portorch.cpp @@ -14,7 +14,9 @@ extern "C" PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector &tableNames, DBConnector *chassisAppDb) - : Orch(db, tableNames), m_portStateTable(stateDb, STATE_PORT_TABLE_NAME), + : Orch(db, tableNames), + m_portStateTable(stateDb, STATE_PORT_TABLE_NAME), + m_portOpErrTable(stateDb, STATE_PORT_OPER_ERR_TABLE_NAME), port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), port_buffer_drop_stat_manager(PORT_BUFFER_DROP_STAT_FLEX_COUNTER_GROUP, StatsMode::READ, diff --git a/orchagent/port.h b/orchagent/port.h index 0ae9b97b67..318a60a376 100644 --- a/orchagent/port.h +++ b/orchagent/port.h @@ -10,8 +10,10 @@ extern "C" { #include #include #include +#include #include - +#include +#include #include #include @@ -74,6 +76,42 @@ struct SystemLagInfo int32_t spa_id = 0; }; +class PortOperErrorEvent // Record for one port oper error flag: occurrence count and last event time +{ +public: + PortOperErrorEvent() = default; + PortOperErrorEvent(const sai_port_error_status_t error, std::string key) : m_errorFlag(error), m_dbKeyError(key){} + ~PortOperErrorEvent() = default; + + inline void incrementErrorCount(void) { m_errorCount++; } + + inline size_t getErrorCount(void) const { return m_errorCount; } + + void recordEventTime(void) { + auto now = std::chrono::system_clock::now(); + m_eventTime = std::chrono::system_clock::to_time_t(now); + } + + std::string getEventTime(void) const { // Last recorded event time, UTC, formatted "%Y-%m-%d %H:%M:%S" + std::ostringstream oss; + oss << std::put_time(std::gmtime(&m_eventTime), "%Y-%m-%d %H:%M:%S"); + return oss.str(); + } + + inline std::string getDbKey(void) const { return m_dbKeyError; } + + // Returns true if port oper error flag in sai_port_error_status_t is set + bool isErrorSet(sai_port_error_status_t errstatus) const { return (m_errorFlag &
errstatus) != 0;} + + static const std::unordered_map db_key_errors; + +private: + sai_port_error_status_t m_errorFlag = SAI_PORT_ERROR_STATUS_CLEAR; + size_t m_errorCount = 0; + std::string m_dbKeyError; // DB key for this port error + std::time_t m_eventTime = 0; +}; + class Port { public: @@ -155,6 +193,7 @@ class Port sai_object_id_t m_parent_port_id = 0; uint32_t m_dependency_bitmap = 0; sai_port_oper_status_t m_oper_status = SAI_PORT_OPER_STATUS_UNKNOWN; + sai_port_error_status_t m_oper_error_status = SAI_PORT_ERROR_STATUS_CLEAR; //Bitmap of last port oper error status std::set m_members; std::set m_child_ports; std::vector m_queue_ids; @@ -193,6 +232,9 @@ class Port sai_object_id_t m_system_side_id = 0; sai_object_id_t m_line_side_id = 0; + /* Port oper error status to event map*/ + std::unordered_map m_portOperErrorToEvent; + /* pre-emphasis */ std::map> m_preemphasis; diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp index 7c3be48d20..2ce9b31b6f 100644 --- a/orchagent/portsorch.cpp +++ b/orchagent/portsorch.cpp @@ -308,6 +308,25 @@ static char* hostif_vlan_tag[] = { [SAI_HOSTIF_VLAN_TAG_ORIGINAL] = "SAI_HOSTIF_VLAN_TAG_ORIGINAL" }; +const std::unordered_map PortOperErrorEvent::db_key_errors = +{ + // SAI port oper error status to error name mapping + { SAI_PORT_ERROR_STATUS_MAC_LOCAL_FAULT, "mac_local_fault"}, + { SAI_PORT_ERROR_STATUS_MAC_REMOTE_FAULT, "mac_remote_fault"}, + { SAI_PORT_ERROR_STATUS_FEC_SYNC_LOSS, "fec_sync_loss"}, + { SAI_PORT_ERROR_STATUS_FEC_LOSS_ALIGNMENT_MARKER, "fec_alignment_loss"}, + { SAI_PORT_ERROR_STATUS_HIGH_SER, "high_ser_error"}, + { SAI_PORT_ERROR_STATUS_HIGH_BER, "high_ber_error"}, + { SAI_PORT_ERROR_STATUS_CRC_RATE, "crc_rate"}, + { SAI_PORT_ERROR_STATUS_DATA_UNIT_CRC_ERROR, "data_unit_crc_error"}, + { SAI_PORT_ERROR_STATUS_DATA_UNIT_SIZE, "data_unit_size"}, + { SAI_PORT_ERROR_STATUS_DATA_UNIT_MISALIGNMENT_ERROR, "data_unit_misalignment_error"}, + { SAI_PORT_ERROR_STATUS_CODE_GROUP_ERROR, "code_group_error"}, + {
SAI_PORT_ERROR_STATUS_SIGNAL_LOCAL_ERROR, "signal_local_error"}, + { SAI_PORT_ERROR_STATUS_NO_RX_REACHABILITY, "no_rx_reachability"} +}; + + // functions ---------------------------------------------------------------------------------------------------------- static bool isValidPortTypeForLagMember(const Port& port) @@ -509,6 +528,7 @@ bool PortsOrch::checkPathTracingCapability() PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector &tableNames, DBConnector *chassisAppDb) : Orch(db, tableNames), m_portStateTable(stateDb, STATE_PORT_TABLE_NAME), + m_portOpErrTable(stateDb, STATE_PORT_OPER_ERR_TABLE_NAME), port_stat_manager(PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, false), gb_port_stat_manager(true, PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, @@ -808,6 +828,26 @@ void PortsOrch::initializeCpuPort() SWSS_LOG_NOTICE("Get CPU port pid:%" PRIx64, this->m_cpuPort.m_port_id); } +// Creating mapping of various port oper errors for error handling +void PortsOrch::initializePortOperErrors(Port &port) +{ + SWSS_LOG_ENTER(); + + SWSS_LOG_NOTICE("Initialize port oper errors for port %s", port.m_alias.c_str()); + + for (const auto& error : PortOperErrorEvent::db_key_errors) + { + const sai_port_error_status_t error_status = error.first; + const std::string& error_name = error.second; + + port.m_portOperErrorToEvent[error_status] = PortOperErrorEvent(error_status, error_name); + SWSS_LOG_NOTICE("Initialize port %s error %s flag=0x%" PRIx32, + port.m_alias.c_str(), + error_name.c_str(), + error_status); + } +} + void PortsOrch::initializePorts() { SWSS_LOG_ENTER(); @@ -3351,6 +3391,26 @@ void PortsOrch::updateDbPortFlapCount(Port& port, sai_port_oper_status_t pstatus m_portTable->set(port.m_alias, tuples); } +void PortsOrch::updateDbPortOperError(Port& port, PortOperErrorEvent *pevent) +{ + SWSS_LOG_ENTER(); + + auto key = pevent->getDbKey(); + vector tuples; + FieldValueTuple tup1("oper_error_status",
std::to_string(port.m_oper_error_status)); + tuples.push_back(tup1); + + size_t count = pevent->getErrorCount(); + FieldValueTuple tup2(key + "_count", std::to_string(count)); + tuples.push_back(tup2); + + auto time = pevent->getEventTime(); + FieldValueTuple tup3(key + "_time", time); + tuples.push_back(tup3); + + m_portOpErrTable.set(port.m_alias, tuples); +} + void PortsOrch::updateDbPortOperStatus(const Port& port, sai_port_oper_status_t status) const { SWSS_LOG_ENTER(); @@ -4613,6 +4673,8 @@ void PortsOrch::doPortTask(Consumer &consumer) /* create host_tx_ready field in state-db */ initHostTxReadyState(p); + initializePortOperErrors(p); + // Restore admin status if the port was brought down if (admin_status != p.m_admin_state_up) { @@ -8019,12 +8081,14 @@ void PortsOrch::doTask(NotificationConsumer &consumer) for (uint32_t i = 0; i < count; i++) { + Port port; sai_object_id_t id = portoperstatus[i].port_id; sai_port_oper_status_t status = portoperstatus[i].port_state; + sai_port_error_status_t port_oper_err = portoperstatus[i].port_error_status; - SWSS_LOG_NOTICE("Get port state change notification id:%" PRIx64 " status:%d", id, status); - - Port port; + SWSS_LOG_NOTICE("Get port state change notification id:%" PRIx64 " status:%d " + "oper_error_status:0x%" PRIx32, + id, status, port_oper_err); if (!getPort(id, port)) { @@ -8061,6 +8125,11 @@ void PortsOrch::doTask(NotificationConsumer &consumer) { updateDbPortOperFec(port, "N/A"); } + } else { + if (port_oper_err) + { + updatePortErrorStatus(port, port_oper_err); + } } /* update m_portList */ @@ -8089,6 +8158,53 @@ void PortsOrch::doTask(NotificationConsumer &consumer) } +void PortsOrch::updatePortErrorStatus(Port &port, sai_port_error_status_t errstatus) +{ + size_t errors = 0; + string db_port_error_name; + PortOperErrorEvent *portOperErrorEvent = nullptr; + size_t error_count = PortOperErrorEvent::db_key_errors.size(); + + SWSS_LOG_NOTICE("Port %s error state set from 0x%" PRIx32 "-> 0x%" PRIx32, +
port.m_alias.c_str(), + port.m_oper_error_status, + errstatus); + + port.m_oper_error_status = errstatus; + + // Iterate through all the port oper errors, one bit position per pass; bound is checked before shifting + while ((errors < error_count) && (errstatus >> errors)) + { + sai_port_error_status_t error_status = static_cast(errstatus & (1U << errors)); + + if (port.m_portOperErrorToEvent.find(error_status) == port.m_portOperErrorToEvent.end()) + { + ++errors; + continue; + } + + portOperErrorEvent = &port.m_portOperErrorToEvent[error_status]; + + if (portOperErrorEvent->isErrorSet(errstatus)) + { + SWSS_LOG_NOTICE("Port %s oper error event: %s occurred", + port.m_alias.c_str(), + portOperErrorEvent->getDbKey().c_str()); + portOperErrorEvent->recordEventTime(); + portOperErrorEvent->incrementErrorCount(); + updateDbPortOperError(port, portOperErrorEvent); + } + else + { + SWSS_LOG_WARN("Port %s port oper error %s not updated in DB", + port.m_alias.c_str(), + portOperErrorEvent->getDbKey().c_str()); + } + + ++errors; + } +} + void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status) { SWSS_LOG_NOTICE("Port %s oper state set from %s to %s", diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h index 3ae283fb80..ad2bc85b48 100644 --- a/orchagent/portsorch.h +++ b/orchagent/portsorch.h @@ -146,12 +146,14 @@ class PortsOrch : public Orch, public Subject void setPort(string alias, Port port); void getCpuPort(Port &port); void initHostTxReadyState(Port &port); + void initializePortOperErrors(Port &port); bool getInbandPort(Port &port); bool getVlanByVlanId(sai_vlan_id_t vlan_id, Port &vlan); bool setHostIntfsOperStatus(const Port& port, bool up) const; void updateDbPortOperStatus(const Port& port, sai_port_oper_status_t status) const; void updateDbPortFlapCount(Port& port, sai_port_oper_status_t pstatus); + void updateDbPortOperError(Port& port, PortOperErrorEvent *pevent); bool createVlanHostIntf(Port& vl, string hostif_name); bool removeVlanHostIntf(Port vl); @@ -263,6 +265,7 @@ class PortsOrch :
public Orch, public Subject unique_ptr m_pgIndexTable; unique_ptr
m_stateBufferMaximumValueTable; Table m_portStateTable; + Table m_portOpErrTable; std::string getQueueWatermarkFlexCounterTableKey(std::string s); std::string getPriorityGroupWatermarkFlexCounterTableKey(std::string s); @@ -502,6 +505,7 @@ class PortsOrch : public Orch, public Subject bool initGearboxPort(Port &port); bool getPortOperFec(const Port& port, sai_port_fec_mode_t &fec_mode) const; void updateDbPortOperFec(Port &port, string fec_str); + void updatePortErrorStatus(Port &port, sai_port_error_status_t port_oper_error); map m_recircPortRole; diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index 999aba00ff..0d698b8451 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -680,6 +680,163 @@ namespace portsorch_test cleanupPorts(gPortsOrch); } + /* + * Test port oper error count + */ + TEST_F(PortsOrchTest, PortOperErrorStatus) + { + Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME); + Table portTableOpErrState = Table(m_state_db.get(), STATE_PORT_OPER_ERR_TABLE_NAME); + + // Get SAI default ports to populate DB + auto ports = ut_helper::getInitialSaiPorts(); + + // Populate port table with SAI ports + for (const auto &it : ports) + { + portTable.set(it.first, it.second); + } + + // Set PortConfigDone, PortInitDone + portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } }); + portTable.set("PortInitDone", { { "lanes", "0" } }); + + // refill consumer + gPortsOrch->addExistingData(&portTable); + // Apply configuration : create ports + static_cast(gPortsOrch)->doTask(); + + // Get first port, expect the oper status is not UP + Port port; + gPortsOrch->getPort("Ethernet0", port); + ASSERT_TRUE(port.m_oper_status != SAI_PORT_OPER_STATUS_UP); + ASSERT_TRUE(port.m_flap_count == 0); + + auto exec = static_cast(gPortsOrch->getExecutor("PORT_STATUS_NOTIFICATIONS")); + auto consumer = exec->getNotificationConsumer(); + + std::vector errors = { +
SAI_PORT_ERROR_STATUS_MAC_LOCAL_FAULT, + SAI_PORT_ERROR_STATUS_MAC_REMOTE_FAULT, + static_cast( + SAI_PORT_ERROR_STATUS_FEC_SYNC_LOSS | + SAI_PORT_ERROR_STATUS_MAC_LOCAL_FAULT), + static_cast( + SAI_PORT_ERROR_STATUS_FEC_LOSS_ALIGNMENT_MARKER | + SAI_PORT_ERROR_STATUS_HIGH_SER | + SAI_PORT_ERROR_STATUS_HIGH_BER | + SAI_PORT_ERROR_STATUS_CRC_RATE), + SAI_PORT_ERROR_STATUS_DATA_UNIT_CRC_ERROR, + static_cast( + SAI_PORT_ERROR_STATUS_FEC_SYNC_LOSS | + SAI_PORT_ERROR_STATUS_DATA_UNIT_SIZE | + SAI_PORT_ERROR_STATUS_DATA_UNIT_MISALIGNMENT_ERROR), + static_cast( + SAI_PORT_ERROR_STATUS_CODE_GROUP_ERROR | + SAI_PORT_ERROR_STATUS_SIGNAL_LOCAL_ERROR | + SAI_PORT_ERROR_STATUS_NO_RX_REACHABILITY), + static_cast( + SAI_PORT_ERROR_STATUS_FEC_SYNC_LOSS | + SAI_PORT_ERROR_STATUS_MAC_REMOTE_FAULT) + }; + + // mock a redis reply for notification, it notifies that Ethernet0 is down with a port oper error bitmap + for (uint32_t count=0; count < errors.size(); count++) { + sai_port_oper_status_t oper_status = SAI_PORT_OPER_STATUS_DOWN; + mockReply = (redisReply *)calloc(sizeof(redisReply), 1); + mockReply->type = REDIS_REPLY_ARRAY; + mockReply->elements = 3; // REDIS_PUBLISH_MESSAGE_ELEMNTS + mockReply->element = (redisReply **)calloc(sizeof(redisReply *), mockReply->elements); + mockReply->element[2] = (redisReply *)calloc(sizeof(redisReply), 1); + mockReply->element[2]->type = REDIS_REPLY_STRING; + sai_port_oper_status_notification_t port_oper_status; + memset(&port_oper_status, 0, sizeof(port_oper_status)); + port_oper_status.port_error_status = errors[count]; + port_oper_status.port_state = oper_status; + port_oper_status.port_id = port.m_port_id; + std::string data = sai_serialize_port_oper_status_ntf(1, &port_oper_status); + std::vector notifyValues; + FieldValueTuple opdata("port_state_change", data); + notifyValues.push_back(opdata); + std::string msg = swss::JSon::buildJson(notifyValues); + mockReply->element[2]->str = (char*)calloc(1, msg.length() + 1); + memcpy(mockReply->element[2]->str,
msg.c_str(), msg.length()); + + // trigger the notification + consumer->readData(); + gPortsOrch->doTask(*consumer); + mockReply = nullptr; + gPortsOrch->getPort("Ethernet0", port); + gPortsOrch->updatePortErrorStatus(port, errors[count]); + ASSERT_TRUE(port.m_oper_error_status == errors[count]); + } + + std::vector values; + portTableOpErrState.get("Ethernet0", values); + + for (auto &valueTuple : values) + { + if (fvField(valueTuple) == "mac_local_fault_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "2"); + } + else if (fvField(valueTuple) == "mac_remote_fault_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "2"); + } + else if (fvField(valueTuple) == "oper_error_status") + { + ASSERT_TRUE(fvValue(valueTuple) == "3"); + } + else if (fvField(valueTuple) == "fec_sync_loss_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "3"); + } + else if (fvField(valueTuple) == "fec_alignment_loss_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "high_ser_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "high_ber_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "crc_rate_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "data_unit_crc_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "data_unit_size_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "data_unit_misalignment_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "code_group_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "signal_local_error_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + else if (fvField(valueTuple) == "no_rx_reachability_count") + { + ASSERT_TRUE(fvValue(valueTuple) == "1"); + } + } + + cleanupPorts(gPortsOrch); + }
+ TEST_F(PortsOrchTest, PortBulkCreateRemove) { auto portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);