diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 2513397e8689..7a06a6cbf581 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -19,6 +19,7 @@ try: from sonic_daemon_base import daemon_base from sonic_daemon_base.daemon_base import Logger from sonic_daemon_base.daemon_base import DaemonBase + from enum import Enum except ImportError, e: raise ImportError (str(e) + " - required module not found") @@ -33,6 +34,7 @@ PLATFORM_SPECIFIC_CLASS_NAME = "SfpUtil" TRANSCEIVER_INFO_TABLE = 'TRANSCEIVER_INFO' TRANSCEIVER_DOM_SENSOR_TABLE = 'TRANSCEIVER_DOM_SENSOR' +TRANSCEIVER_STATUS_TABLE = 'TRANSCEIVER_STATUS' SELECT_TIMEOUT_MSECS = 1000 @@ -40,8 +42,17 @@ DOM_INFO_UPDATE_PERIOD_SECS = 60 TIME_FOR_SFP_READY_SECS = 1 XCVRD_MAIN_THREAD_SLEEP_SECS = 60 -SFP_STATUS_INSERTED = '1' +# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase SFP_STATUS_REMOVED = '0' +SFP_STATUS_INSERTED = '1' + +# SFP error code enum, new elements can be added to the enum if new errors need to be supported. 
# SFP error code enum, new elements can be added to the enum if new errors need to be supported.
# Values start at 2, following SFP_STATUS_REMOVED ('0') and SFP_STATUS_INSERTED ('1').
SFP_STATUS_ERR_ENUM = Enum('SFP_STATUS_ERR_ENUM', ['SFP_STATUS_ERR_I2C_STUCK', 'SFP_STATUS_ERR_BAD_EEPROM',
                                                   'SFP_STATUS_ERR_UNSUPPORTED_CABLE', 'SFP_STATUS_ERR_HIGH_TEMP',
                                                   'SFP_STATUS_ERR_BAD_CABLE'], start=2)

# String form of every error code above, kept in a set so that a status
# string can be tested for membership directly.
errors_block_eeprom_reading = set(str(error_code.value) for error_code in SFP_STATUS_ERR_ENUM)

EVENT_ON_ALL_SFP = '-1'


# Update port SFP status table on receiving SFP change event.
#
# Writes a single field, 'status', into the entry keyed by logical_port_name.
#   logical_port_name: key of the entry in status_tbl
#   status_tbl:        table object exposing set(key, field_value_pairs)
#   status:            status string to store (SFP_STATUS_REMOVED,
#                      SFP_STATUS_INSERTED or one of the error code strings)
def update_port_transceiver_status_table(logical_port_name, status_tbl, status):
    fvs = swsscommon.FieldValuePairs([('status', status)])
    status_tbl.set(logical_port_name, fvs)


# Delete port from SFP status table.
#
#   logical_port_name: key of the entry to remove
#   status_tbl:        table object exposing _del(key)
def delete_port_from_status_table(logical_port_name, status_tbl):
    status_tbl._del(logical_port_name)


# Check whether port in error status.
#
# Returns True only when status_tbl holds an entry for logical_port_name whose
# 'status' field is one of the strings in errors_block_eeprom_reading.
# Returns False when there is no entry, or when the entry carries no 'status'
# field at all.
def detect_port_in_error_status(logical_port_name, status_tbl):
    rec, fvp = status_tbl.get(logical_port_name)
    if not rec:
        return False
    # .get() instead of indexing: an entry without a 'status' field is treated
    # as "not in error" rather than raising KeyError.
    return dict(fvp).get('status') in errors_block_eeprom_reading


# Init TRANSCEIVER_STATUS table.
#
# Connects to STATE_DB and, for every logical port known to platform_sfputil,
# records SFP_STATUS_INSERTED or SFP_STATUS_REMOVED according to the presence
# reported for its physical port(s). When a logical port maps to several
# physical ports, each one overwrites the same entry, so the value written for
# the last physical port checked is the one that remains.
#
#   stop_event: optional threading.Event; once it is set the initialisation
#               stops early. A fresh, unset Event is used when omitted.
def init_port_sfp_status_tbl(stop_event=None):
    # Build the default per call: a default of threading.Event() in the
    # signature would be created once and shared by every call.
    if stop_event is None:
        stop_event = threading.Event()

    # Connect to STATE_DB and create transceiver status table
    state_db = daemon_base.db_connect(swsscommon.STATE_DB)
    status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE)

    # Init TRANSCEIVER_STATUS table
    logical_port_list = platform_sfputil.logical
    for logical_port_name in logical_port_list:
        if stop_event.is_set():
            break

        physical_port_list = logical_port_name_to_physical_port_list(logical_port_name)
        if physical_port_list is None:
            logger.log_error("No physical ports found for logical port '%s'" % logical_port_name)
            update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED)
            # Nothing to iterate over; move on instead of looping over None.
            continue

        for physical_port in physical_port_list:
            if stop_event.is_set():
                break

            if not _wrapper_get_presence(physical_port):
                update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED)
            else:
                update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_INSERTED)
post_port_dom_info_to_db(logical_port_name, dom_tbl, self.task_stopping_event) - post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl, self.task_stopping_event) + if not detect_port_in_error_status(logical_port_name, status_tbl): + post_port_dom_info_to_db(logical_port_name, dom_tbl, self.task_stopping_event) + post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl, self.task_stopping_event) logger.log_info("Stop DOM monitoring loop") @@ -716,6 +778,7 @@ class sfp_state_update_task: state_db = daemon_base.db_connect(swsscommon.STATE_DB) int_tbl = swsscommon.Table(state_db, TRANSCEIVER_INFO_TABLE) dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) + status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE) # Connect to APPL_DB to notify Media notifications appl_db = daemon_base.db_connect(swsscommon.APPL_DB) @@ -846,6 +909,9 @@ class sfp_state_update_task: for logical_port in logical_port_list: if value == SFP_STATUS_INSERTED: logger.log_info("Got SFP inserted event") + # A plugin event will clear the error state. + update_port_transceiver_status_table(logical_port, status_tbl, SFP_STATUS_INSERTED) + logger.log_info("receive plug in and update port sfp status table.") rc = post_port_sfp_info_to_db(logical_port, int_tbl, transceiver_dict) # If we didn't get the sfp info, assuming the eeprom is not ready, give a try again. 
if rc == SFP_EEPROM_NOT_READY: @@ -858,9 +924,23 @@ class sfp_state_update_task: transceiver_dict.clear() elif value == SFP_STATUS_REMOVED: logger.log_info("Got SFP removed event") + update_port_transceiver_status_table(logical_port, status_tbl, SFP_STATUS_REMOVED) + logger.log_info("receive plug out and pdate port sfp status table.") del_port_sfp_dom_info_from_db(logical_port, int_tbl, dom_tbl) + elif value in errors_block_eeprom_reading: + logger.log_info("Got SFP Error event") + # Add port to error table to stop accessing eeprom of it + # If the port already in the error table, the stored error code will + # be updated to the new one. + update_port_transceiver_status_table(logical_port, status_tbl, value) + logger.log_info("receive error update port sfp status table.") + # In this case EEPROM is not accessible, so remove the DOM info + # since it will be outdated if long time no update. + # but will keep the interface info in the DB since it static. + del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl) + else: - # TODO, SFP return error code, need handle accordingly. + # SFP return unkown event, just ignore for now. 
logger.log_warning("Got unknown event {}, ignored".format(value)) continue else: @@ -1012,6 +1092,7 @@ class DaemonXcvrd(DaemonBase): state_db = daemon_base.db_connect(swsscommon.STATE_DB) self.int_tbl = swsscommon.Table(state_db, TRANSCEIVER_INFO_TABLE) self.dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) + self.status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE) self.load_media_settings() warmstart = swsscommon.WarmStart() @@ -1027,6 +1108,10 @@ class DaemonXcvrd(DaemonBase): logger.log_info("Post all port DOM/SFP info to DB") post_port_sfp_dom_info_to_db(is_warm_start, self.stop_event) + # Init port sfp status table + logger.log_info("Init port sfp status table") + init_port_sfp_status_tbl(self.stop_event) + # Deinitialize daemon def deinit(self): logger.log_info("Start daemon deinit...") @@ -1035,6 +1120,7 @@ class DaemonXcvrd(DaemonBase): logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: del_port_sfp_dom_info_from_db(logical_port_name, self.int_tbl, self.dom_tbl) + delete_port_from_status_table(logical_port_name, self.status_tbl) # Run daemon def run(self): @@ -1056,7 +1142,7 @@ class DaemonXcvrd(DaemonBase): while not self.stop_event.wait(self.timeout): # Check the integrity of the sfp info table and recover the missing entries if any - recover_missing_sfp_table_entries(platform_sfputil, self.int_tbl, self.stop_event) + recover_missing_sfp_table_entries(platform_sfputil, self.int_tbl, self.status_tbl, self.stop_event) logger.log_info("Stop daemon main loop")