diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
index f216f6de2c36..7f5cffa420a5 100644
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
@@ -28,13 +28,13 @@
     import os
     from functools import reduce
     from .utils import extract_RJ45_ports_index
+    from . import module_host_mgmt_initializer
     from . import utils
     from .device_data import DeviceDataManager
     import re
-    import queue
+    import select
     import threading
     import time
-    from sonic_platform import modules_mgmt
 except ImportError as e:
     raise ImportError (str(e) + "- required module not found")
 
@@ -132,9 +132,9 @@ def __init__(self):
 
         Chassis.chassis_instance = self
 
-        self.modules_mgmt_thread = threading.Thread()
-        self.modules_changes_queue = queue.Queue()
-        self.modules_mgmt_task_stopping_event = threading.Event()
+        self.module_host_mgmt_initializer = module_host_mgmt_initializer.ModuleHostMgmtInitializer()
+        self.poll_obj = None
+        self.registered_fds = None
 
         logger.log_info("Chassis loaded successfully")
 
@@ -344,8 +344,11 @@ def get_all_sfps(self):
         Returns:
             A list of objects derived from SfpBase representing all sfps
             available on this chassis
-        """
-        self.initialize_sfp()
+        """
+        if DeviceDataManager.is_module_host_management_mode():
+            self.module_host_mgmt_initializer.initialize(self)
+        else:
+            self.initialize_sfp()
         return self._sfp_list
 
     def get_sfp(self, index):
@@ -362,7 +365,10 @@ def get_sfp(self, index):
             An object dervied from SfpBase representing the specified sfp
         """
         index = index - 1
-        self.initialize_single_sfp(index)
+        if DeviceDataManager.is_module_host_management_mode():
+            self.module_host_mgmt_initializer.initialize(self)
+        else:
+            self.initialize_single_sfp(index)
         return super(Chassis, self).get_sfp(index)
 
     def get_port_or_cage_type(self, index):
@@ -412,42 +418,224 @@ def get_change_event(self, timeout=0):
                       indicates that fan 0 has been removed, fan 2
                       has been inserted and sfp 11 has been removed.
         """
-        if not self.modules_mgmt_thread.is_alive():
-            # open new SFP change events thread
-            self.modules_mgmt_thread = modules_mgmt.ModulesMgmtTask(q=self.modules_changes_queue
-                                                        , main_thread_stop_event = self.modules_mgmt_task_stopping_event)
-            # Set the thread as daemon so when pmon/xcvrd are shutting down, modules_mgmt will shut down immedietly.
-            self.modules_mgmt_thread.daemon = True
-            self.modules_mgmt_thread.start()
-        self.initialize_sfp()
-        wait_for_ever = (timeout == 0)
+        if DeviceDataManager.is_module_host_management_mode():
+            self.module_host_mgmt_initializer.initialize(self)
+            return self.get_change_event_for_module_host_management_mode(timeout)
+        else:
+            self.initialize_sfp()
+            return self.get_change_event_legacy(timeout)
+
+    def get_change_event_for_module_host_management_mode(self, timeout):
+        """Get SFP change event when module host management mode is enabled.
+
+        Args:
+            timeout: Timeout in milliseconds (optional). If timeout == 0,
+                this method will block until a change is detected.
+
+        Returns:
+            (bool, dict):
+                - True if call successful, False if not; - Deprecated, will always return True
+                - A nested dictionary where key is a device type,
+                  value is a dictionary with key:value pairs in the format of
+                  {'device_id':'device_event'},
+                  where device_id is the device ID for this device and
+                  device_event,
+                  status='1' represents device inserted,
+                  status='0' represents device removed.
+                  Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}}
+                  indicates that fan 0 has been removed, fan 2
+                  has been inserted and sfp 11 has been removed.
+        """
+        if not self.poll_obj:
+            self.poll_obj = select.poll()
+            self.registered_fds = {}
+            for s in self._sfp_list:
+                fds = s.get_fds_for_poling()
+                for fd_type, fd in fds.items():
+                    self.poll_obj.register(fd, select.POLLERR | select.POLLPRI)
+                    self.registered_fds[fd.fileno()] = (s.sdk_index, fd, fd_type)
+
+            logger.log_debug(f'Registered SFP file descriptors for polling: {self.registered_fds}')
+
+        from . import sfp
+
+        wait_forever = (timeout == 0)
+        # poll timeout should be no more than 1000ms to ensure fast shutdown flow
+        timeout = 1000.0 if timeout >= 1000 else float(timeout)
+        port_dict = {}
+        error_dict = {}
+        begin = time.time()
+        wait_ready_task = sfp.SFP.get_wait_ready_task()
+
+        while True:
+            fds_events = self.poll_obj.poll(timeout)
+            for fileno, _ in fds_events:
+                if fileno not in self.registered_fds:
+                    logger.log_error(f'Unknown file no {fileno} from poll event, registered files are {self.registered_fds}')
+                    continue
+
+                sfp_index, fd, fd_type = self.registered_fds[fileno]
+                s = self._sfp_list[sfp_index]
+                # The same file object is read on every event: rewind first, otherwise read() returns '' and int() raises
+                fd.seek(0)
+                fd_value = int(fd.read().strip())
+
+                # Detecting dummy event
+                if s.is_dummy_event(fd_type, fd_value):
+                    # Ignore dummy event for the first poll, assume SDK only provide 1 dummy event
+                    logger.log_debug(f'Ignore dummy event {fd_type}:{fd_value} for SFP {sfp_index}')
+                    continue
+
+                logger.log_notice(f'Got SFP event: index={sfp_index}, type={fd_type}, value={fd_value}')
+                if fd_type == 'hw_present':
+                    # event could be EVENT_NOT_PRESENT or EVENT_PRESENT
+                    event = sfp.EVENT_NOT_PRESENT if fd_value == 0 else sfp.EVENT_PRESENT
+                    s.on_event(event)
+                elif fd_type == 'present':
+                    if str(fd_value) == sfp.SFP_STATUS_ERROR:
+                        # FW control cable got an error, no need trigger state machine
+                        sfp_status, error_desc = s.get_error_info_from_sdk_error_type()
+                        port_dict[sfp_index + 1] = sfp_status
+                        if error_desc:
+                            error_dict[sfp_index + 1] = error_desc
+                        continue
+                    elif str(fd_value) == sfp.SFP_STATUS_INSERTED:
+                        # FW control cable got present, only case is that the cable is recovering
+                        # from an error. FW control cable has no transition from "Not Present" to "Present"
+                        # because "Not Present" cable is always "software control" and should always poll
+                        # hw_present sysfs instead of present sysfs.
+                        port_dict[sfp_index + 1] = sfp.SFP_STATUS_INSERTED
+                        continue
+                    else:
+                        s.on_event(sfp.EVENT_NOT_PRESENT)
+                else:
+                    # event could be EVENT_POWER_GOOD or EVENT_POWER_BAD
+                    event = sfp.EVENT_POWER_BAD if fd_value == 0 else sfp.EVENT_POWER_GOOD
+                    s.on_event(event)
+
+                if s.in_stable_state():
+                    s.fill_change_event(port_dict)
+                    s.refresh_poll_obj(self.poll_obj, self.registered_fds)
+                else:
+                    logger.log_debug(f'SFP {sfp_index} does not reach stable state, state={s.state}')
+
+            ready_sfp_set = wait_ready_task.get_ready_set()
+            for sfp_index in ready_sfp_set:
+                s = self._sfp_list[sfp_index]
+                s.on_event(sfp.EVENT_RESET_DONE)
+                if s.in_stable_state():
+                    s.fill_change_event(port_dict)
+                    s.refresh_poll_obj(self.poll_obj, self.registered_fds)
+                else:
+                    logger.log_error(f'SFP {sfp_index} failed to reach stable state, state={s.state}')
+
+            if port_dict:
+                logger.log_notice(f'Sending SFP change event: {port_dict}, error event: {error_dict}')
+                self.reinit_sfps(port_dict)
+                return True, {
+                    'sfp': port_dict,
+                    'sfp_error': error_dict
+                }
+            else:
+                if not wait_forever:
+                    elapse = time.time() - begin
+                    if elapse * 1000 >= timeout:
+                        return True, {'sfp': {}}
+
+    def get_change_event_legacy(self, timeout):
+        """Get SFP change event when module host management is disabled.
+
+        Args:
+            timeout (int): polling timeout in ms
+
+        Returns:
+            (bool, dict):
+                - True if call successful, False if not; - Deprecated, will always return True
+                - A nested dictionary where key is a device type,
+                  value is a dictionary with key:value pairs in the format of
+                  {'device_id':'device_event'},
+                  where device_id is the device ID for this device and
+                  device_event,
+                  status='1' represents device inserted,
+                  status='0' represents device removed.
+                  Ex. {'fan':{'0':'0', '2':'1'}, 'sfp':{'11':'0'}}
+                  indicates that fan 0 has been removed, fan 2
+                  has been inserted and sfp 11 has been removed.
+        """
+        if not self.poll_obj:
+            self.poll_obj = select.poll()
+            self.registered_fds = {}
+            # SDK always sent event for the first time polling. Such event should not be sent to xcvrd.
+            # Store SFP state before first time polling so that we can detect dummy event.
+            self.sfp_states_before_first_poll = {}
+            for s in self._sfp_list:
+                fd = s.get_fd_for_polling_legacy()
+                self.poll_obj.register(fd, select.POLLERR | select.POLLPRI)
+                self.registered_fds[fd.fileno()] = (s.sdk_index, fd)
+                self.sfp_states_before_first_poll[s.sdk_index] = s.get_module_status()
+
+            logger.log_debug(f'Registered SFP file descriptors for polling: {self.registered_fds}')
+
+        from . import sfp
+
+        wait_forever = (timeout == 0)
         # poll timeout should be no more than 1000ms to ensure fast shutdown flow
         timeout = 1000.0 if timeout >= 1000 else float(timeout)
         port_dict = {}
         error_dict = {}
         begin = time.time()
-        i = 0
+
         while True:
-            try:
-                logger.log_info(f'get_change_event() trying to get changes from queue on iteration {i}')
-                port_dict = self.modules_changes_queue.get(timeout=timeout / 1000)
-                logger.log_info(f'get_change_event() iteration {i} port_dict: {port_dict}')
-            except queue.Empty:
-                logger.log_info(f"failed to get item from modules changes queue on itertaion {i}")
+            fds_events = self.poll_obj.poll(timeout)
+            for fileno, _ in fds_events:
+                if fileno not in self.registered_fds:
+                    logger.log_error(f'Unknown file no {fileno} from poll event, registered files are {self.registered_fds}')
+                    continue
+
+                sfp_index, fd = self.registered_fds[fileno]
+                fd.seek(0)
+                fd.read()
+                s = self._sfp_list[sfp_index]
+                sfp_status = s.get_module_status()
+
+                if sfp_index in self.sfp_states_before_first_poll:
+                    # Detecting dummy event
+                    sfp_state_before_poll = self.sfp_states_before_first_poll[sfp_index]
+                    self.sfp_states_before_first_poll.pop(sfp_index)
+                    if sfp_state_before_poll == sfp_status:
+                        # Ignore dummy event for the first poll, assume SDK only provide 1 dummy event
+                        logger.log_debug(f'Ignore dummy event {sfp_status} for SFP {sfp_index}')
+                        continue
+
+                logger.log_notice(f'Got SFP event: index={sfp_index}, value={sfp_status}')
+                if sfp_status == sfp.SFP_STATUS_UNKNOWN:
+                    # in the following sequence, STATUS_UNKNOWN can be returned.
+                    # so we shouldn't raise exception here.
+                    # 1. some sfp module is inserted
+                    # 2. sfp_event gets stuck and fails to fetch the change event instantaneously
+                    # 3. and then the sfp module is removed
+                    # 4. sfp_event starts to try fetching the change event
+                    logger.log_notice("unknown module state, maybe the port suffers two adjacent insertion/removal")
+                    continue
+
+                if sfp_status == sfp.SFP_STATUS_ERROR:
+                    sfp_status, error_desc = s.get_error_info_from_sdk_error_type()
+                    if error_desc:
+                        error_dict[sfp_index + 1] = error_desc
+                port_dict[sfp_index + 1] = sfp_status
 
             if port_dict:
+                logger.log_notice(f'Sending SFP change event: {port_dict}, error event: {error_dict}')
                 self.reinit_sfps(port_dict)
-                result_dict = {'sfp': port_dict}
-                result_dict['sfp_error'] = error_dict
-                return True, result_dict
+                return True, {
+                    'sfp': port_dict,
+                    'sfp_error': error_dict
+                }
             else:
-                if not wait_for_ever:
+                if not wait_forever:
                     elapse = time.time() - begin
-                    logger.log_info(f"get_change_event: wait_for_ever {wait_for_ever} elapse {elapse} iteartion {i}")
                     if elapse * 1000 >= timeout:
-                        logger.log_info(f"elapse {elapse} > timeout {timeout} iteartion {i} returning empty dict")
                         return True, {'sfp': {}}
-            i += 1
 
     def reinit_sfps(self, port_dict):
         """
diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py
index aeceb15d1983..d24cca3a1080 100644
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py
@@ -1,5 +1,5 @@
 #
-# Copyright
(c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
+# Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES.
 # Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -242,7 +242,7 @@ def get_cpld_component_list(cls):
 
     @classmethod
     @utils.read_only_cache()
-    def is_independent_mode(cls):
+    def is_module_host_management_mode(cls):
         from sonic_py_common import device_info
         _, hwsku_dir = device_info.get_paths_to_platform_and_hwsku_dirs()
         sai_profile_file = os.path.join(hwsku_dir, 'sai.profile')
@@ -258,7 +258,7 @@ def wait_platform_ready(cls):
         """
         conditions = []
         sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror']
-        if cls.is_independent_mode():
+        if cls.is_module_host_management_mode():
             sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset',
                                 'power_good', 'power_limit', 'power_on', 'temperature/input'])
         else:
diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/module_host_mgmt_initializer.py b/platform/mellanox/mlnx-platform-api/sonic_platform/module_host_mgmt_initializer.py
new file mode 100644
index 000000000000..d9bec65987e0
--- /dev/null
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/module_host_mgmt_initializer.py
@@ -0,0 +1,133 @@
+#
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from . import utils
+from sonic_py_common.logger import Logger
+
+import atexit
+import os
+import sys
+import threading
+
+MODULE_READY_MAX_WAIT_TIME = 300
+MODULE_READY_CHECK_INTERVAL = 5
+MODULE_READY_CONTAINER_FILE = '/tmp/module_host_mgmt_ready'
+MODULE_READY_HOST_FILE = '/tmp/nv-syncd-shared/module_host_mgmt_ready'
+DEDICATE_INIT_DAEMON = 'xcvrd'
+initialization_owner = False
+
+logger = Logger()
+
+
+class ModuleHostMgmtInitializer:
+    """Responsible for initializing modules for host management mode.
+    """
+    def __init__(self):
+        self.initialized = False
+        self.lock = threading.Lock()
+
+    def initialize(self, chassis):
+        """Initialize all modules. Only applicable for module host management mode.
+        The real initialization job shall only be done in xcvrd. Only 1 owner is allowed
+        to do the initialization. Other daemon/CLI shall wait for the initialization done.
+
+        Args:
+            chassis (object): chassis object
+        """
+        global initialization_owner
+        if self.initialized:
+            return
+
+        if utils.is_host():
+            self.wait_module_ready()
+            chassis.initialize_sfp()
+        else:
+            if self.is_initialization_owner():
+                if not self.initialized:
+                    with self.lock:
+                        # Re-check under the lock: another thread may have finished initialization while this one waited
+                        if not self.initialized:
+                            logger.log_notice('Starting module initialization for module host management...')
+                            initialization_owner = True
+                            self.remove_module_ready_file()
+
+                            chassis.initialize_sfp()
+
+                            from .sfp import SFP
+                            SFP.initialize_sfp_modules(chassis._sfp_list)
+
+                            self.create_module_ready_file()
+                            self.initialized = True
+                            logger.log_notice('Module initialization for module host management done')
+            else:
+                self.wait_module_ready()
+                chassis.initialize_sfp()
+
+    @classmethod
+    def create_module_ready_file(cls):
+        """Create module ready file
+        """
+        with open(MODULE_READY_CONTAINER_FILE, 'w'):
+            pass
+
+    @classmethod
+    def remove_module_ready_file(cls):
+        """Remove module ready file. It is not an error if the file does not exist.
+        """
+        try:
+            os.remove(MODULE_READY_CONTAINER_FILE)
+        except FileNotFoundError:
+            # Already absent: removing directly avoids the race between an exists() check and remove()
+            pass
+
+    def wait_module_ready(self):
+        """Wait up to MODULE_READY_MAX_WAIT_TIME seconds for all modules to be ready
+        """
+        if utils.is_host():
+            module_ready_file = MODULE_READY_HOST_FILE
+        else:
+            module_ready_file = MODULE_READY_CONTAINER_FILE
+
+        if os.path.exists(module_ready_file):
+            self.initialized = True
+            return
+        else:
+            print('Waiting module to be initialized...')
+
+        if utils.wait_until(os.path.exists, MODULE_READY_MAX_WAIT_TIME, MODULE_READY_CHECK_INTERVAL, module_ready_file):
+            self.initialized = True
+        else:
+            logger.log_error('Module initialization timeout', True)
+
+    def is_initialization_owner(self):
+        """Indicate whether current process is the owner of doing module initialization
+
+        Returns:
+            bool: True if current process is the owner
+        """
+        # Ownership is decided by the program name: true when it contains DEDICATE_INIT_DAEMON
+        cmd = os.path.basename(sys.argv[0])
+        return DEDICATE_INIT_DAEMON in cmd
+
+@atexit.register
+def clean_up():
+    """Remove module ready file when program exits.
+    When module host management is enabled, xcvrd is the dependency for all other
+    daemon/CLI who potentially uses SFP API.
+    """
+    if initialization_owner:
+        ModuleHostMgmtInitializer.remove_module_ready_file()
diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/modules_mgmt.py b/platform/mellanox/mlnx-platform-api/sonic_platform/modules_mgmt.py
deleted file mode 100644
index 448e0ca06809..000000000000
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/modules_mgmt.py
+++ /dev/null
@@ -1,769 +0,0 @@
-#
-# Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES.
-# Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import threading -import time -import queue -import os -import select -import traceback - -try: - from sonic_py_common.logger import Logger - from sonic_py_common import device_info, multi_asic - from .device_data import DeviceDataManager - from sonic_platform_base.sonic_xcvr.fields import consts - from sonic_platform_base.sonic_xcvr.api.public import cmis - from . import sfp as sfp_module - from . import utils - from swsscommon.swsscommon import SonicV2Connector -except ImportError as e: - raise ImportError (str(e) + "- required module not found") - -# Global logger class instance -logger = Logger() - -STATE_HW_NOT_PRESENT = "Initial state. module is not plugged to cage." -STATE_HW_PRESENT = "Module is plugged to cage" -STATE_MODULE_AVAILABLE = "Module hw present and power is good" -STATE_POWERED = "Module power is already loaded" -STATE_NOT_POWERED = "Module power is not loaded" -STATE_FW_CONTROL = "The module is not CMIS and FW needs to handle" -STATE_SW_CONTROL = "The module is CMIS and SW needs to handle" -STATE_ERROR_HANDLER = "An error occurred - read/write error, power limit or power cap." -STATE_POWER_LIMIT_ERROR = "The cage has not enough power for the plugged module" -STATE_SYSFS_ERROR = "An error occurred while writing/reading SySFS." 
- -SAI_PROFILE_FILE = "/{}/sai.profile" -SAI_INDEP_MODULE_MODE = "SAI_INDEPENDENT_MODULE_MODE" -SAI_INDEP_MODULE_MODE_DELIMITER = "=" -SAI_INDEP_MODULE_MODE_TRUE_STR = "1" -SYSFS_LEGACY_FD_PRESENCE = "/sys/module/sx_core/asic0/module{}/present" -ASIC_NUM = 0 -SYSFS_INDEPENDENT_FD_PREFIX_WO_MODULE = "/sys/module/sx_core/asic{}".format(ASIC_NUM) -SYSFS_INDEPENDENT_FD_PREFIX = SYSFS_INDEPENDENT_FD_PREFIX_WO_MODULE + "/module{}" -SYSFS_INDEPENDENT_FD_PRESENCE = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "hw_present") -SYSFS_INDEPENDENT_FD_POWER_GOOD = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "power_good") -SYSFS_INDEPENDENT_FD_POWER_ON = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "power_on") -SYSFS_INDEPENDENT_FD_HW_RESET = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "hw_reset") -SYSFS_INDEPENDENT_FD_POWER_LIMIT = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "power_limit") -SYSFS_INDEPENDENT_FD_FW_CONTROL = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "control") -# echo /sys/module/sx_core/$asic/$module/frequency // val: 0 - up to 400KHz, 1 - up to 1MHz -SYSFS_INDEPENDENT_FD_FREQ = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "frequency") -SYSFS_INDEPENDENT_FD_FREQ_SUPPORT = os.path.join(SYSFS_INDEPENDENT_FD_PREFIX, "frequency_support") -IS_INDEPENDENT_MODULE = 'is_independent_module' -PROC_CMDLINE = "/proc/cmdline" -CMDLINE_STR_TO_LOOK_FOR = 'SONIC_BOOT_TYPE=' -CMDLINE_VAL_TO_LOOK_FOR = 'fastfast' - -MAX_EEPROM_ERROR_RESET_RETRIES = 4 - - -class ModulesMgmtTask(threading.Thread): - - def __init__(self, namespaces=None, main_thread_stop_event=None, q=None): - threading.Thread.__init__(self) - self.name = "ModulesMgmtTask" - self.main_thread_stop_event = main_thread_stop_event - self.sfp_port_dict_initial = {} - self.sfp_port_dict = {} - self.sfp_changes_dict = {} - self.sfp_delete_list_from_port_dict = [] - self.namespaces = namespaces - self.modules_changes_queue = q - self.is_supported_indep_mods_system = False - self.modules_lock_list = [] - # A set to hold those modules 
waiting 3 seconds since power on and hw reset - self.waiting_modules_list = set() - self.timer = threading.Thread() - self.poll_obj = None - self.fds_mapping_to_obj = {} - self.port_to_fds = {} - self.fds_events_count_dict = {} - self.delete_ports_and_reset_states_dict = {} - self.setName("ModulesMgmtTask") - self.register_hw_present_fds = [] - self.is_warm_reboot = False - self.port_control_dict = {} - - # SFPs state machine - def get_sm_func(self, sm, port): - SFP_SM_ENUM = {STATE_HW_NOT_PRESENT: self.check_if_hw_present - , STATE_HW_PRESENT: self.check_if_power_on - , STATE_NOT_POWERED: self.power_on_module - , STATE_POWERED: self.check_if_module_available - , STATE_MODULE_AVAILABLE: self.check_module_type - , STATE_FW_CONTROL: self.save_module_control_mode - , STATE_SW_CONTROL: self.save_module_control_mode - , STATE_ERROR_HANDLER: STATE_ERROR_HANDLER - , STATE_POWER_LIMIT_ERROR: STATE_POWER_LIMIT_ERROR - , STATE_SYSFS_ERROR: STATE_SYSFS_ERROR - } - logger.log_info("getting func for state {} for port {}".format(sm, port)) - try: - func = SFP_SM_ENUM[sm] - logger.log_info("got func {} for state {} for port {}".format(func, sm, port)) - return func - except KeyError as e: - logger.log_error("exception {} for port {} sm {}".format(e, port, sm)) - return None - - def run(self): - # check first if the system supports independent mode and set boolean accordingly - (platform_path, hwsku_dir) = device_info.get_paths_to_platform_and_hwsku_dirs() - logger.log_info("hwsku_dir {} found, continue to check sai.profile file".format(hwsku_dir)) - independent_file = SAI_PROFILE_FILE.format(hwsku_dir) - if os.path.isfile(independent_file): - logger.log_info("file {} found, checking content for independent mode value".format(independent_file)) - with open(independent_file, "r") as independent_file_fd: - found = False - independent_file_content = ' ' - logger.log_info("file {} found, checking content for independent mode value".format(independent_file)) - while 
independent_file_content and not found: - independent_file_content = independent_file_fd.readline() - if SAI_INDEP_MODULE_MODE in independent_file_content and \ - SAI_INDEP_MODULE_MODE_DELIMITER in independent_file_content: - independent_file_splitted = independent_file_content.split(SAI_INDEP_MODULE_MODE_DELIMITER) - if (len(independent_file_splitted) > 1): - self.is_supported_indep_mods_system = int(independent_file_splitted[1]) == int(SAI_INDEP_MODULE_MODE_TRUE_STR) - logger.log_info("file {} found, system will work in independent mode".format(independent_file)) - logger.log_info("value of indep mode var: {} found in file".format(independent_file_splitted[1])) - found = True - else: - logger.log_info("file {} not found, system stays in legacy mode".format(independent_file)) - - # static init - at first go over all ports and check each one if it's independent module or legacy - self.sfp_changes_dict = {} - # check for each port if the module connected and if it supports independent mode or legacy - num_of_ports = DeviceDataManager.get_sfp_count() - # create the modules sysfs fds poller - self.poll_obj = select.poll() - # read cmdline to check if warm reboot done. cannot use swsscommon warmstart since this code runs after - # warm-reboot is finished. 
if done, need to read control sysfs per port and act accordingly since modules are - # not reset in warm-reboot - cmdline_dict = {} - proc_cmdline_str = utils.read_str_from_file(PROC_CMDLINE) - if CMDLINE_STR_TO_LOOK_FOR in proc_cmdline_str: - cmdline_dict[CMDLINE_STR_TO_LOOK_FOR] = proc_cmdline_str.split(CMDLINE_STR_TO_LOOK_FOR)[1] - if CMDLINE_STR_TO_LOOK_FOR in cmdline_dict.keys(): - self.is_warm_reboot = cmdline_dict[CMDLINE_STR_TO_LOOK_FOR] == CMDLINE_VAL_TO_LOOK_FOR - logger.log_info(f"system was warm rebooted is_warm_reboot: {self.is_warm_reboot}") - for port in range(num_of_ports): - # check sysfs per port whether it's independent mode or legacy - temp_module_sm = ModuleStateMachine(port_num=port, initial_state=STATE_HW_NOT_PRESENT - , current_state=STATE_HW_NOT_PRESENT) - module_fd_indep_path = SYSFS_INDEPENDENT_FD_PRESENCE.format(port) - logger.log_info("system in indep mode: {} port {}".format(self.is_supported_indep_mods_system, port)) - if self.is_warm_reboot: - logger.log_info("system was warm rebooted is_warm_reboot: {} trying to read control sysfs for port {}" - .format(self.is_warm_reboot, port)) - port_control_file = SYSFS_INDEPENDENT_FD_FW_CONTROL.format(port) - try: - port_control = utils.read_int_from_file(port_control_file, raise_exception=True) - self.port_control_dict[port] = port_control - logger.log_info(f"port control sysfs is {port_control} for port {port}") - except Exception as e: - logger.log_error("exception {} for port {} trying to read port control sysfs {}" - .format(e, port, port_control_file)) - if (self.is_supported_indep_mods_system and os.path.isfile(module_fd_indep_path)) \ - and not (self.is_warm_reboot and 0 == port_control): - logger.log_info("system in indep mode: {} port {} reading file {}".format(self.is_supported_indep_mods_system, port, module_fd_indep_path)) - temp_module_sm.set_is_indep_modules(True) - temp_module_sm.set_module_fd_path(module_fd_indep_path) - module_fd = open(module_fd_indep_path, "r") - 
temp_module_sm.set_module_fd(module_fd) - else: - module_fd_legacy_path = self.get_sysfs_ethernet_port_fd(SYSFS_LEGACY_FD_PRESENCE, port) - temp_module_sm.set_module_fd_path(module_fd_legacy_path) - module_fd = open(module_fd_legacy_path, "r") - temp_module_sm.set_module_fd(module_fd) - # add lock to use with timer task updating next state per module object - self.modules_lock_list.append(threading.Lock()) - # start SM for this independent module - logger.log_info("adding temp_module_sm {} to sfp_port_dict".format(temp_module_sm)) - self.sfp_port_dict_initial[port] = temp_module_sm - self.sfp_port_dict[port] = temp_module_sm - - i = 0 - # need at least 1 module in final state until it makes sense to send changes dict - is_final_state_module = False - all_static_detection_done = False - logger.log_info(f"sfp_port_dict before starting static detection: {self.sfp_port_dict} main_thread_stop_event: " - f"{self.main_thread_stop_event.is_set()} all_static_detection_done: {all_static_detection_done}") - # static detection - loop on different state for all ports until all done - while not self.main_thread_stop_event.is_set() and not all_static_detection_done: - logger.log_info("static detection running iteration {}".format(i)) - waiting_list_len = len(self.waiting_modules_list) - sfp_port_dict_keys_len = len(self.sfp_port_dict.keys()) - if waiting_list_len == sfp_port_dict_keys_len: - logger.log_info("static detection length of waiting list {}: {} and sfp port dict keys {}:{} is the same, sleeping 1 second..." 
- .format(waiting_list_len, self.waiting_modules_list, sfp_port_dict_keys_len, self.sfp_port_dict.keys())) - time.sleep(1) - else: - logger.log_info("static detectionlength of waiting list {}: {} and sfp port dict keys {}: {} is different, NOT sleeping 1 second" - .format(waiting_list_len, self.waiting_modules_list, sfp_port_dict_keys_len, self.sfp_port_dict.keys())) - for port_num, module_sm_obj in self.sfp_port_dict.items(): - curr_state = module_sm_obj.get_current_state() - logger.log_info(f'static detection STATE_LOG {port_num}: curr_state is {curr_state}') - func = self.get_sm_func(curr_state, port_num) - logger.log_info("static detection got returned func {} for state {}".format(func, curr_state)) - try: - if not isinstance(func, str): - if func is not None: - next_state = func(port_num, module_sm_obj) - except TypeError as e: - logger.log_info("static detection exception {} for port {} traceback:\n{}".format(e, port_num, traceback.format_exc())) - module_sm_obj.set_final_state(STATE_ERROR_HANDLER) - continue - logger.log_info(f'static detection STATE_LOG {port_num}: next_state is {next_state}') - if self.timer.is_alive(): - logger.log_info("static detection timer threads is alive, acquiring lock") - self.modules_lock_list[port_num].acquire() - # for STATE_NOT_POWERED we dont advance to next state, timerTask is doing it into STATE_POWERED - if curr_state != STATE_NOT_POWERED or not module_sm_obj.wait_for_power_on: - module_sm_obj.set_next_state(next_state) - module_sm_obj.advance_state() - if module_sm_obj.get_final_state(): - logger.log_info(f'static detection STATE_LOG {port_num}: enter final state {module_sm_obj.get_final_state()}') - is_final_state_module = True - if self.timer.is_alive(): - self.modules_lock_list[port_num].release() - is_timer_alive = self.timer.is_alive() - logger.log_info("static detection timer thread is_alive {} port {}".format(is_timer_alive, port_num)) - if STATE_NOT_POWERED == curr_state: - if not is_timer_alive: - logger.log_info 
("static detection curr_state is {} and timer thread is_alive {}, running timer task thread" - .format(curr_state, is_timer_alive)) - # call timer task - self.timer = threading.Timer(1.0, self.timerTask) - self.timer.start() - if self.timer.is_alive(): - logger.log_info("timer thread is_alive {}, locking module obj".format(self.timer.is_alive())) - self.modules_lock_list[port_num].acquire() - module_sm_obj.set_next_state(next_state) - if self.timer.is_alive(): - logger.log_info("timer thread is_alive {}, releasing module obj".format(self.timer.is_alive())) - self.modules_lock_list[port_num].release() - - if is_final_state_module: - self.map_ports_final_state() - self.delete_ports_from_dict() - self.send_changes_to_shared_queue() - self.register_presece_closed_ports(False, self.register_hw_present_fds) - i += 1 - self.register_hw_present_fds = [] - logger.log_info("sfp_port_dict: {}".format(self.sfp_port_dict)) - for port_num, module_sm_obj in self.sfp_port_dict.items(): - logger.log_info("static detection port_num: {} initial state: {} current_state: {} next_state: {}" - .format(port_num, module_sm_obj.initial_state, module_sm_obj.get_current_state() - , module_sm_obj.get_next_state())) - sfp_port_dict_keys_len = len(self.sfp_port_dict.keys()) - if sfp_port_dict_keys_len == 0: - logger.log_info("static detection len of keys of sfp_port_dict is 0: {}".format(sfp_port_dict_keys_len)) - all_static_detection_done = True - else: - logger.log_info("static detection len of keys of sfp_port_dict is not 0: {}".format(sfp_port_dict_keys_len)) - logger.log_info("static detection all_static_detection_done: {}".format(all_static_detection_done)) - - logger.log_info(f"sfp_port_dict before dynamic detection: {self.sfp_port_dict} " - f"main_thread_stop_event.is_set(): {self.main_thread_stop_event.is_set()}") - # dynamic detection - loop on polling changes, run state machine for them and put them into shared queue - i = 0 - # need at least 1 module in final state until it makes 
sense to send changes dict - is_final_state_module = False - # initialize fds events count to 0 - for fd_fileno in self.fds_mapping_to_obj: - module_obj = self.fds_mapping_to_obj[fd_fileno]['module_obj'] - # for debug purposes - self.fds_events_count_dict[module_obj.port_num] = { 'presence' : 0 , 'power_good' : 0 } - while not self.main_thread_stop_event.is_set(): - logger.log_info("dynamic detection running iteration {}".format(i)) - # poll for changes with 1 second timeout - fds_events = self.poll_obj.poll(1000) - logger.log_info("dynamic detection polled obj checking fds_events iteration {}".format(i)) - for fd, event in fds_events: - # get modules object from fd according to saved key-value of fd-module obj saved earlier - logger.log_info("dynamic detection working on fd {} event {}".format(fd, event)) - module_obj = self.fds_mapping_to_obj[fd]['module_obj'] - module_fd = self.fds_mapping_to_obj[fd]['fd'] - fd_name = self.fds_mapping_to_obj[fd]['fd_name'] - if 'presence' == fd_name: - module_fd_path = module_obj.module_fd_path - elif 'power_good' == fd_name: - module_fd_path = module_obj.module_power_good_fd_path - self.fds_events_count_dict[module_obj.port_num][fd_name] += 1 - try: - module_fd.seek(0) - val = module_fd.read().strip() - logger.log_info("dynamic detection got module_obj {} with port {} from fd number {} path {} val {} count {}" - .format(module_obj, module_obj.port_num, fd, module_fd_path - , val, self.fds_events_count_dict[module_obj.port_num])) - if self.is_dummy_event(int(val), module_obj): - logger.log_info(f"dynamic detection dummy event port {module_obj.port_num} from fd number {fd}") - continue - if module_obj.port_num not in self.sfp_port_dict.keys(): - logger.log_info("dynamic detection port {} not found in sfp_port_dict keys: {} adding it" - .format(module_obj.port_num, self.sfp_port_dict.keys())) - self.deregister_fd_from_polling(module_obj.port_num) - # put again module obj in sfp_port_dict so next loop will work on it - 
self.sfp_port_dict[module_obj.port_num] = module_obj - self.delete_ports_and_reset_states_dict[module_obj.port_num] = val - except Exception as e: - logger.log_error("dynamic detection exception on read presence {} for port {} fd name {} traceback:\n{}" - .format(e, module_obj.port_num, module_fd.name, traceback.format_exc())) - for port, val in self.delete_ports_and_reset_states_dict.items(): - logger.log_info(f"dynamic detection resetting all states for port {port} close_presence_ports {val}") - module_obj = self.sfp_port_dict[port] - module_obj.reset_all_states(close_presence_ports=val) - self.delete_ports_and_reset_states_dict = {} - for port_num, module_sm_obj in self.sfp_port_dict.items(): - curr_state = module_sm_obj.get_current_state() - logger.log_info(f'dynamic detection STATE_LOG {port_num}: curr_state is {curr_state}') - func = self.get_sm_func(curr_state, port) - logger.log_info("dynamic detection got returned func {} for state {}".format(func, curr_state)) - try: - if func is not None: - next_state = func(port_num, module_sm_obj, dynamic=True) - except TypeError as e: - logger.log_info("exception {} for port {}".format(e, port_num)) - continue - logger.log_info(f'dynamic detection STATE_LOG {port_num}: next_state is {next_state}') - if self.timer.is_alive(): - logger.log_info("dynamic detection timer threads is alive, acquiring lock") - self.modules_lock_list[port_num].acquire() - if curr_state != STATE_NOT_POWERED or not module_sm_obj.wait_for_power_on: - module_sm_obj.set_next_state(next_state) - module_sm_obj.advance_state() - if module_sm_obj.get_final_state(): - logger.log_info(f'dynamic detection STATE_LOG {port_num}: enter final state {module_sm_obj.get_final_state()}') - is_final_state_module = True - if self.timer.is_alive(): - self.modules_lock_list[port_num].release() - is_timer_alive = self.timer.is_alive() - logger.log_info("dynamic detection timer thread is_alive {} port {}".format(is_timer_alive, port_num)) - if STATE_NOT_POWERED == 
curr_state: - if not is_timer_alive: - logger.log_info("dynamic detection curr_state is {} and timer thread is_alive {}, running timer task thread" - .format(curr_state, is_timer_alive)) - # call timer task - self.timer = threading.Timer(1.0, self.timerTask) - self.timer.start() - if self.timer.is_alive(): - logger.log_info("dynamic detection timer thread is_alive {}, locking module obj".format(self.timer.is_alive())) - self.modules_lock_list[port_num].acquire() - module_sm_obj.set_next_state(next_state) - if self.timer.is_alive(): - logger.log_info( - "dynamic detection timer thread is_alive {}, releasing module obj".format(self.timer.is_alive())) - self.modules_lock_list[port_num].release() - - if is_final_state_module: - self.map_ports_final_state(dynamic=True) - self.delete_ports_from_dict(dynamic=True) - self.send_changes_to_shared_queue(dynamic=True) - self.register_presece_closed_ports(True, self.register_hw_present_fds) - if not self.sfp_port_dict and is_final_state_module: - is_final_state_module = False - logger.log_info(f"sft_port_dict is empty {self.sfp_port_dict}, set is_final_state_module to {is_final_state_module}") - self.register_hw_present_fds = [] - i += 1 - logger.log_info("sfp_port_dict: {}".format(self.sfp_port_dict)) - for port_num, module_sm_obj in self.sfp_port_dict.items(): - logger.log_info("port_num: {} module_sm_obj initial state: {} current_state: {} next_state: {}" - .format(port_num, module_sm_obj.initial_state, module_sm_obj.get_current_state(), module_sm_obj.get_next_state())) - - def is_dummy_event(self, val, module_sm_obj): - if val == 1: - return module_sm_obj.final_state in (STATE_HW_PRESENT, STATE_SW_CONTROL, STATE_FW_CONTROL) - elif val == 0: - return module_sm_obj.final_state in (STATE_HW_NOT_PRESENT,) - return False - - def check_if_hw_present(self, port, module_sm_obj, dynamic=False): - detection_method = 'dynamic' if dynamic else 'static' - logger.log_info(f"{detection_method} detection enter check_if_hw_present port 
{port} module_sm_obj {module_sm_obj}") - module_fd_indep_path = module_sm_obj.module_fd_path - if os.path.isfile(module_fd_indep_path): - try: - val_int = utils.read_int_from_file(module_fd_indep_path) - if 0 == val_int: - logger.log_info("returning {} for val {}".format(STATE_HW_NOT_PRESENT, val_int)) - retval_state = STATE_HW_NOT_PRESENT - module_sm_obj.set_final_state(retval_state, detection_method) - return retval_state - elif 1 == val_int: - logger.log_info("returning {} for val {}".format(STATE_HW_PRESENT, val_int)) - retval_state = STATE_HW_PRESENT - if not self.is_supported_indep_mods_system or (self.is_warm_reboot and 0 == self.port_control_dict[port] and not dynamic): - module_sm_obj.set_final_state(retval_state, detection_method) - self.register_fd_for_polling(module_sm_obj, module_sm_obj.module_fd, 'presence') - return retval_state - except Exception as e: - logger.log_info("exception {} for port {} setting final state STATE_ERROR_HANDLER".format(e, port)) - module_sm_obj.set_final_state(STATE_ERROR_HANDLER) - return STATE_ERROR_HANDLER - module_sm_obj.set_final_state(STATE_HW_NOT_PRESENT, detection_method) - return STATE_HW_NOT_PRESENT - - def check_if_module_available(self, port, module_sm_obj, dynamic=False): - logger.log_info("enter check_if_module_available port {} module_sm_obj {}".format(port, module_sm_obj)) - module_fd_indep_path = SYSFS_INDEPENDENT_FD_POWER_GOOD.format(port) - if os.path.isfile(module_fd_indep_path): - try: - # not using utils.read_int_from_file since need to catch the exception here if no such file or it is - # not accesible. 
utils.read_int_from_file will return 0 in such a case - module_power_good_fd = open(module_fd_indep_path, "r") - val = module_power_good_fd.read() - val_int = int(val) - module_sm_obj.module_power_good_fd_path = module_fd_indep_path - module_sm_obj.module_power_good_fd = module_power_good_fd - - if 0 == val_int: - logger.log_info(f'port {port} power is not good') - module_sm_obj.set_final_state(STATE_HW_NOT_PRESENT) - return STATE_HW_NOT_PRESENT - elif 1 == val_int: - logger.log_info(f'port {port} power is good') - return STATE_MODULE_AVAILABLE - except Exception as e: - logger.log_info("exception {} for port {}".format(e, port)) - return STATE_HW_NOT_PRESENT - logger.log_info(f'port {port} has no power good file {module_fd_indep_path}') - module_sm_obj.set_final_state(STATE_HW_NOT_PRESENT) - return STATE_HW_NOT_PRESENT - - def check_if_power_on(self, port, module_sm_obj, dynamic=False): - logger.log_info(f'enter check_if_power_on for port {port}') - module_fd_indep_path = SYSFS_INDEPENDENT_FD_POWER_ON.format(port) - if os.path.isfile(module_fd_indep_path): - try: - val_int = utils.read_int_from_file(module_fd_indep_path) - if 0 == val_int: - logger.log_info(f'check_if_power_on port {port} is not powered') - return STATE_NOT_POWERED - elif 1 == val_int: - logger.log_info(f'check_if_power_on port {port} is powered') - return STATE_POWERED - except Exception as e: - logger.log_info(f'check_if_power_on got exception {e}') - module_sm_obj.set_final_state(STATE_HW_NOT_PRESENT) - return STATE_HW_NOT_PRESENT - - def power_on_module(self, port, module_sm_obj, dynamic=False): - logger.log_info(f'enter power_on_module for port {port}') - if not module_sm_obj.wait_for_power_on: - module_fd_indep_path_po = SYSFS_INDEPENDENT_FD_POWER_ON.format(port) - module_fd_indep_path_r = SYSFS_INDEPENDENT_FD_HW_RESET.format(port) - try: - if os.path.isfile(module_fd_indep_path_po): - logger.log_info("powerOnModule powering on via {} for port {}".format(module_fd_indep_path_po, port)) - # 
echo 1 > /sys/module/sx_core/$asic/$module/power_on - utils.write_file(module_fd_indep_path_po, "1") - if os.path.isfile(module_fd_indep_path_r): - logger.log_info("powerOnModule resetting via {} for port {}".format(module_fd_indep_path_r, port)) - # de-assert hw_reset - low polarity. 1 for de-assert 0 for assert - # echo 1 > /sys/module/sx_core/$asic/$module/hw_reset - utils.write_file(module_fd_indep_path_r, "1") - self.add_port_to_wait_reset(module_sm_obj) - except Exception as e: - logger.log_info("exception in powerOnModule {} for port {}".format(e, port)) - return STATE_HW_NOT_PRESENT - return STATE_NOT_POWERED - - def check_module_type(self, port, module_sm_obj, dynamic=False): - logger.log_info("enter check_module_type port {} module_sm_obj {}".format(port, module_sm_obj)) - sfp = sfp_module.SFP(port) - xcvr_api = sfp.get_xcvr_api() - if not xcvr_api: - logger.log_info("check_module_type calling sfp reinit for port {} module_sm_obj {}" - .format(port, module_sm_obj)) - sfp.reinit() - logger.log_info("check_module_type setting as FW control as xcvr_api is empty for port {} module_sm_obj {}" - .format(port, module_sm_obj)) - return STATE_FW_CONTROL - # QSFP-DD ID is 24, OSFP ID is 25 - only these 2 are supported currently as independent module - SW controlled - if not isinstance(xcvr_api, cmis.CmisApi): - logger.log_info("check_module_type setting STATE_FW_CONTROL for {} in check_module_type port {} module_sm_obj {}" - .format(xcvr_api, port, module_sm_obj)) - return STATE_FW_CONTROL - else: - if xcvr_api.is_flat_memory(): - logger.log_info("check_module_type port {} setting STATE_FW_CONTROL module ID {} due to flat_mem device" - .format(xcvr_api, port)) - return STATE_FW_CONTROL - logger.log_info("check_module_type checking power cap for {} in check_module_type port {} module_sm_obj {}" - .format(xcvr_api, port, module_sm_obj)) - power_cap = self.check_power_cap(port, module_sm_obj) - if power_cap is STATE_POWER_LIMIT_ERROR: - 
module_sm_obj.set_final_state(STATE_POWER_LIMIT_ERROR) - return STATE_POWER_LIMIT_ERROR - else: - # first read the frequency support - if it's 1 then continue, if it's 0 no need to do anything - module_fd_freq_support_path = SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format(port) - val_int = utils.read_int_from_file(module_fd_freq_support_path) - if 1 == val_int: - # read the module maximum supported clock of Management Comm Interface (MCI) from module EEPROM. - # from byte 2 bits 3-2: - # 00b means module supports up to 400KHz - # 01b means module supports up to 1MHz - logger.log_info(f"check_module_type reading mci max frequency for port {port}") - read_mci = xcvr_api.xcvr_eeprom.read_raw(2, 1) - logger.log_info(f"check_module_type read mci max frequency {read_mci} for port {port}") - mci_bits = read_mci & 0b00001100 - logger.log_info(f"check_module_type read mci max frequency bits {mci_bits} for port {port}") - # Then, set it to frequency Sysfs using: - # echo > /sys/module/sx_core/$asic/$module/frequency // val: 0 - up to 400KHz, 1 - up to 1MHz - indep_fd_freq = SYSFS_INDEPENDENT_FD_FREQ.format(port) - utils.write_file(indep_fd_freq, mci_bits) - return STATE_SW_CONTROL - - def check_power_cap(self, port, module_sm_obj, dynamic=False): - logger.log_info("enter check_power_cap port {} module_sm_obj {}".format(port, module_sm_obj)) - sfp = sfp_module.SFP(port) - xcvr_api = sfp.get_xcvr_api() - field = xcvr_api.xcvr_eeprom.mem_map.get_field(consts.MAX_POWER_FIELD) - powercap_ba = xcvr_api.xcvr_eeprom.reader(field.get_offset(), field.get_size()) - logger.log_info("check_power_cap got powercap bytearray {} for port {} module_sm_obj {}".format(powercap_ba, port, module_sm_obj)) - powercap = int.from_bytes(powercap_ba, "big") - logger.log_info("check_power_cap got powercap {} for port {} module_sm_obj {}".format(powercap, port, module_sm_obj)) - indep_fd_power_limit = self.get_sysfs_ethernet_port_fd(SYSFS_INDEPENDENT_FD_POWER_LIMIT, port) - cage_power_limit = 
utils.read_int_from_file(indep_fd_power_limit) - logger.log_info("check_power_cap got cage_power_limit {} for port {} module_sm_obj {}".format(cage_power_limit, port, module_sm_obj)) - if powercap > int(cage_power_limit): - logger.log_info("check_power_cap powercap {} != cage_power_limit {} for port {} module_sm_obj {}".format(powercap, cage_power_limit, port, module_sm_obj)) - module_sm_obj.set_final_state(STATE_POWER_LIMIT_ERROR) - return STATE_POWER_LIMIT_ERROR - - def save_module_control_mode(self, port, module_sm_obj, dynamic=False): - detection_method = 'dynamic' if dynamic else 'static' - logger.log_info("{} detection save_module_control_mode setting current state {} for port {} as final state" - .format(detection_method, module_sm_obj.get_current_state(), port)) - state = module_sm_obj.get_current_state() - module_sm_obj.set_final_state(state) - try: - if state == STATE_FW_CONTROL: - # echo 0 > /sys/module/sx_core/$asic/$module/control - indep_fd_fw_control = SYSFS_INDEPENDENT_FD_FW_CONTROL.format(port) - utils.write_file(indep_fd_fw_control, "0") - logger.log_info("save_module_control_mode set FW control for state {} port {}".format(state, port)) - # update the presence sysfs fd to legacy FD presence, first close the previous fd - module_sm_obj.module_fd.close() - module_fd_legacy_path = SYSFS_LEGACY_FD_PRESENCE.format(port) - module_sm_obj.set_module_fd_path(module_fd_legacy_path) - module_fd = open(module_fd_legacy_path, "r") - module_sm_obj.set_module_fd(module_fd) - logger.log_info("save_module_control_mode changed module fd to legacy present for port {}".format(port)) - else: - # registering power good sysfs even if not good, so we can get an event from poller upon changes - self.register_fd_for_polling(module_sm_obj, module_sm_obj.module_power_good_fd, 'power_good') - # register the module's sysfs fd to poller with ERR and PRI attrs - logger.log_info("save_module_control_mode registering sysfs fd {} number {} path {} for port {}" - 
.format(module_sm_obj.module_fd, module_sm_obj.module_fd.fileno(), module_sm_obj.set_module_fd_path, port)) - except Exception as e: - logger.log_error("{} detection exception on read presence {} for port {} fd name {} traceback:\n{}" - .format(detection_method, e, port, module_sm_obj.module_fd.name, traceback.format_exc())) - self.register_fd_for_polling(module_sm_obj, module_sm_obj.module_fd, 'presence') - logger.log_info("save_module_control_mode set current state {} for port {} as final state {}".format( - module_sm_obj.get_current_state(), port, module_sm_obj.get_final_state())) - - def register_fd_for_polling(self, module_sm_obj, fd, fd_name): - self.fds_mapping_to_obj[fd.fileno()] = {'module_obj' : module_sm_obj, - 'fd': fd, - 'fd_name' : fd_name} - if module_sm_obj.port_num not in self.port_to_fds: - self.port_to_fds[module_sm_obj.port_num] = [fd] - else: - self.port_to_fds[module_sm_obj.port_num].append(fd) - self.poll_obj.register(fd, select.POLLERR | select.POLLPRI) - - def deregister_fd_from_polling(self, port): - if port in self.port_to_fds: - fds = self.port_to_fds[port] - for fd in fds: - self.fds_mapping_to_obj.pop(fd.fileno()) - self.poll_obj.unregister(fd) - self.port_to_fds.pop(port) - - def timerTask(self): # wakes up every 1 second - logger.log_info("timerTask entered run state") - empty = False - i = 0 - while not empty: - logger.log_info("timerTask while loop itartion {}".format(i)) - empty = True - port_list_to_delete = [] - for port in self.waiting_modules_list: - logger.log_info("timerTask working on port {}".format(port)) - empty = False - module = self.sfp_port_dict[port] - logger.log_info("timerTask got module with port_num {} from port {}".format(module.port_num, port)) - state = module.get_current_state() - if module and state == STATE_NOT_POWERED: - logger.log_info("timerTask module {} current_state {} counting seconds since reset_start_time" - .format(module, module.get_current_state())) - if time.time() - module.reset_start_time >= 
3: - # set next state as STATE_POWERED state to trigger the function of check module type - logger.log_info("timerTask module port {} locking lock of port {}".format(module.port_num, module.port_num)) - self.modules_lock_list[module.port_num].acquire() - logger.log_info("timerTask module port {} setting next state to STATE_POWERED".format(module.port_num)) - module.set_next_state(STATE_POWERED) - logger.log_info("timerTask module port {} advancing next state".format(module.port_num)) - module.advance_state() - logger.log_info("timerTask module port {} releasing lock of port {}".format(port, module.port_num)) - self.modules_lock_list[module.port_num].release() - logger.log_info("timerTask module port {} adding to delete list to remove from waiting_modules_list".format(module.port_num)) - port_list_to_delete.append(module.port_num) - logger.log_info("timerTask deleting ports {} from waiting_modules_list...".format(port_list_to_delete)) - for port in port_list_to_delete: - logger.log_info("timerTask deleting port {} from waiting_modules_list".format(port)) - self.waiting_modules_list.remove(port) - logger.log_info("timerTask waiting_modules_list after deletion: {}".format(self.waiting_modules_list)) - time.sleep(1) - i += 1 - - def get_sysfs_ethernet_port_fd(self, sysfs_fd, port): - sysfs_eth_port_fd = sysfs_fd.format(port) - return sysfs_eth_port_fd - - def add_port_to_wait_reset(self, module_sm_obj): - module_sm_obj.reset_start_time = time.time() - logger.log_info("add_port_to_wait_reset reset_start_time {}".format(module_sm_obj.reset_start_time)) - module_sm_obj.wait_for_power_on = True - logger.log_info("add_port_to_wait_reset wait_for_power_on {}".format(module_sm_obj.wait_for_power_on)) - self.waiting_modules_list.add(module_sm_obj.port_num) - logger.log_info("add_port_to_wait_reset waiting_list after adding: {}".format(self.waiting_modules_list)) - - def map_ports_final_state(self, dynamic=False): - detection_method = 'dynamic' if dynamic else 'static' - 
logger.log_info(f"{detection_method} detection enter map_ports_final_state") - for port, module_obj in self.sfp_port_dict.items(): - final_state = module_obj.get_final_state() - if final_state: - # add port to delete list that we will iterate on later and delete the ports from sfp_port_dict - self.sfp_delete_list_from_port_dict.append(port) - if final_state in [STATE_HW_NOT_PRESENT, STATE_POWER_LIMIT_ERROR, STATE_ERROR_HANDLER]: - port_status = '0' - logger.log_info(f"{detection_method} detection adding port {port} to register_hw_present_fds") - self.register_hw_present_fds.append(module_obj) - else: - port_status = '1' - self.sfp_changes_dict[str(module_obj.port_num + 1)] = port_status - - def delete_ports_from_dict(self, dynamic=False): - detection_method = 'dynamic' if dynamic else 'static' - logger.log_info(f"{detection_method} detection sfp_port_dict before deletion: {self.sfp_port_dict}") - for port in self.sfp_delete_list_from_port_dict: - del self.sfp_port_dict[port] - self.sfp_delete_list_from_port_dict = [] - logger.log_info("{} detection sfp_port_dict after deletion: {}".format(detection_method, self.sfp_port_dict)) - - def send_changes_to_shared_queue(self, dynamic=False): - detection_method = 'dynamic' if dynamic else 'static' - if self.sfp_changes_dict: - logger.log_info(f"{detection_method} detection putting sfp_changes_dict {self.sfp_changes_dict} " - f"in modules changes queue...") - try: - self.modules_changes_queue.put(self.sfp_changes_dict, timeout=1) - self.sfp_changes_dict = {} - logger.log_info(f"{detection_method} sfp_changes_dict after put changes: {self.sfp_changes_dict}") - except queue.Full: - logger.log_info(f"{detection_method} failed to put item from modules changes queue, queue is full") - else: - logger.log_info(f"{detection_method} sfp_changes_dict {self.sfp_changes_dict} is empty...") - - def register_presece_closed_ports(self, dynamic=False, module_obj_list=[]): - detection_method = 'dynamic' if dynamic else 'static' - 
logger.log_info(f"{detection_method} detection enter register_presence_closed_ports") - for module_obj in module_obj_list: - port = module_obj.port_num - if self.is_supported_indep_mods_system: - module_fd_indep_path = SYSFS_INDEPENDENT_FD_PRESENCE.format(port) - else: - module_fd_indep_path = SYSFS_LEGACY_FD_PRESENCE.format(port) - module_obj.set_module_fd_path(module_fd_indep_path) - module_fd = open(module_fd_indep_path, "r") - module_obj.set_module_fd(module_fd) - logger.log_info(f"{detection_method} registering fd {module_fd} fd name {module_fd.name} for port {port}") - self.register_fd_for_polling(module_obj, module_fd, 'presence') - -class ModuleStateMachine(object): - - def __init__(self, port_num=0, initial_state=STATE_HW_NOT_PRESENT, current_state=STATE_HW_NOT_PRESENT - , next_state=STATE_HW_NOT_PRESENT, final_state='', is_indep_module=False - , module_fd_path='', module_fd=None, reset_start_time=None - , eeprom_poweron_reset_retries=1, module_power_good_fd_path=None, module_power_good_fd=None): - - self.port_num = port_num - self.initial_state = initial_state - self.current_state = current_state - self.next_state = next_state - self.final_state = final_state - self.is_indep_modules = is_indep_module - self.module_fd_path = module_fd_path - self.module_fd = module_fd - self.reset_start_time = reset_start_time - self.wait_for_power_on = False - self.eeprom_poweron_reset_retries = eeprom_poweron_reset_retries - self.module_power_good_fd_path = module_power_good_fd_path - self.module_power_good_fd = module_power_good_fd - - def set_initial_state(self, state): - self.initial_state = state - - def get_current_state(self): - return self.current_state - - def set_current_state(self, state): - self.current_state = state - - def get_next_state(self): - return self.next_state - - def set_next_state(self, state): - self.next_state = state - - def get_final_state(self): - return self.final_state - - def set_final_state(self, state, detection_method='static'): - 
logger.log_info(f"{detection_method} set_final_state setting {state} port {self.port_num}") - self.final_state = state - - def advance_state(self): - self.set_current_state(self.next_state) - self.next_state = '' - - def set_is_indep_modules(self, is_indep_modules): - self.is_indep_modules = is_indep_modules - - def set_module_fd_path(self, module_fd_path): - self.module_fd_path = module_fd_path - - def set_module_fd(self, module_fd): - self.module_fd = module_fd - - def reset_all_states(self, def_state=STATE_HW_NOT_PRESENT, retries=1, close_presence_ports='0'): - self.initial_state = def_state - self.current_state = def_state - self.next_state = def_state - self.final_state = '' - self.wait_for_power_on = False - self.eeprom_poweron_reset_retries = retries - if '0' == close_presence_ports: - self.module_fd.close() - if self.module_power_good_fd: - self.module_power_good_fd.close() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index 90462e9ed0fe..a45840ffc26b 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -24,16 +24,18 @@ try: import ctypes + import select import subprocess import os import threading + import time from sonic_py_common.logger import Logger from sonic_py_common.general import check_output_pipe from . 
import utils from .device_data import DeviceDataManager from sonic_platform_base.sonic_xcvr.sfp_optoe_base import SfpOptoeBase from sonic_platform_base.sonic_xcvr.fields import consts - from sonic_platform_base.sonic_xcvr.api.public import sff8636, sff8436 + from sonic_platform_base.sonic_xcvr.api.public import cmis, sff8636, sff8436 except ImportError as e: raise ImportError (str(e) + "- required module not found") @@ -127,7 +129,44 @@ CPU_MASK = PORT_TYPE_MASK & (PORT_TYPE_CPU << PORT_TYPE_OFFSET) # parameters for SFP presence +SFP_STATUS_REMOVED = '0' SFP_STATUS_INSERTED = '1' +SFP_STATUS_ERROR = '2' +SFP_STATUS_UNKNOWN = '-1' + +# SFP status from PMAOS register +# 0x1 plug in +# 0x2 plug out +# 0x3 plug in with error +# 0x4 disabled, at this status SFP eeprom is not accessible, +# and presence status also will be not present, +# so treate it as plug out. +SDK_SFP_STATE_IN = 0x1 +SDK_SFP_STATE_OUT = 0x2 +SDK_SFP_STATE_ERR = 0x3 +SDK_SFP_STATE_DIS = 0x4 +SDK_SFP_STATE_UNKNOWN = 0x5 + +SDK_STATUS_TO_SONIC_STATUS = { + SDK_SFP_STATE_IN: SFP_STATUS_INSERTED, + SDK_SFP_STATE_OUT: SFP_STATUS_REMOVED, + SDK_SFP_STATE_ERR: SFP_STATUS_ERROR, + SDK_SFP_STATE_DIS: SFP_STATUS_REMOVED, + SDK_SFP_STATE_UNKNOWN: SFP_STATUS_UNKNOWN +} + +# SDK error definitions begin + +# SFP errors that will block eeprom accessing +SDK_SFP_BLOCKING_ERRORS = [ + 0x2, # SFP.SFP_ERROR_BIT_I2C_STUCK, + 0x3, # SFP.SFP_ERROR_BIT_BAD_EEPROM, + 0x5, # SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE, + 0x6, # SFP.SFP_ERROR_BIT_HIGH_TEMP, + 0x7, # SFP.SFP_ERROR_BIT_BAD_CABLE +] + +# SDK error definitions end # SFP constants SFP_PAGE_SIZE = 256 # page size of page0h @@ -162,6 +201,60 @@ SFP_DEFAULT_TEMP_CRITICAL_THRESHOLD = 80.0 SFP_TEMPERATURE_SCALE = 8.0 +# Module host management definitions begin +SFP_SW_CONTROL = 1 +SFP_FW_CONTROL = 0 + +CMIS_MAX_POWER_OFFSET = 201 + +SFF_POWER_CLASS_MASK = 0xE3 +SFF_POWER_CLASS_MAPPING = { + 0: 1.5, # 1.5W + 64: 2, # 2.0W + 128: 2.5, # 2.5W + 192: 3.5, # 3.5W + 193: 4, # 4.0W + 
194: 4.5, # 4.5W + 195: 5 # 5.0W +} +SFF_POWER_CLASS_OFFSET = 129 +SFF_POWER_CLASS_8_INDICATOR = 32 +SFF_POWER_CLASS_8_OFFSET = 107 + +CMIS_MCI_EEPROM_OFFSET = 2 +CMIS_MCI_MASK = 0b00001100 + +STATE_DOWN = 'Down' # Initial state +STATE_INIT = 'Initializing' # Module starts initializing, check module present, also power on the module if need +STATE_RESETTING = 'Resetting' # Module is resetting the firmware +STATE_POWERED_ON = 'Power On' # Module is powered on, module firmware has been loaded, check module power is in good state +STATE_SW_CONTROL = 'Software Control' # Module is under software control +STATE_FW_CONTROL = 'Firmware Control' # Module is under firmware control +STATE_POWER_BAD = 'Power Bad' # Module power_good returns 0 +STATE_POWER_LIMIT_ERROR = 'Exceed Power Limit' # Module power exceeds cage power limit +STATE_NOT_PRESENT = 'Not Present' # Module is not present + +EVENT_START = 'Start' +EVENT_NOT_PRESENT = 'Not Present' +EVENT_RESET = 'Reset' +EVENT_POWER_ON = 'Power On' +EVENT_RESET_DONE = 'Reset Done' +EVENT_POWER_BAD = 'Power Bad' +EVENT_SW_CONTROL = 'Software Control' +EVENT_FW_CONTROL = 'Firmware Control' +EVENT_POWER_LIMIT_EXCEED = 'Power Limit Exceed' +EVENT_POWER_GOOD = 'Power Good' +EVENT_PRESENT = 'Present' + +ACTION_ON_START = 'On Start' +ACTION_ON_RESET = 'On Reset' +ACTION_ON_POWERED = 'On Powered' +ACTION_ON_SW_CONTROL = 'On Software Control' +ACTION_ON_FW_CONTROL = 'On Firmware Control' +ACTION_ON_POWER_LIMIT_ERROR = 'On Power Limit Error' +ACTION_ON_CANCEL_WAIT = 'On Cancel Wait' +# Module host management definitions end + # SFP EEPROM limited bytes limited_eeprom = { SFP_TYPE_CMIS: { @@ -252,30 +345,6 @@ def _get_module_info(self, sdk_index): return oper_state, error_type - @classmethod - def get_sfp_index_to_logical_port(cls, force=False): - if not cls.sfp_index_to_logical_port_dict or force: - config_db = utils.DbUtils.get_db_instance('CONFIG_DB') - port_data = config_db.get_table('PORT') - for key, data in port_data.items(): - if 
data['index'] not in cls.sfp_index_to_logical_port_dict: - cls.sfp_index_to_logical_port_dict[int(data['index']) - 1] = key - - @classmethod - def get_logical_port_by_sfp_index(cls, sfp_index): - with cls.sfp_index_to_logical_lock: - cls.get_sfp_index_to_logical_port() - logical_port_name = cls.sfp_index_to_logical_port_dict.get(sfp_index) - if not logical_port_name: - cls.get_sfp_index_to_logical_port(force=True) - else: - config_db = utils.DbUtils.get_db_instance('CONFIG_DB') - current_index = int(config_db.get('CONFIG_DB', f'PORT|{logical_port_name}', 'index')) - if current_index != sfp_index: - cls.get_sfp_index_to_logical_port(force=True) - logical_port_name = cls.sfp_index_to_logical_port_dict.get(sfp_index) - return logical_port_name - class SFP(NvidiaSFPCommon): """Platform-specific SFP class""" @@ -285,12 +354,43 @@ class SFP(NvidiaSFPCommon): SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED = 'PMD type not enabled' SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED = 'PCIE system power slot exceeded' SFP_MLNX_ERROR_DESCRIPTION_RESERVED = 'Reserved' + + SDK_ERRORS_TO_DESCRIPTION = { + 0x1: SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE, + 0x4: SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST, + 0x8: SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED, + 0xc: SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED + } SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE = 0x00010000 SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST = 0x00020000 SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED = 0x00040000 SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED = 0x00080000 SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000 + + SDK_ERRORS_TO_ERROR_BITS = { + 0x0: SfpOptoeBase.SFP_ERROR_BIT_POWER_BUDGET_EXCEEDED, + 0x1: SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE, + 0x2: SfpOptoeBase.SFP_ERROR_BIT_I2C_STUCK, + 0x3: SfpOptoeBase.SFP_ERROR_BIT_BAD_EEPROM, + 0x4: SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST, + 0x5: SfpOptoeBase.SFP_ERROR_BIT_UNSUPPORTED_CABLE, + 0x6: SfpOptoeBase.SFP_ERROR_BIT_HIGH_TEMP, + 
0x7: SfpOptoeBase.SFP_ERROR_BIT_BAD_CABLE, + 0x8: SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED, + 0xc: SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED + } + + # Class level state machine object, only applicable for module host management + sm = None + + # Class level wait SFP ready task, the task waits for module to load its firmware after resetting, + # only applicable for module host management + wait_ready_task = None + + # Class level action table which stores the mapping from action name to action function, + # only applicable for module host management + action_table = None def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, lc_name=None): super(SFP, self).__init__(sfp_index) @@ -311,6 +411,11 @@ def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, l self.slot_id = slot_id self._sfp_type_str = None + # SFP state, only applicable for module host management + self.state = STATE_DOWN + + def __str__(self): + return f'SFP {self.sdk_index}' def reinit(self): """ @@ -318,7 +423,7 @@ def reinit(self): :return: """ self._sfp_type_str = None - self.refresh_xcvr_api() + self._xcvr_api = None def get_presence(self): """ @@ -327,10 +432,6 @@ def get_presence(self): Returns: bool: True if device is present, False if not """ - try: - self.is_sw_control() - except: - return False eeprom_raw = self._read_eeprom(0, 1, log_on_error=False) return eeprom_raw is not None @@ -439,7 +540,7 @@ def get_lpmode(self): if self.is_sw_control(): api = self.get_xcvr_api() return api.get_lpmode() if api else False - elif DeviceDataManager.is_independent_mode(): + elif DeviceDataManager.is_module_host_management_mode(): file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_POWER_MODE power_mode = utils.read_int_from_file(file_path) return power_mode == POWER_MODE_LOW @@ -646,7 +747,7 @@ def set_lpmode(self, lpmode): # If at some point get_lpmode=desired_lpmode, it will return true. 
def is_sw_control(self):
    """Check whether this module is under software control.

    Returns:
        bool: False when module host management mode is disabled; otherwise
            True if the module's "control" sysfs reads 1.

    Raises:
        Exception: if the "control" sysfs cannot be read.
    """
    if not DeviceDataManager.is_module_host_management_mode():
        return False

    control_path = f'/sys/module/sx_core/asic0/module{self.sdk_index}/control'
    try:
        return utils.read_int_from_file(control_path, raise_exception=True, log_func=None) == 1
    except Exception as e:
        # just in case control file does not exist.
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate,
        # and the original error is chained for easier debugging.
        raise Exception(f'control sysfs for SFP {self.sdk_index} does not exist') from e
def set_hw_reset(self, value):
    """Write the hw_reset sysfs of this module

    Args:
        value (int): value written as-is to the hw_reset sysfs.
            NOTE(review): the previous wording was "1 for reset, 0 for leaving reset",
            but get_reset_state() treats a value of 1 as "not in reset", action_on_start()
            writes 1 for a module it logs as being "in reset state", and
            action_on_power_limit_error() writes 0 right after powering the module off.
            The polarity looks inverted - confirm against the SDK sysfs documentation.
    """
    hw_reset_path = f'/sys/module/sx_core/asic0/module{self.sdk_index}/hw_reset'
    utils.write_file(hw_reset_path, value)
def get_module_max_power(self):
    """Get module max power from EEPROM

    Returns:
        int or float: max power in units of 0.25W. Returns -1 if the EEPROM cannot
            be read, the power class field is invalid, or the api type is neither
            CMIS nor SFF. check_power_capability() treats a negative value as failure.
    """
    xcvr_api = self.get_xcvr_api()
    if self.is_cmis_api(xcvr_api):
        powercap_raw = self.read_eeprom(CMIS_MAX_POWER_OFFSET, 1)
        if not powercap_raw:
            # A failed read must not crash the caller with TypeError/IndexError
            logger.log_error(f'SFP {self.sdk_index} failed to read max power from EEPROM')
            return -1
        return powercap_raw[0]
    elif self.is_sff_api(xcvr_api):
        power_class_raw = self.read_eeprom(SFF_POWER_CLASS_OFFSET, 1)
        if not power_class_raw:
            logger.log_error(f'SFP {self.sdk_index} failed to read power class from EEPROM')
            return -1

        power_class_bit = power_class_raw[0] & SFF_POWER_CLASS_MASK
        if power_class_bit in SFF_POWER_CLASS_MAPPING:
            powercap = SFF_POWER_CLASS_MAPPING[power_class_bit]
        elif power_class_bit == SFF_POWER_CLASS_8_INDICATOR:
            # According to standard:
            # Byte 128:
            #     if bit 5 is 1, "Power Class 8 implemented (Max power declared in byte 107)"
            # Byte 107:
            #     "Maximum power consumption of module. Unsigned integer with LSB = 0.1 W."
            power_class_8_byte = self.read_eeprom(SFF_POWER_CLASS_8_OFFSET, 1)
            if not power_class_8_byte:
                logger.log_error(f'SFP {self.sdk_index} failed to read power class 8 max power from EEPROM')
                return -1
            powercap = power_class_8_byte[0] * 0.1
        else:
            logger.log_error(f'SFP {self.sdk_index} got invalid value for power class field: {power_class_bit}')
            return -1

        # powercap is in 1 Watt units while the power_limit sysfs is in 0.25 Watt units.
        # Multiplying the EEPROM value by 4 puts both in the same unit for a meaningful comparison.
        return powercap * 4
    else:
        # Should never hit, just in case
        logger.log_error(f'SFP {self.sdk_index} with api type {xcvr_api} does not support getting max power')
        return -1
def disable_tx_for_sff_optics(self):
    """Disable TX for SFF optics

    Does nothing unless the module's xcvr api is an SFF api and that api
    reports that TX disable is supported.
    """
    xcvr_api = self.get_xcvr_api()
    if not self.is_sff_api(xcvr_api):
        return
    if not xcvr_api.get_tx_disable_support():
        return

    logger.log_info(f'Disabling tx for SFP {self.sdk_index}')
    xcvr_api.tx_disable(True)
@classmethod
def action_on_start(cls, sfp):
    """Entry action registered for ACTION_ON_START (entry action of STATE_INIT).

    Reads hw_present, then power_on, then (only for a powered module) the reset
    state, and raises exactly one follow-up event on the SFP:
    EVENT_NOT_PRESENT, EVENT_RESET or EVENT_POWER_ON.

    Args:
        sfp (object): the SFP instance the action runs for
    """
    if not sfp.get_hw_present():
        logger.log_info(f'SFP {sfp.sdk_index} is not present')
        sfp.on_event(EVENT_NOT_PRESENT)
        return

    if not sfp.get_power_on():
        # Power the module up first, then write hw_reset and wait for the reset to finish
        logger.log_info(f'SFP {sfp.sdk_index} is not powered on')
        sfp.set_power(True)
        sfp.set_hw_reset(1)
        sfp.on_event(EVENT_RESET)
        return

    if sfp.get_reset_state():
        # Powered and not held in reset: nothing to wait for
        sfp.on_event(EVENT_POWER_ON)
        return

    logger.log_info(f'SFP {sfp.sdk_index} is in reset state')
    sfp.set_hw_reset(1)
    sfp.on_event(EVENT_RESET)
@classmethod
def get_wait_ready_task(cls):
    """Return the class level WaitSfpReadyTask, creating it on first use.

    Returns:
        object: an instance of WaitSfpReadyTask
    """
    existing_task = cls.wait_ready_task
    if existing_task:
        return existing_task

    # Imported lazily, only when the task actually has to be created
    from .wait_sfp_ready_task import WaitSfpReadyTask
    cls.wait_ready_task = WaitSfpReadyTask()
    return cls.wait_ready_task
def refresh_poll_obj(self, poll_obj, all_registered_fds):
    """Refresh polling object and registered fds. This function is usually called when a cable plug
    event occurs. For example, user plugs out a software control module and replaces it with a firmware
    control cable. In such case, poll_obj was polling "hw_present" and "power_good" for software control,
    and it needs to be changed to poll "present" for the new control type which is firmware control.

    Args:
        poll_obj (object): poll object; must provide register() and unregister()
        all_registered_fds (dict): fds that have been registered to poll object, keyed by fileno with
            values of (sdk_index, file object, poll type). Updated in place.
    """
    # find fds registered by this SFP: poll type -> (fileno, file object)
    current_registered_fds = {item[2]: (fileno, item[1]) for fileno, item in all_registered_fds.items() if item[0] == self.sdk_index}
    logger.log_debug(f'SFP {self.sdk_index} registered fds are: {current_registered_fds}')
    if self.state == STATE_FW_CONTROL:
        target_poll_types = ['present']
    else:
        target_poll_types = ['hw_present', 'power_good']

    for target_poll_type in target_poll_types:
        if target_poll_type not in current_registered_fds:
            # need add new fd for polling
            logger.log_debug(f'SFP {self.sdk_index} is registering file descriptor: {target_poll_type}')
            fd = self.get_fd(target_poll_type)
            poll_obj.register(fd, select.POLLERR | select.POLLPRI)
            all_registered_fds[fd.fileno()] = (self.sdk_index, fd, target_poll_type)
        else:
            # the fd is already in polling
            current_registered_fds.pop(target_poll_type)

    # Whatever is left was registered for a poll type that is no longer wanted
    for item in current_registered_fds.values():
        fileno, stale_fd = item
        # Deregister poll, close fd
        logger.log_debug(f'SFP {self.sdk_index} is de-registering file descriptor: {item}')
        # Unregister on the poll object itself: it is the same object register() was called on above
        poll_obj.unregister(stale_fd)
        all_registered_fds.pop(fileno)
        stale_fd.close()
@classmethod
def initialize_sfp_modules(cls, sfp_list):
    """Initialize all modules. Only applicable when module host management is enabled

    Starts the wait-ready task, feeds EVENT_START to every SFP, then, if any SFP was
    scheduled to wait for reset, keeps delivering EVENT_RESET_DONE to the SFPs the
    task reports as ready until the maximum wait time has elapsed.

    Args:
        sfp_list (object): all sfps
    """
    wait_ready_task = cls.get_wait_ready_task()
    wait_ready_task.start()

    for s in sfp_list:
        s.on_event(EVENT_START)

    if not wait_ready_task.empty():
        # Wait until wait_ready_task is up. Sleep briefly instead of spinning so the
        # waiting thread does not burn CPU while the task thread starts.
        while not wait_ready_task.is_alive():
            time.sleep(0.01)

        # Resetting SFP requires a reloading of module firmware, it takes up to 3 seconds
        # according to standard
        max_wait_time = 3.5
        begin = time.time()
        while True:
            ready_sfp_set = wait_ready_task.get_ready_set()
            for sfp_index in ready_sfp_set:
                s = sfp_list[sfp_index]
                logger.log_debug(f'SFP {sfp_index} is recovered from resetting state')
                s.on_event(EVENT_RESET_DONE)
            elapse = time.time() - begin
            if elapse < max_wait_time:
                time.sleep(0.5)
            else:
                break

    # Verify that all modules are in a stable state
    for index, s in enumerate(sfp_list):
        if not s.in_stable_state():
            logger.log_error(f'SFP {index} is not in stable state after initializing, state={s.state}')
        logger.log_notice(f'SFP {index} is in state {s.state} after module initialization')
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -''' -listen to the SDK for the SFP change event and return to chassis. -''' - - -import sys, errno -import os -import time -import select - -from .device_data import DeviceDataManager -try: - if 'PLATFORM_API_UNIT_TESTING' not in os.environ: - from python_sdk_api.sx_api import * - else: - from mock import MagicMock - class MockSxFd(object): - fd = 99 - new_sx_fd_t_p = MagicMock(return_value=MockSxFd()) - new_sx_user_channel_t_p = MagicMock() -except KeyError: - pass -from sonic_py_common.logger import Logger -from .sfp import SFP - -# SFP status from PMAOS register -# 0x1 plug in -# 0x2 plug out -# 0x3 plug in with error -# 0x4 disabled, at this status SFP eeprom is not accessible, -# and presence status also will be not present, -# so treate it as plug out. -SDK_SFP_STATE_IN = 0x1 -SDK_SFP_STATE_OUT = 0x2 -SDK_SFP_STATE_ERR = 0x3 -SDK_SFP_STATE_DIS = 0x4 -SDK_SFP_STATE_UNKNOWN = 0x5 - -# SFP status used in this file only, will not expose to XCVRD -# STATUS_ERROR will be mapped to different status according to the error code -STATUS_UNKNOWN = '-1' -STATUS_ERROR = '-2' - -# SFP error code, only valid when SFP at SDK_SFP_STATE_ERR status -# Only 0x2, 0x3, 0x5, 0x6 and 0x7 will block the eeprom access, -# so will only report above errors to XCVRD and other errors will be -# printed to syslog. 
- -''' -0x0: "Power_Budget_Exceeded", -0x1: "Long_Range_for_non_MLNX_cable_or_module", -0x2: "Bus_stuck", -0x3: "bad_or_unsupported_EEPROM", -0x4: "Enforce_part_number_list", -0x5: "unsupported_cable", -0x6: "High_Temperature", -0x7: "bad_cable", -0x8: "PMD_type_is_not_enabled", -0x9: "[internal]Laster_TEC_failure", -0xa: "[internal]High_current", -0xb: "[internal]High_voltage", -0xd: "[internal]High_power", -0xe: "[internal]Module_state_machine_fault", -0xc: "pcie_system_power_slot_Exceeded" -''' - -# SFP errors that will block eeprom accessing -SDK_SFP_BLOCKING_ERRORS = [ - 0x2, # SFP.SFP_ERROR_BIT_I2C_STUCK, - 0x3, # SFP.SFP_ERROR_BIT_BAD_EEPROM, - 0x5, # SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE, - 0x6, # SFP.SFP_ERROR_BIT_HIGH_TEMP, - 0x7, # SFP.SFP_ERROR_BIT_BAD_CABLE -] - -SDK_ERRORS_TO_ERROR_BITS = { - 0x0: SFP.SFP_ERROR_BIT_POWER_BUDGET_EXCEEDED, - 0x1: SFP.SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE, - 0x2: SFP.SFP_ERROR_BIT_I2C_STUCK, - 0x3: SFP.SFP_ERROR_BIT_BAD_EEPROM, - 0x4: SFP.SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST, - 0x5: SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE, - 0x6: SFP.SFP_ERROR_BIT_HIGH_TEMP, - 0x7: SFP.SFP_ERROR_BIT_BAD_CABLE, - 0x8: SFP.SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED, - 0xc: SFP.SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED -} - -SDK_ERRORS_TO_DESCRIPTION = { - 0x1: SFP.SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE, - 0x4: SFP.SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST, - 0x8: SFP.SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED, - 0xc: SFP.SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED -} - -sfp_value_status_dict = { - SDK_SFP_STATE_IN: str(SFP.SFP_STATUS_BIT_INSERTED), - SDK_SFP_STATE_OUT: str(SFP.SFP_STATUS_BIT_REMOVED), - SDK_SFP_STATE_ERR: STATUS_ERROR, - SDK_SFP_STATE_DIS: str(SFP.SFP_STATUS_BIT_REMOVED), -} - -# system level event/error -EVENT_ON_ALL_SFP = '-1' -SYSTEM_NOT_READY = 'system_not_ready' -SYSTEM_READY = 'system_become_ready' -SYSTEM_FAIL = 'system_fail' - -SDK_DAEMON_READY_FILE = '/tmp/sdk_ready' - 
-PMPE_PACKET_SIZE = 2000 - -logger = Logger() - -class sfp_event: - ''' Listen to plugin/plugout cable events ''' - - SX_OPEN_RETRIES = 30 - SX_OPEN_TIMEOUT = 5 - SELECT_TIMEOUT = 1 - - def __init__(self, rj45_port_list=None): - self.swid = 0 - self.handle = None - - # Allocate SDK fd and user channel structures - self.rx_fd_p = new_sx_fd_t_p() - self.user_channel_p = new_sx_user_channel_t_p() - if rj45_port_list: - self.RJ45_port_set = set(rj45_port_list) - else: - self.RJ45_port_set = set() - - def initialize(self): - swid_cnt_p = None - - try: - # Wait for SDK daemon to be started with detect the sdk_ready file - retry = 0 - while not os.path.exists(SDK_DAEMON_READY_FILE): - if retry >= self.SX_OPEN_RETRIES: - raise RuntimeError("SDK daemon failed to start after {} retries and {} seconds waiting, exiting..." - .format(retry, self.SX_OPEN_TIMEOUT * self.SX_OPEN_RETRIES)) - else: - logger.log_info("SDK daemon not started yet, retry {} times".format(retry)) - retry += 1 - time.sleep(self.SX_OPEN_TIMEOUT) - - # After SDK daemon started, sx_api_open and sx_api_host_ifc_open is ready for call - rc, self.handle = sx_api_open(None) - if rc != SX_STATUS_SUCCESS: - raise RuntimeError("failed to call sx_api_open with rc {}, exiting...".format(rc)) - - rc = sx_api_host_ifc_open(self.handle, self.rx_fd_p) - if rc != SX_STATUS_SUCCESS: - raise RuntimeError("failed to call sx_api_host_ifc_open with rc {}, exiting...".format(rc)) - - self.user_channel_p.type = SX_USER_CHANNEL_TYPE_FD - self.user_channel_p.channel.fd = self.rx_fd_p - - # Wait for switch to be created and initialized inside SDK - retry = 0 - swid_cnt_p = new_uint32_t_p() - uint32_t_p_assign(swid_cnt_p, 0) - swid_cnt = 0 - while True: - if retry >= self.SX_OPEN_RETRIES: - raise RuntimeError("switch not created after {} retries and {} seconds waiting, exiting..." 
- .format(retry, self.SX_OPEN_RETRIES * self.SX_OPEN_TIMEOUT)) - else: - rc = sx_api_port_swid_list_get(self.handle, None, swid_cnt_p) - if rc == SX_STATUS_SUCCESS: - swid_cnt = uint32_t_p_value(swid_cnt_p) - if swid_cnt > 0: - delete_uint32_t_p(swid_cnt_p) - swid_cnt_p = None - break - else: - logger.log_info("switch not created yet, swid_cnt {}, retry {} times and wait for {} seconds" - .format(swid_cnt, retry, self.SX_OPEN_TIMEOUT * retry)) - else: - raise RuntimeError("sx_api_port_swid_list_get fail with rc {}, retry {} times and wait for {} seconds". - format(rc, retry, self.SX_OPEN_TIMEOUT * retry)) - - retry += 1 - time.sleep(self.SX_OPEN_TIMEOUT) - - # After switch was created inside SDK, sx_api_host_ifc_trap_id_register_set is ready to call - rc = sx_api_host_ifc_trap_id_register_set(self.handle, - SX_ACCESS_CMD_REGISTER, - self.swid, - SX_TRAP_ID_PMPE, - self.user_channel_p) - - if rc != SX_STATUS_SUCCESS: - raise RuntimeError("sx_api_host_ifc_trap_id_register_set failed with rc {}, exiting...".format(rc)) - except Exception as e: - logger.log_error("sfp_event initialization failed due to {}, exiting...".format(repr(e))) - if swid_cnt_p is not None: - delete_uint32_t_p(swid_cnt_p) - self.deinitialize() - - def deinitialize(self): - if self.handle is None: - return - - # unregister trap id - rc = sx_api_host_ifc_trap_id_register_set(self.handle, - SX_ACCESS_CMD_DEREGISTER, - self.swid, - SX_TRAP_ID_PMPE, - self.user_channel_p) - if rc != SX_STATUS_SUCCESS: - logger.log_error("sx_api_host_ifc_trap_id_register_set exited with error, rc {}".format(rc)) - - rc = sx_api_host_ifc_close(self.handle, self.rx_fd_p) - if rc != SX_STATUS_SUCCESS: - logger.log_error("sx_api_host_ifc_close exited with error, rc {}".format(rc)) - - rc = sx_api_close(self.handle) - if rc != SX_STATUS_SUCCESS: - logger.log_error("sx_api_close exited with error, rc {}".format(rc)) - - delete_sx_fd_t_p(self.rx_fd_p) - delete_sx_user_channel_t_p(self.user_channel_p) - - def 
check_sfp_status(self, port_change, error_dict, timeout): - """ - the meaning of timeout is aligned with select.select, which has the following meaning: - 0: poll, returns without blocked - arbitrary positive value: doesn't returns until at least fd in the set is ready or - seconds elapsed - Note: - check_sfp_status makes the use of select to retrieve the notifications, which means - it should has the logic of reading out all the notifications in the fd selected without blocked. - However, it fails to do that due to some sdk API's characteristics: - sx_lib_host_ifc_recv can only read one notification each time and will block when no notification in that fd. - sx_lib_host_ifc_recv_list can return all notification in the fd via a single reading operation but - not supported by PMPE register (I've tested it but failed) - as a result the only way to satisfy the logic is to call sx_lib_host_ifc_recv in a loop until all notifications - has been read and we have to find a way to check that. it seems the only way to check that is via using select. - in this sense, we return one notification each time check_sfp_status called and let the caller, get_change_event, - to repeat calling it with timeout = 0 in a loop until no new notification read (in this case it returns false). - by doing so all the notifications in the fd can be retrieved through a single call to get_change_event. 
- """ - found = 0 - - try: - read, _, _ = select.select([self.rx_fd_p.fd], [], [], float(timeout) / 1000) - print(read) - except select.error as err: - rc, msg = err - if rc == errno.EAGAIN or rc == errno.EINTR: - return False - else: - raise - - for fd in read: - if fd == self.rx_fd_p.fd: - success, port_list, module_state, error_type = self.on_pmpe(self.rx_fd_p) - print('success = ', success) - if not success: - logger.log_error("failed to read from {}".format(fd)) - break - - sfp_state = sfp_value_status_dict.get(module_state, STATUS_UNKNOWN) - error_description = None - if sfp_state == STATUS_UNKNOWN: - # in the following sequence, STATUS_UNKNOWN can be returned. - # so we shouldn't raise exception here. - # 1. some sfp module is inserted - # 2. sfp_event gets stuck and fails to fetch the change event instantaneously - # 3. and then the sfp module is removed - # 4. sfp_event starts to try fetching the change event - # in this case found is increased so that True will be returned - logger.log_info("unknown module state {}, maybe the port suffers two adjacent insertion/removal".format(module_state)) - found += 1 - continue - - # If get SFP status error(0x3) from SDK, then need to read the error_type to get the detailed error - if sfp_state == STATUS_ERROR: - sfp_state_bits = SDK_ERRORS_TO_ERROR_BITS.get(error_type) - if sfp_state_bits is None: - logger.log_error("Unrecognized error {} detected on ports {}".format(error_type, port_list)) - found += 1 - continue - - if error_type in SDK_SFP_BLOCKING_ERRORS: - # In SFP at error status case, need to overwrite the sfp_state with the exact error code - sfp_state_bits |= SFP.SFP_ERROR_BIT_BLOCKING - - # An error should be always set along with 'INSERTED' - sfp_state_bits |= SFP.SFP_STATUS_BIT_INSERTED - - # For vendor specific errors, the description should be returned as well - error_description = SDK_ERRORS_TO_DESCRIPTION.get(error_type) - - sfp_state = str(sfp_state_bits) - - for port in port_list: - 
logger.log_info("SFP on port {} state {}".format(port, sfp_state)) - port_change[port+1] = sfp_state - if error_description: - error_dict[port+1] = error_description - found += 1 - - return found != 0 - - def on_pmpe(self, fd_p): - ''' on port module plug event handler ''' - - # recv parameters - pkt_size = PMPE_PACKET_SIZE - pkt_size_p = new_uint32_t_p() - uint32_t_p_assign(pkt_size_p, pkt_size) - pkt = new_uint8_t_arr(pkt_size) - recv_info_p = new_sx_receive_info_t_p() - pmpe_t = sx_event_pmpe_t() - port_cnt_p = new_uint32_t_p() - uint32_t_p_assign(port_cnt_p, 0) - label_port_list = [] - module_state = 0 - error_type = pmpe_t.error_type - - rc = sx_lib_host_ifc_recv(fd_p, pkt, pkt_size_p, recv_info_p) - if rc != 0: - logger.log_error("sx_lib_host_ifc_recv exited with error, rc %d" % rc) - status = False - else: - status = True - unknown = False - pmpe_t = recv_info_p.event_info.pmpe - port_list_size = pmpe_t.list_size - logical_port_list = pmpe_t.log_port_list - module_state = pmpe_t.module_state - error_type = pmpe_t.error_type - module_id = pmpe_t.module_id - slot_id = pmpe_t.slot_id # For non-modular chassis, it should return 0 - - if module_state == SDK_SFP_STATE_ERR: - logger.log_error("Receive PMPE error event on module {}: status {} error type {}".format(module_id, module_state, error_type)) - elif module_state == SDK_SFP_STATE_DIS: - logger.log_notice("Receive PMPE disable event on module {}: status {}".format(module_id, module_state)) - elif module_state == SDK_SFP_STATE_IN or module_state == SDK_SFP_STATE_OUT: - logger.log_notice("Receive PMPE plug in/out event on module {}: status {}".format(module_id, module_state)) - elif module_state == SDK_SFP_STATE_UNKNOWN: - unknown = True - else: - logger.log_error("Receive PMPE unknown event on module {}: status {}".format(module_id, module_state)) - - # Call sx_api_port_device_get with port_cnt_p=0, SDK will return the logical port number - rc = sx_api_port_device_get(self.handle, 1, 0, None, port_cnt_p) - if 
rc != SX_STATUS_SUCCESS: - logger.log_error("Failed to get logical port number") - status = False - else: - port_cnt = uint32_t_p_value(port_cnt_p) - port_attributes_list = new_sx_port_attributes_t_arr(port_cnt) - rc = sx_api_port_device_get(self.handle, 1, 0, port_attributes_list, port_cnt_p) - if rc != SX_STATUS_SUCCESS: - logger.log_error("Failed to get logical port attributes") - status = False - else: - for i in range(port_list_size): - label_port = None - logical_port = sx_port_log_id_t_arr_getitem(logical_port_list, i) - for j in range(port_cnt): - port_attributes = sx_port_attributes_t_arr_getitem(port_attributes_list,j) - if port_attributes.log_port == logical_port: - label_port = slot_id * DeviceDataManager.get_linecard_max_port_count() + port_attributes.port_mapping.module_port - break - - if label_port is not None: - label_port_list.append(label_port) - delete_sx_port_attributes_t_arr(port_attributes_list) - - if unknown: - SFP_ports_with_unknown_event = set(label_port_list) - self.RJ45_port_set - if SFP_ports_with_unknown_event: - logger.log_error("Receive PMPE unknown event on module {}: status {}".format(module_id, module_state)) - else: - # For RJ45 ports, we treat unknown as disconnect - module_state = SDK_SFP_STATE_DIS - - delete_uint32_t_p(pkt_size_p) - delete_uint8_t_arr(pkt) - delete_sx_receive_info_t_p(recv_info_p) - delete_uint32_t_p(port_cnt_p) - - if not label_port_list: - logger.log_error('Dropping PMPE event due to label port not found') - - return status, label_port_list, module_state, error_type diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/state_machine.py b/platform/mellanox/mlnx-platform-api/sonic_platform/state_machine.py new file mode 100644 index 000000000000..d7b6faf10c37 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/state_machine.py @@ -0,0 +1,168 @@ +# +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
class State:
    """Represent a single state in a state machine.

    A state holds an optional entry action, an optional leave action and a
    transition table. Actions are referred to by name and are executed by
    calling ``entity.on_action(action_name)`` on the state entity.
    """

    def __init__(self, name):
        """Create a state without actions or transitions.

        Args:
            name (str): state name
        """
        self.name = name
        self.entry_action = None
        self.leave_action = None
        # event name -> name of the state to transit to
        self.transitions = {}
        # event name -> name of the action to run when that event arrives
        self.event_actions = {}

    def set_entry_action(self, action_name):
        """Set the action that is triggered when entering this state.

        Args:
            action_name (str): action name

        Returns:
            object: self, so that calls can be chained
        """
        self.entry_action = action_name
        return self

    def set_leave_action(self, action_name):
        """Set the action that is triggered when leaving this state.

        Args:
            action_name (str): action name

        Returns:
            object: self, so that calls can be chained
        """
        self.leave_action = action_name
        return self

    def add_transition(self, event, next_state, event_action=None):
        """Add a transition item to this state.

        Args:
            event (str): event name
            next_state (str): name of the state that the state entity will
                transit to upon this event
            event_action (str): optional action called when the event arrives

        Raises:
            RuntimeError: raise if the event is already in the transition
                table or in the action table

        Returns:
            object: self, so that calls can be chained
        """
        if event in self.transitions:
            raise RuntimeError(f'event {event} already exists in transition table of state {self.name}')

        self.transitions[event] = next_state

        if event_action:
            if event in self.event_actions:
                raise RuntimeError(f'event {event} already exists in action table of state {self.name}')
            self.event_actions[event] = event_action
        return self

    def on_enter(self, entity):
        """Called when the state entity enters this state.

        Triggers the entry action, if one is configured.

        Args:
            entity (obj): state entity
        """
        if self.entry_action:
            logger.log_debug(f'{entity} entered state [{self.name}] and is triggering action [{self.entry_action}]')
            entity.on_action(self.entry_action)
        else:
            logger.log_debug(f'{entity} entered state [{self.name}]')

    def on_leave(self, entity):
        """Called when the state entity leaves this state.

        Triggers the leave action, if one is configured.

        Args:
            entity (obj): state entity
        """
        if self.leave_action:
            entity.on_action(self.leave_action)

    def on_event(self, entity, event):
        """Called when the state entity has got an event.

        Args:
            entity (object): state entity
            event (str): event name

        Returns:
            str: next state name. An event that is not in the transition
                table is logged as an error and keeps the entity in this
                state (this state's own name is returned).
        """
        if event not in self.transitions:
            logger.log_error(f'{event} is not defined in state {self.name}')
            return self.name

        if event in self.event_actions:
            entity.on_action(self.event_actions[event])
        return self.transitions[event]


class StateMachine:
    """A registry of named states that drives state entities through them.

    The state entity passed to ``on_event`` must provide ``get_state()``,
    ``change_state(name)`` and ``on_action(name)``.
    """

    def __init__(self):
        # state name -> State object
        self.states = {}

    def add_state(self, state_name):
        """Register a state to the state machine.

        Args:
            state_name (str): name of the state

        Raises:
            RuntimeError: raise if state name already exists

        Returns:
            object: the new state object
        """
        if state_name in self.states:
            raise RuntimeError(f'state {state_name} already exists')

        state = State(state_name)
        self.states[state_name] = state
        return state

    def on_event(self, entity, event):
        """Called when an event occurs.

        Looks up the entity's current state, asks it for the next state and,
        only if the state actually changes, runs the leave action of the old
        state, updates the entity and runs the entry action of the new state.

        Args:
            entity (object): state entity
            event (str): event name

        Raises:
            RuntimeError: raise if the current state is not registered
            RuntimeError: raise if next state is not registered
        """
        current_state_name = entity.get_state()
        if current_state_name not in self.states:
            raise RuntimeError(f'Unknown state {current_state_name}')

        current_state = self.states[current_state_name]
        next_state_name = current_state.on_event(entity, event)
        # Log the state name: State defines no __str__/__repr__, so formatting
        # the State object itself would only print its default object repr.
        logger.log_debug(f'{entity} has got event [{event}], it is changing from state [{current_state_name}] to [{next_state_name}]')
        if next_state_name not in self.states:
            raise RuntimeError(f'Unknown next state {next_state_name}')
        if next_state_name != current_state_name:
            current_state.on_leave(entity)
            entity.change_state(next_state_name)
            self.states[next_state_name].on_enter(entity)
:return: """ - if DeviceDataManager.is_independent_mode() and cls.thermal_updater_task: + if DeviceDataManager.is_module_host_management_mode() and cls.thermal_updater_task: cls.thermal_updater_task.stop() diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py index f2f0f75b2fd1..889bc96d3bec 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_updater.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. # Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -81,10 +81,6 @@ def load_tc_config(self): def start(self): self.clean_thermal_data() - if not self.wait_all_sfp_ready(): - logger.log_error('Failed to wait for all SFP ready, will put hw-management-tc to suspend') - self.control_tc(True) - return self.control_tc(False) self.load_tc_config() self._timer.start() @@ -106,25 +102,6 @@ def clean_thermal_data(self): sfp.sdk_index + 1 ) - def wait_all_sfp_ready(self): - logger.log_notice('Waiting for all SFP modules ready...') - max_wait_time = 300 - ready_set = set() - while len(ready_set) != len(self._sfp_list): - for sfp in self._sfp_list: - try: - sfp.is_sw_control() - ready_set.add(sfp) - except: - continue - max_wait_time -= 1 - if max_wait_time == 0: - return False - time.sleep(1) - - logger.log_notice('All SFP modules are ready') - return True - def get_asic_temp(self): temperature = utils.read_int_from_file('/sys/module/sx_core/asic0/temperature/input', default=None) return temperature * ASIC_TEMPERATURE_SCALE if temperature is not None else None diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py index a7354ac7b864..77aad4a315c7 100644 --- 
class WaitSfpReadyTask(threading.Thread):
    """Background wait for modules that are loading their firmware.

    When a module is brought from powered off to powered on, it takes
    3 seconds for the module to load its firmware. This thread keeps track of
    such modules and reports them as ready once their wait time has expired.
    """
    WAIT_TIME = 3

    def __init__(self):
        # Set daemon to True so that the thread will be destroyed when daemon exits.
        super().__init__(daemon=True)
        self.running = False

        # Lock to protect the wait dict and the ready set
        self.lock = threading.Lock()

        # Event to wake up the thread function
        self.event = threading.Event()

        # SFPs to be waited for. Key is SFP index, value is the expire time.
        self._wait_dict = {}

        # Indexes of the SFPs that have finished loading firmware.
        self._ready_set = set()

    def stop(self):
        """Stop the task, only used in unit test
        """
        self.running = False
        # Wake up the thread function in case it is blocked on the event
        self.event.set()

    def schedule_wait(self, sfp_index):
        """Add a SFP to the wait list

        Args:
            sfp_index (int): the index of the SFP object
        """
        logger.log_debug(f'SFP {sfp_index} is scheduled for waiting reset done')
        with self.lock:
            # Always bind the flag; binding it only in the "empty" branch
            # raises UnboundLocalError whenever an item is already pending.
            was_empty = not self._wait_dict
            # The item will be expired in WAIT_TIME seconds
            self._wait_dict[sfp_index] = time.time() + self.WAIT_TIME

        if was_empty:
            logger.log_debug('An item arrives, wake up WaitSfpReadyTask')
            # wake up the thread
            self.event.set()

    def cancel_wait(self, sfp_index):
        """Cancel a SFP from the wait list

        Args:
            sfp_index (int): the index of the SFP object
        """
        logger.log_debug(f'SFP {sfp_index} is canceled for waiting reset done')
        with self.lock:
            self._wait_dict.pop(sfp_index, None)
            # set.pop() accepts no argument; discard() removes a specific
            # element and is a no-op when the element is absent.
            self._ready_set.discard(sfp_index)

    def get_ready_set(self):
        """Get ready set and clear it

        Returns:
            set: indexes of the SFPs that became ready since the last call.
                The returned set is owned by the caller; an empty set is
                returned when nothing is ready.
        """
        with self.lock:
            # Hand the current set over and start a fresh one. This never
            # exposes a shared object that a caller could mutate.
            ready_set, self._ready_set = self._ready_set, set()
        return ready_set

    def empty(self):
        """Indicate if wait_dict is empty

        Returns:
            bool: True if wait_dict is empty
        """
        with self.lock:
            return len(self._wait_dict) == 0

    def run(self):
        """Thread function

        Blocks on the event while there is nothing to wait for. Otherwise it
        moves every expired SFP from the wait dict to the ready set and then
        sleeps for one second before checking again.
        """
        self.running = True
        is_empty = True
        while self.running:
            if is_empty:
                logger.log_debug('WaitSfpReadyTask is waiting for task...')
                # If wait_dict is empty, hold the thread until an item coming
                self.event.wait()
                self.event.clear()

            now = time.time()
            with self.lock:
                logger.log_debug(f'Processing wait SFP dict: {self._wait_dict}, now={now}')
                # Collect first, then remove: the dict must not change size
                # while it is being iterated.
                expired = [sfp_index for sfp_index, expire_time in self._wait_dict.items()
                           if now >= expire_time]
                for sfp_index in expired:
                    self._wait_dict.pop(sfp_index)
                    self._ready_set.add(sfp_index)

                is_empty = (len(self._wait_dict) == 0)

            time.sleep(1)
class TestChangeEvent:
    """Tests for Chassis.get_change_event in legacy and module host management mode."""

    @mock.patch('sonic_platform.sfp.SFP.get_fd_for_polling_legacy')
    @mock.patch('select.poll')
    @mock.patch('time.time')
    @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=False))
    @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=1))
    @mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[]))
    @mock.patch('sonic_platform.sfp.SFP.get_module_status')
    def test_get_change_event_legacy(self, mock_status, mock_time, mock_create_poll, mock_get_fd):
        """Test steps:
            1. Simulate polling with no event
            2. Simulate polling the first dummy event
            3. Simulate a plug out event
            4. Simulate an error event
        """
        c = chassis.Chassis()
        s = c.get_sfp(1)

        mock_status.return_value = sfp.SFP_STATUS_INSERTED

        # mock poll object
        mock_poll = mock.MagicMock()
        mock_create_poll.return_value = mock_poll
        mock_poll.poll = mock.MagicMock(return_value=[])

        # mock file descriptor for polling
        mock_file = mock.MagicMock()
        mock_get_fd.return_value = mock_file
        mock_file.fileno = mock.MagicMock(return_value=1)

        timeout = 1000
        # mock time function so that the while loop exit early
        mock_time.side_effect = [0, timeout]

        # no event, expect returning empty change event
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and not change_event['sfp']

        # dummy event, expect returning empty change event
        sfp_index = s.sdk_index + 1
        mock_poll.poll.return_value = [(1, 10)]
        mock_time.side_effect = [0, timeout]
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and not change_event['sfp']

        # plug out event, expect returning remove event
        mock_time.side_effect = [0, timeout]
        mock_status.return_value = sfp.SFP_STATUS_REMOVED
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == sfp.SFP_STATUS_REMOVED

        # error event, expect returning error event
        mock_time.side_effect = [0, timeout]
        mock_status.return_value = sfp.SFP_STATUS_ERROR
        s.get_error_info_from_sdk_error_type = mock.MagicMock(return_value=('2', 'some error'))
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == '2'
        assert 'sfp_error' in change_event and sfp_index in change_event['sfp_error'] and change_event['sfp_error'][sfp_index] == 'some error'

    @mock.patch('sonic_platform.sfp.SFP.get_fd')
    @mock.patch('select.poll')
    @mock.patch('time.time')
    @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=True))
    @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=1))
    @mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[]))
    @mock.patch('sonic_platform.module_host_mgmt_initializer.ModuleHostMgmtInitializer.initialize', mock.MagicMock())
    def test_get_change_event_for_module_host_management_mode(self, mock_time, mock_create_poll, mock_get_fd):
        """Test steps:
            1. Simulate polling with no event
            2. Simulate polling the first dummy event. (SDK always return a event when first polling the fd even if there is no change)
            3. Simulate a plug out event, module transfer from sw control to not present
            4. Simulate plugging in a fw control module, module transfer to fw control
            5. Simulate an error event
            6. Simulate a plug out event, module transfer from fw control to not present
            7. Simulate plugging in a sw control module, module transfer to sw control
            8. Simulate a power bad event, module transfer from sw control to power bad
            9. Simulate a power good event, module transfer from power bad to sw control
        """
        c = chassis.Chassis()
        c.initialize_sfp()
        s = c._sfp_list[0]
        s.state = sfp.STATE_SW_CONTROL

        # mock poll object
        mock_poll = mock.MagicMock()
        mock_create_poll.return_value = mock_poll
        mock_poll.poll = mock.MagicMock(return_value=[])

        # mock file descriptors for polling; fileno 1/2/3 identify the
        # hw_present, power_good and present files in the assertions below
        mock_hw_present_file = mock.MagicMock()
        mock_power_good_file = mock.MagicMock()
        mock_present_file = mock.MagicMock()
        mock_hw_present_file.read = mock.MagicMock(return_value=sfp.SFP_STATUS_INSERTED)
        mock_hw_present_file.fileno = mock.MagicMock(return_value=1)
        mock_power_good_file.read = mock.MagicMock(return_value=1)
        mock_power_good_file.fileno = mock.MagicMock(return_value=2)
        mock_present_file.read = mock.MagicMock(return_value=sfp.SFP_STATUS_INSERTED)
        mock_present_file.fileno = mock.MagicMock(return_value=3)

        def get_fd(fd_type):
            if fd_type == 'hw_present':
                return mock_hw_present_file
            elif fd_type == 'power_good':
                return mock_power_good_file
            else:
                return mock_present_file
        mock_get_fd.side_effect = get_fd

        timeout = 1000
        # mock time function so that the while loop exit early
        mock_time.side_effect = [0, timeout]

        # no event, expect returning empty change event
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and not change_event['sfp']

        # dummy event, expect returning empty change event
        sfp_index = s.sdk_index + 1
        mock_poll.poll.return_value = [(1, 10)]
        mock_time.side_effect = [0, timeout]
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and not change_event['sfp']

        # plug out event, expect returning remove event
        mock_time.side_effect = [0, timeout]
        mock_hw_present_file.read.return_value = sfp.SFP_STATUS_REMOVED
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == sfp.SFP_STATUS_REMOVED
        assert s.state == sfp.STATE_NOT_PRESENT

        # plug in with a fw control cable, expect returning insert event
        s.get_hw_present = mock.MagicMock(return_value=True)
        s.get_power_on = mock.MagicMock(return_value=True)
        s.get_reset_state = mock.MagicMock(return_value=True)
        s.get_power_good = mock.MagicMock(return_value=True)
        s.determine_control_type = mock.MagicMock(return_value=sfp.SFP_FW_CONTROL)
        s.set_control_type = mock.MagicMock()
        mock_time.side_effect = [0, timeout]
        mock_hw_present_file.read.return_value = sfp.SFP_STATUS_INSERTED
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == sfp.SFP_STATUS_INSERTED
        assert s.state == sfp.STATE_FW_CONTROL
        assert 1 not in c.registered_fds  # stop polling hw_present
        assert 2 not in c.registered_fds  # stop polling power_good
        assert 3 in c.registered_fds  # start polling present because it is firmware control

        # error event, expect returning error
        mock_time.side_effect = [0, timeout]
        mock_poll.poll.return_value = [(3, 10)]
        mock_present_file.read.return_value = sfp.SFP_STATUS_ERROR
        s.get_error_info_from_sdk_error_type = mock.MagicMock(return_value=('2', 'some error'))
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == '2'
        assert 'sfp_error' in change_event and sfp_index in change_event['sfp_error'] and change_event['sfp_error'][sfp_index] == 'some error'

        # plug out the firmware control cable, expect returning remove event
        mock_time.side_effect = [0, timeout]
        mock_present_file.read.return_value = sfp.SFP_STATUS_REMOVED
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == sfp.SFP_STATUS_REMOVED
        assert s.state == sfp.STATE_NOT_PRESENT
        assert 1 in c.registered_fds  # start polling hw_present because cable is not present, always assume software control
        assert 2 in c.registered_fds  # start polling power_good because cable is not present, always assume software control
        assert 3 not in c.registered_fds  # stop polling present

        # plug in a software control cable, expect returning insert event
        mock_time.side_effect = [0, timeout]
        mock_poll.poll.return_value = [(1, 10)]
        mock_hw_present_file.read.return_value = sfp.SFP_STATUS_INSERTED
        s.determine_control_type.return_value = sfp.SFP_SW_CONTROL
        s.check_power_capability = mock.MagicMock(return_value=True)
        s.update_i2c_frequency = mock.MagicMock()
        s.disable_tx_for_sff_optics = mock.MagicMock()
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == sfp.SFP_STATUS_INSERTED
        assert s.state == sfp.STATE_SW_CONTROL

        # power bad event, expect returning error event
        mock_time.side_effect = [0, timeout]
        mock_poll.poll.return_value = [(2, 10)]
        mock_power_good_file.read.return_value = '0'
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == '5'
        assert s.state == sfp.STATE_POWER_BAD

        # power good event, expect returning insert event
        mock_time.side_effect = [0, timeout]
        mock_poll.poll.return_value = [(2, 10)]
        mock_power_good_file.read.return_value = '1'
        _, change_event = c.get_change_event(timeout)
        assert 'sfp' in change_event and sfp_index in change_event['sfp'] and change_event['sfp'][sfp_index] == '1'
        assert s.state == sfp.STATE_SW_CONTROL
a/platform/mellanox/mlnx-platform-api/tests/test_chassis.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_chassis.py @@ -124,6 +124,7 @@ def test_fan(self): chassis._fan_drawer_list = [] assert chassis.get_num_fan_drawers() == 2 + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=False)) def test_sfp(self): # Test get_num_sfps, it should not create any SFP objects DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=3) @@ -176,6 +177,7 @@ def test_sfp(self): assert chassis.get_num_sfps() == 6 sonic_platform.chassis.extract_RJ45_ports_index = mock.MagicMock(return_value=[]) + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=False)) def test_create_sfp_in_multi_thread(self): DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=3) @@ -199,25 +201,6 @@ def test_create_sfp_in_multi_thread(self): assert s.sdk_index == index iteration_num -= 1 - - @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', MagicMock(return_value=3)) - def test_change_event(self): - chassis = Chassis() - chassis.modules_mgmt_thread.is_alive = MagicMock(return_value=True) - chassis.modules_changes_queue.get = MagicMock(return_value={1: '1'}) - - # Call get_change_event with timeout=0, wait until an event is detected - status, event_dict = chassis.get_change_event() - assert status is True - assert 'sfp' in event_dict and event_dict['sfp'][1] == '1' - assert len(chassis._sfp_list) == 3 - - # Call get_change_event with timeout=1.0 - chassis.modules_changes_queue.get.return_value = {} - status, event_dict = chassis.get_change_event(timeout=1.0) - assert status is True - assert 'sfp' in event_dict and not event_dict['sfp'] - @mock.patch('sonic_platform.chassis.Chassis._wait_reboot_cause_ready', MagicMock(return_value=True)) def test_reboot_cause(self): from sonic_platform import utils diff --git 
a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py index c172b82a30b7..f67793419091 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py @@ -54,11 +54,11 @@ def test_get_bios_component(self): @mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=('', '/tmp'))) @mock.patch('sonic_platform.device_data.utils.read_key_value_file') - def test_is_independent_mode(self, mock_read): + def test_is_module_host_management_mode(self, mock_read): mock_read.return_value = {} - assert not DeviceDataManager.is_independent_mode() + assert not DeviceDataManager.is_module_host_management_mode() mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'} - assert DeviceDataManager.is_independent_mode() + assert DeviceDataManager.is_module_host_management_mode() @mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp')) @mock.patch('sonic_platform.device_data.utils.load_json_file') @@ -74,7 +74,7 @@ def test_get_sfp_count(self, mock_load_json): @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3)) @mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1)) @mock.patch('sonic_platform.device_data.os.path.exists') - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode') def test_wait_platform_ready(self, mock_is_indep, mock_exists): mock_exists.return_value = True mock_is_indep.return_value = True diff --git a/platform/mellanox/mlnx-platform-api/tests/test_module_initializer.py b/platform/mellanox/mlnx-platform-api/tests/test_module_initializer.py new file mode 100644 index 000000000000..ad833a70f85c --- /dev/null +++ 
class TestModuleInitializer:
    """Tests for ModuleHostMgmtInitializer."""

    @mock.patch('os.path.exists')
    @mock.patch('sonic_platform.utils.wait_until')
    @mock.patch('sonic_platform.utils.is_host')
    def test_wait_module_ready(self, mock_is_host, mock_wait, mock_exists):
        """Check wait_module_ready against mocked is_host, os.path.exists and wait_until.

        Verifies which ready file is passed to os.path.exists on host side and
        on container side, and that ``initialized`` is set unless both the
        file check and the wait report failure.
        """
        initializer = module_host_mgmt_initializer.ModuleHostMgmtInitializer()
        # host side, file does not exist, wait succeeds
        mock_is_host.return_value = True
        mock_exists.return_value = False
        mock_wait.return_value = True
        initializer.wait_module_ready()
        mock_exists.assert_called_with(module_host_mgmt_initializer.MODULE_READY_HOST_FILE)
        assert initializer.initialized

        # container side, expect the container ready file to be checked
        initializer.initialized = False
        mock_is_host.return_value = False
        initializer.wait_module_ready()
        mock_exists.assert_called_with(module_host_mgmt_initializer.MODULE_READY_CONTAINER_FILE)

        # file already exists
        initializer.initialized = False
        mock_exists.return_value = True
        initializer.wait_module_ready()
        assert initializer.initialized

        # file does not exist and wait fails, expect not initialized
        initializer.initialized = False
        mock_wait.return_value = False
        mock_exists.return_value = False
        initializer.wait_module_ready()
        assert not initializer.initialized


    @mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[]))
    @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=1))
    @mock.patch('sonic_platform.sfp.SFP.initialize_sfp_modules', mock.MagicMock())
    @mock.patch('sonic_platform.module_host_mgmt_initializer.ModuleHostMgmtInitializer.is_initialization_owner')
    @mock.patch('sonic_platform.module_host_mgmt_initializer.ModuleHostMgmtInitializer.wait_module_ready')
    @mock.patch('sonic_platform.utils.is_host')
    def test_initialize(self, mock_is_host, mock_wait_ready, mock_owner):
        """Check initialize() for host side, container non-owner and container owner callers.

        Non-owner callers are expected to only call wait_module_ready; the
        owner is expected to skip the wait, mark itself initialized and leave
        the container ready file in place until clean_up() is called.
        """
        c = chassis.Chassis()
        initializer = module_host_mgmt_initializer.ModuleHostMgmtInitializer()
        mock_is_host.return_value = True
        mock_owner.return_value = False
        # called from host side, just wait
        initializer.initialize(c)
        mock_wait_ready.assert_called_once()
        mock_wait_ready.reset_mock()

        mock_is_host.return_value = False
        # non-initializer-owner called from container side, just wait
        initializer.initialize(c)
        mock_wait_ready.assert_called_once()
        mock_wait_ready.reset_mock()

        # initializer owner called from container side, no wait
        mock_owner.return_value = True
        initializer.initialize(c)
        mock_wait_ready.assert_not_called()
        assert initializer.initialized
        assert module_host_mgmt_initializer.initialization_owner
        assert os.path.exists(module_host_mgmt_initializer.MODULE_READY_CONTAINER_FILE)

        # clean_up is expected to remove the container ready file
        module_host_mgmt_initializer.clean_up()
        assert not os.path.exists(module_host_mgmt_initializer.MODULE_READY_CONTAINER_FILE)

    def test_is_initialization_owner(self):
        """A newly created initializer in the test process is not the initialization owner."""
        initializer = module_host_mgmt_initializer.ModuleHostMgmtInitializer()
        assert not initializer.is_initialization_owner()
100644 index d0cab978cf2f..000000000000 --- a/platform/mellanox/mlnx-platform-api/tests/test_modules_mgmt.py +++ /dev/null @@ -1,800 +0,0 @@ -# -# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. -# Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import queue -import sys -import threading -import time -import types -import unittest - -from mock import MagicMock, patch, mock_open, Mock -if sys.version_info.major == 3: - from unittest import mock -else: - import mock - -test_path = os.path.dirname(os.path.abspath(__file__)) -modules_path = os.path.dirname(test_path) -sys.path.insert(0, modules_path) - -from sonic_platform.device_data import DeviceDataManager -from sonic_py_common import device_info -from sonic_platform import modules_mgmt -from sonic_platform.modules_mgmt import ModulesMgmtTask -from sonic_platform_base.sonic_xcvr.api.public.cmis import CmisApi -from sonic_platform_base.sonic_xcvr.xcvr_eeprom import XcvrEeprom -from sonic_platform_base.sonic_xcvr.codes.public.cmis import CmisCodes -from sonic_platform_base.sonic_xcvr.mem_maps.public.cmis import CmisMemMap -from sonic_platform_base.sonic_xcvr.fields import consts - -DEFAULT_NUM_OF_PORTS_1 = 1 -DEFAULT_NUM_OF_PORTS_3 = 3 -DEFAULT_NUM_OF_PORTS_32 = 32 -POLLER_EXECUTED = False - -def _mock_sysfs_default_file_content(): - return { - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("0"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("1"): "1", - 
modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("2"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("0"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("1"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("2"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("0"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("1"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("2"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("0"): "48", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("1"): "48", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("2"): "48", - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("0"): "0", - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("1"): "0", - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("2"): "0", - modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET: "", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT: "48", - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("0"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("1"): "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("2"): "1", - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE: "1", - modules_mgmt.PROC_CMDLINE: "" - } - - -mock_file_content = _mock_sysfs_default_file_content() - - -class MockPoller: - - def __init__(self, modules_mgmt_task_stopping_event, modules_mgmt_thrd=None, num_of_ports=3, port_plug_out=False - , feature_enabled=True, warm_reboot=False, port_plug_in=False, sleep_timeout=False): - self.fds_dict = {} - self.poller_iteration_count = 0 - self.modules_mgmt_task_stopping_event = modules_mgmt_task_stopping_event - self.modules_mgmt_thrd = modules_mgmt_thrd - self.num_of_ports = num_of_ports - self.port_plug_out = port_plug_out - self.port_plug_in = port_plug_in - self.feature_enabled = feature_enabled - self.warm_reboot = warm_reboot - self.port_plug_out_changed = False - self.port_plug_in_changed = 
False - self.sleep_timeout = sleep_timeout - - def register(self, fd, attrs): - self.fds_dict[fd.fileno()] = { fd : attrs } - assert fd.fileno() in self.fds_dict - - def unregister(self, fd): - if fd.fileno() in self.fds_dict.keys(): - del self.fds_dict[fd.fileno()] - assert fd.fileno() not in self.fds_dict.keys() - - def poll(self, timeout=1000): - global POLLER_EXECUTED - assert len(self.modules_mgmt_thrd.sfp_port_dict_initial) == self.num_of_ports - assert self.modules_mgmt_thrd.is_supported_indep_mods_system == self.feature_enabled - # counting the number of poller iterations to know when to do the checks after plug out (and plug in) - # have to check at least on iteration 7 to let ports reach final state - self.poller_iteration_count += 1 - if self.num_of_ports > 0: - if not self.port_plug_out_changed: - if self.port_plug_out: - # return first fd registered with some made up event number 870 - fd_no_to_return = list(self.fds_dict.keys())[0] - fd = list(self.fds_dict[fd_no_to_return].keys())[0] - fd.set_file_int_content(0) - event_to_return = 870 - self.port_plug_out_changed = True - return [(fd_no_to_return, event_to_return)] - if not self.port_plug_in_changed: - if self.port_plug_in: - # return first fd registered with some made up event number 871 - fd_no_to_return = list(self.fds_dict.keys())[0] - fd = list(self.fds_dict[fd_no_to_return].keys())[0] - fd.set_file_int_content(1) - event_to_return = 871 - self.port_plug_in_changed = True - return [(fd_no_to_return, event_to_return)] - if 7 == self.poller_iteration_count: - # when feature is enabled, need to check for each port both power_good and hw_present sysfs for - # cmis non-flat memory cables - num_of_sysfs_to_check = self.num_of_ports if (not self.port_plug_out or not self.feature_enabled - or self.warm_reboot) else self.num_of_ports * 2 - for i in range(num_of_sysfs_to_check): - # when feature is enabled, power_good sysfs is also registered for cmis non-flat memory cables - # so each SW controlled port 
has 2 fds registered - port_to_test = i if not self.feature_enabled else int(i / 2) - assert self.modules_mgmt_thrd.sfp_port_dict_initial[port_to_test].port_num == port_to_test - assert self.modules_mgmt_thrd.sfp_port_dict_initial[ - port_to_test].initial_state == modules_mgmt.STATE_HW_NOT_PRESENT - if self.feature_enabled: - module_obj = self.modules_mgmt_thrd.fds_mapping_to_obj[list(self.fds_dict.keys())[i]][ - 'module_obj'] - assert module_obj.port_num == port_to_test - if not self.warm_reboot: - # in tests other than warm reboot it creates only SW control ports - if not self.port_plug_out: - assert module_obj.final_state == modules_mgmt.STATE_SW_CONTROL - else: - assert module_obj.final_state == modules_mgmt.STATE_HW_NOT_PRESENT - else: - if not self.port_plug_out: - assert module_obj.final_state == modules_mgmt.STATE_HW_PRESENT - # in warm reboot test with plug out plug in test creates only FW control ports - elif self.port_plug_out and self.port_plug_in: - assert module_obj.final_state == modules_mgmt.STATE_FW_CONTROL - else: - assert module_obj.final_state == modules_mgmt.STATE_HW_NOT_PRESENT - POLLER_EXECUTED = True - self.modules_mgmt_task_stopping_event.set() - if self.sleep_timeout: - time.sleep(timeout/1000) - return [] - - -class MockOpen: - - def __init__(self, name='', file_no=None, indep_mode_supported=True): - self.name = name - self.file_no = file_no - self.indep_mode_supported = indep_mode_supported - self.retint = None - self.curr = 0 - - def read(self): - if self.fileno() in [SAI_PROFILE_FD_FILENO]: - pass - else: - # if return value was changed, i.e. 
sysfs content changed from 1 to 0 to simulate plug out - if self.retint is not None: - return str(self.retint) - # return default values (can be changed per test) - else: - return mock_file_content[self.name] - - def readline(self): - # if trying to read sai profile file, according to fd fileno - if self.fileno() in [SAI_PROFILE_FD_FILENO]: - if self.indep_mode_supported: - return "SAI_INDEPENDENT_MODULE_MODE=1" - else: - return "" - else: - return mock_file_content[self.name] - - def fileno(self): - return self.file_no - - def seek(self, seek_val): - self.curr = seek_val - - def close(self): - pass - - def write(self, write_val): - self.set_file_int_content(write_val) - - def set_file_int_content(self, retint): - self.retint = str(retint) - mock_file_content[self.name] = str(retint) - - def __enter__(self): - return self - - def __exit__(self, filename, *args, **kwargs): - pass - -class MockPollerStopEvent: - - def __init__(self, modules_mgmt_task_stopping_event, modules_mgmt_thrd=None, num_of_ports=DEFAULT_NUM_OF_PORTS_3 - , feature_enabled=True, ports_connected=True, fw_controlled_ports=False, sleep_timeout=False): - self.fds_dict = {} - self.modules_mgmt_task_stopping_event = modules_mgmt_task_stopping_event - self.modules_mgmt_thrd = modules_mgmt_thrd - self.num_of_ports = num_of_ports - self.feature_enabled = feature_enabled - self.ports_connected = ports_connected - self.sleep_timeout = sleep_timeout - self.fw_controlled_ports = fw_controlled_ports - - def register(self, fd, attrs): - self.fds_dict[fd.fileno()] = 1 & attrs - assert fd.fileno() in self.fds_dict - - def poll(self, timeout=0): - assert len(self.modules_mgmt_thrd.sfp_port_dict_initial) == self.num_of_ports - assert self.modules_mgmt_thrd.is_supported_indep_mods_system == self.feature_enabled - global POLLER_EXECUTED - if self.num_of_ports > 0: - # when feature is enabled, need to check for each port both power_good and hw_present sysfs for - # cmis non-flat memory cables - ports_to_test = 
self.num_of_ports if (not self.feature_enabled or not self.ports_connected - or self.fw_controlled_ports) else self.num_of_ports * 2 - for i in range(ports_to_test): - # when feature is enabled, power_good sysfs is also registered for cmis non-flat memory cables - port_to_test = i if (not self.feature_enabled or not self.ports_connected - or self.fw_controlled_ports) else int(i / 2) - assert self.modules_mgmt_thrd.sfp_port_dict_initial[port_to_test].port_num == port_to_test - assert self.modules_mgmt_thrd.sfp_port_dict_initial[port_to_test].initial_state == modules_mgmt.STATE_HW_NOT_PRESENT - module_obj = self.modules_mgmt_thrd.fds_mapping_to_obj[list(self.fds_dict.keys())[i]]['module_obj'] - assert module_obj.port_num == port_to_test - if self.ports_connected: - if self.feature_enabled: - if self.fw_controlled_ports: - assert module_obj.final_state == modules_mgmt.STATE_FW_CONTROL - else: - assert module_obj.final_state == modules_mgmt.STATE_SW_CONTROL - else: - assert module_obj.final_state == modules_mgmt.STATE_HW_PRESENT - else: - assert module_obj.final_state == modules_mgmt.STATE_HW_NOT_PRESENT - POLLER_EXECUTED = True - else: - POLLER_EXECUTED = True - self.modules_mgmt_task_stopping_event.set() - if self.sleep_timeout: - time.sleep(timeout/1000) - return [] - - -def _mock_is_file_indep_mode_disabled_content(): - return { - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL: True, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"): True, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1"): True, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2"): True, - '//usr/share/sonic/platform/ACS-MSN4700/sai.profile' : True - } - -mock_is_file_indep_mode_disabled_content = 
_mock_is_file_indep_mode_disabled_content() - -def mock_is_file_indep_mode_disabled(file_path, **kwargs): - return mock_is_file_indep_mode_disabled_content[file_path] - -def _mock_is_file_indep_mode_enabled_content(): - return { - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT: True, - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL: True, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("0"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("1"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("2"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("0"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("1"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("2"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("0"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("1"): True, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("2"): True, - '//usr/share/sonic/platform/ACS-MSN4700/sai.profile' : True - } - -mock_is_file_indep_mode_enabled_content = _mock_is_file_indep_mode_enabled_content() - - -def mock_is_file_indep_mode_enabled(file_path, **kwargs): - return mock_is_file_indep_mode_enabled_content[file_path] - - -def mock_read_int_from_file(filename, *args): - return_dict = { - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0") : 1, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1") : 1, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2") : 1 - } - - return return_dict[filename] - - -class MockXcvrEeprom(): - def __init__(self, is_flat_memory, mem_map): - self.is_flat_memory = is_flat_memory - self.mem_map = mem_map - - def is_cmis_api(self): - return self.is_cmis_api - - def is_flat_memory(self): - return self.is_flat_memory - - 
def read(self, field): - if consts.FLAT_MEM_FIELD == field: - return 0 if self.is_flat_memory else 1 - else: - return 0 - - -class MockXcvrapi: - def __init__(self, is_cmis_api=True, is_flat_memory_bool=False): - self.is_cmis_api = is_cmis_api - self.is_flat_memory_bool = is_flat_memory_bool - self.xcvr_eeprom = MagicMock(autospec=XcvrEeprom, return_value=MockXcvrEeprom(is_flat_memory_bool, CmisMemMap(CmisCodes))) - - def is_flat_memory(self): - return self.is_flat_memory_bool - - def xcvr_eeprom(self): - return self.xcvr_eeprom - - -class MockSFPxcvrapi: - def __init__(self, xcvr_api_is_cmis_api=True, xcvr_eeprom_is_flat_memory=False): - self.xcvr_api = Mock(spec=CmisApi(MockXcvrEeprom(False, CmisMemMap(CmisCodes))), return_value=MockXcvrapi(xcvr_api_is_cmis_api, xcvr_eeprom_is_flat_memory)) - self.xcvr_api_is_cmis_api = xcvr_api_is_cmis_api - self.xcvr_eeprom_is_flat_memory = xcvr_eeprom_is_flat_memory - self.xcvr_api.is_flat_memory = types.MethodType(self.is_flat_memory, self) - - def get_xcvr_api(self): - return self.xcvr_api - - def is_flat_memory(self, ref): - return self.xcvr_eeprom_is_flat_memory - - -def check_power_cap(port, module_sm_obj): - pass - -SAI_PROFILE_FD_FILENO = 99 - - -class TestModulesMgmt(unittest.TestCase): - """Test class to test modules_mgmt.py. The test cases covers: - 1. cables detection for 1 to 3 ports - feature disabled / enabled / poller - 2. cable disconnection - plug out - 3. cable reconnection - plug in - 4. warm reboot normal flow with FW ports - 5. warm reboot flow with FW ports plugged out - 6. warm reboot flow with FW ports plugged out and then plugged in (stays FW controlled, no SFP mock change) - 7. test 32 FW controlled (non cmis flat mem) cables powered off - 8. 
test 32 SW controlled (cmis active non flat mem) cables powered off - """ - - def _mock_sysfs_file_content(self): - return { - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE : "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD : "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON : "0", - modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET : "", - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT : "48", - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL : "1", - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0") : "1", - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1") : "1", - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2") : "1", - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("0"): "0" - } - - def mock_open_builtin(self, file_name, feature_enabled=True): - return_dict = { - (modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"), 'r') : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"), 100), - (modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1"), 'r') : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1"), 101), - (modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2"), 'r') : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2"), 102), - '//usr/share/sonic/platform/ACS-MSN4700/sai.profile' : MockOpen('//usr/share/sonic/platform/ACS-MSN4700/sai.profile' - , SAI_PROFILE_FD_FILENO, feature_enabled), - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0") : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0"), 100), - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1") : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1"), 101), - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2") : MockOpen(modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2"), 102), - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("0"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("0"), 0), - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("1"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("1"), 1), - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("2"): 
MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("2"), 2), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("0"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("0"), 200), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("1"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("1"), 201), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("2"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("2"), 202), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("0"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("0"), 300), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("1"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("1"), 301), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("2"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("2"), 302), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("0"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("0"), 500), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("1"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("1"), 501), - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("2"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("2"), 502), - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("0"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("0"), 602), - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("1"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("1"), 602), - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("2"): MockOpen(modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("2"), 602), - modules_mgmt.PROC_CMDLINE: MockOpen(modules_mgmt.PROC_CMDLINE, self.fd_number_by_fd_name_dict[modules_mgmt.PROC_CMDLINE]) - } - return return_dict[file_name] - - # side effects are used in mock when want to create different mocks per variable, i.e. 
here it's filename - # see below mock_open_new_side_effect_poller_test where returning a new MockOpen passing it the filename - def mock_open_new_side_effect_feature_disabled(self, filename, *args, **kwargs): - mock_context = MagicMock() - mock_context.__enter__.return_value = self.mock_open_builtin(filename, False) - mock_context.__exit__.return_value = False - return mock_context - - def mock_open_new_side_effect_feature_enabled(self, filename, *args, **kwargs): - mock_context = MagicMock() - mock_context.__enter__.return_value = self.mock_open_builtin(filename) - mock_context.__exit__.return_value = False - return mock_context - - def mock_open_new_side_effect_poller_test(self, filename, *args, **kwargs): - if filename in ['//usr/share/sonic/platform/ACS-MSN4700/sai.profile']: - mock_context = MagicMock() - mock_context.__enter__.return_value = MockOpen(filename, SAI_PROFILE_FD_FILENO) - mock_context.__exit__.return_value = False - return mock_context - else: - mock_context = MagicMock() - mock_open_new = MockOpen(filename, self.fd_number_by_fd_name_dict[filename]) - mock_context.return_value = mock_open_new - mock_context.__enter__.return_value = mock_open_new - mock_context.__exit__.return_value = False - if 'hw_present' in filename or 'power_on' in filename or 'freq' in filename or 'control' in filename: - return mock_context - else: - return mock_context.return_value - - def mock_open_new_side_effect_warm_reboot(self, filename, *args, **kwargs): - if filename in ['//usr/share/sonic/platform/ACS-MSN4700/sai.profile']: - mock_context = MagicMock() - mock_context.__enter__.return_value = MockOpen(filename, SAI_PROFILE_FD_FILENO) - mock_context.__exit__.return_value = False - return mock_context - else: - mock_open_new = MockOpen(filename, self.fd_number_by_fd_name_dict[filename]) - return mock_open_new - - def setUp(cls): - cls.modules_mgmt_task_stopping_event = threading.Event() - cls.modules_changes_queue = queue.Queue() - global POLLER_EXECUTED - 
POLLER_EXECUTED = False - # start modules_mgmt thread and the test in poller part - cls.modules_mgmt_thrd = ModulesMgmtTask(main_thread_stop_event=cls.modules_mgmt_task_stopping_event, - q=cls.modules_changes_queue) - cls.modules_mgmt_thrd.check_power_cap = check_power_cap - assert cls.modules_mgmt_thrd.sfp_port_dict_initial == {} - - @classmethod - def setup_class(cls): - os.environ["MLNX_PLATFORM_API_UNIT_TESTING"] = "1" - cls.fd_number_by_fd_name_dict = { - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("0") : 100, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("1") : 101, - modules_mgmt.SYSFS_LEGACY_FD_PRESENCE.format("2") : 102, - '//usr/share/sonic/platform/ACS-MSN4700/sai.profile' : SAI_PROFILE_FD_FILENO, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("0") : 0, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("1") : 1, - modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format("2") : 2, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("0") : 200, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("1") : 201, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format("2") : 202, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("0") : 300, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("1") : 301, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format("2") : 302, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("0") : 500, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("1") : 501, - modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_LIMIT.format("2") : 502, - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("0") : 600, - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("1") : 601, - modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format("2") : 602, - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("0") : 700, - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("1") : 701, - modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("2") : 702, - modules_mgmt.PROC_CMDLINE : 800 - } - # mock the directory holding relevant sai.profile - 
device_info.get_paths_to_platform_and_hwsku_dirs = mock.MagicMock(return_value=('', '/usr/share/sonic/platform/ACS-MSN4700')) - - - @patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', MagicMock(return_value=DEFAULT_NUM_OF_PORTS_3)) - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_disabled)) - @patch('sonic_platform.utils.read_int_from_file', MagicMock(side_effect=mock_read_int_from_file)) - @patch('builtins.open', spec=open) - def test_mdf_all_ports_feature_disabled(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_feature_disabled - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_3 - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, feature_enabled=False))): - self.modules_mgmt_thrd.run() - - @patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', MagicMock(return_value=DEFAULT_NUM_OF_PORTS_3)) - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi())) - def test_mdf_all_ports_feature_enabled(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_feature_enabled - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_3 - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi())) - def 
test_modules_mgmt_poller_events_3_ports(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_3) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_3 - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi())) - def test_modules_mgmt_poller_events_single_port(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports))): - #with patch('builtins.open', MagicMock(side_effect=self.mock_open_new_side_effect_poller_test)): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_normal_warm_reboot(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_warm_reboot - # mock /proc/cmdline with warm reboot boot type key value - mock_file_content[modules_mgmt.PROC_CMDLINE] = f'{modules_mgmt.CMDLINE_STR_TO_LOOK_FOR}{modules_mgmt.CMDLINE_VAL_TO_LOOK_FOR}' - 
DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - # set the port to start with FW controlled before warm reboot takes place - mock_file_content[modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("0")] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPoller(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, warm_reboot=True))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_plug_out_fw_cable_after_warm_reboot(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_warm_reboot - # mock /proc/cmdline with warm reboot boot type key value - mock_file_content[modules_mgmt.PROC_CMDLINE] = f'{modules_mgmt.CMDLINE_STR_TO_LOOK_FOR}{modules_mgmt.CMDLINE_VAL_TO_LOOK_FOR}' - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - - # set the port to start with FW controlled before warm reboot takes place - mock_file_content[modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("0")] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPoller(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, port_plug_out=True, warm_reboot=True))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', 
MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_plug_out_plug_in_fw_cable_after_warm_reboot(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_warm_reboot - # mock /proc/cmdline with warm reboot boot type key value - mock_file_content[modules_mgmt.PROC_CMDLINE] = f'{modules_mgmt.CMDLINE_STR_TO_LOOK_FOR}{modules_mgmt.CMDLINE_VAL_TO_LOOK_FOR}' - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - - mock_file_content[modules_mgmt.SYSFS_INDEPENDENT_FD_FW_CONTROL.format("0")] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPoller(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, port_plug_out=True, warm_reboot=True, port_plug_in=True))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_no_ports(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=0) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == 0 - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_ports_disconnected(self, mock_open): - 
mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_3) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_3 - - # update hw_present sysfs with value of 0 for each port - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format(f"{i}") - mock_file_content[modules_sysfs] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, ports_connected=False))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_bad_flows_port_disconnected(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - - # update hw_present sysfs with value of 0 for each port - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format(f"{i}") - mock_file_content[modules_sysfs] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, ports_connected=False))): - self.modules_mgmt_thrd.run() - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', 
MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_bad_flows_power_good(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_1) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_1 - - # update power_good sysfs with value of 0 for each port - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format(f"{i}") - mock_file_content[modules_sysfs] = "0" - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, ports_connected=False))): - self.modules_mgmt_thrd.run() - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format(f"{i}") - mock_file_content[modules_sysfs] = "1" - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi(False, True))) - def test_modules_mgmt_bad_flows_ports_powered_off_fw_controlled(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_32) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_32 - - # create or update different sysfs and is_file mocking with relevant value for each port - for i in range(num_of_tested_ports): - # mock power_on sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format(f"{i}") - mock_file_content[modules_sysfs] = "0" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - 
self.fd_number_by_fd_name_dict[modules_sysfs] = 300 + i - # mock hw_presence sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format(f'{i}') - mock_file_content[modules_sysfs] = "1" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = i - # mock power_good sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format(f'{i}') - mock_file_content[modules_sysfs] = "1" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 200 + i - # mock hw_reset sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET.format(f'{i}') - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 400 + i - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports, fw_controlled_ports=True))): - self.modules_mgmt_thrd.run() - - # change power_on sysfs values back to the default ones - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format(f"{i}") - mock_file_content[modules_sysfs] = "1" - - @patch('os.path.isfile', MagicMock(side_effect=mock_is_file_indep_mode_enabled)) - @patch('builtins.open', spec=open) - @patch('sonic_platform.sfp.SFP', MagicMock(return_value=MockSFPxcvrapi())) - def test_modules_mgmt_bad_flows_ports_powered_off_sw_controlled(self, mock_open): - mock_open.side_effect = self.mock_open_new_side_effect_poller_test - DeviceDataManager.get_sfp_count = mock.MagicMock(return_value=DEFAULT_NUM_OF_PORTS_32) - num_of_tested_ports = DeviceDataManager.get_sfp_count() - assert num_of_tested_ports == DEFAULT_NUM_OF_PORTS_32 - - # create or update different sysfs and is_file mocking with relevant value for each port - for i in 
range(num_of_tested_ports): - # mock power_on sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format(f"{i}") - mock_file_content[modules_sysfs] = "0" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 300 + i - # mock hw_presence sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_PRESENCE.format(f'{i}') - mock_file_content[modules_sysfs] = "1" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = i - # mock power_good sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_GOOD.format(f'{i}') - mock_file_content[modules_sysfs] = "1" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 200 + i - # mock hw_reset sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_HW_RESET.format(f'{i}') - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 400 + i - # mock frequency_support sysfs for all ports - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_FREQ_SUPPORT.format(f'{i}') - mock_file_content[modules_sysfs] = "0" - mock_is_file_indep_mode_enabled_content[modules_sysfs] = True - self.fd_number_by_fd_name_dict[modules_sysfs] = 600 + i - - # start modules_mgmt thread and the test in poller part - with patch('select.poll', MagicMock(return_value=MockPollerStopEvent(self.modules_mgmt_task_stopping_event - , self.modules_mgmt_thrd, num_of_tested_ports))): - self.modules_mgmt_thrd.run() - - # change power_on sysfs values back to the default ones - for i in range(num_of_tested_ports): - modules_sysfs = modules_mgmt.SYSFS_INDEPENDENT_FD_POWER_ON.format(f"{i}") - mock_file_content[modules_sysfs] = "1" - - def tearDown(cls): - mock_file_content[modules_mgmt.PROC_CMDLINE] = '' - cls.modules_mgmt_thrd = None - # a check that modules mgmt thread 
ran and got into the poller part where the tests here has all checks - assert POLLER_EXECUTED diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index 499983a01e15..71f81bd243e3 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. +# Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. # Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,10 +61,7 @@ def test_sfp_index(self, mock_max_port): @mock.patch('sonic_platform.chassis.Chassis.get_num_sfps', mock.MagicMock(return_value=2)) @mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[])) def test_sfp_get_error_status(self, mock_get_error_code, mock_control): - chassis = Chassis() - - # Fetch an SFP module to test - sfp = chassis.get_sfp(1) + sfp = SFP(1) mock_control.return_value = False description_dict = sfp._get_error_description_dict() for error in description_dict.keys(): @@ -230,18 +227,14 @@ def test_get_page_and_page_offset(self, mock_get_type_str, mock_eeprom_path, moc assert page == '/tmp/1/data' assert page_offset is 0 - @mock.patch('sonic_platform.sfp.SFP.is_sw_control') @mock.patch('sonic_platform.sfp.SFP._read_eeprom') - def test_sfp_get_presence(self, mock_read, mock_control): + def test_sfp_get_presence(self, mock_read): sfp = SFP(0) mock_read.return_value = None assert not sfp.get_presence() mock_read.return_value = 0 assert sfp.get_presence() - - mock_control.side_effect = RuntimeError('') - assert not sfp.get_presence() @mock.patch('sonic_platform.utils.read_int_from_file') def test_rj45_get_presence(self, mock_read_int): @@ -343,34 +336,20 @@ def test_get_temperature_threshold(self): assert sfp.get_temperature_warning_threshold() == 75.0 assert sfp.get_temperature_critical_threshold() == 85.0 - 
@mock.patch('sonic_platform.sfp.NvidiaSFPCommon.get_logical_port_by_sfp_index') @mock.patch('sonic_platform.utils.read_int_from_file') - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode') - @mock.patch('sonic_platform.utils.DbUtils.get_db_instance') - def test_is_sw_control(self, mock_get_db, mock_mode, mock_read, mock_get_logical): + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode') + def test_is_sw_control(self, mock_mode, mock_read): sfp = SFP(0) mock_mode.return_value = False assert not sfp.is_sw_control() mock_mode.return_value = True - mock_get_logical.return_value = None - with pytest.raises(Exception): - sfp.is_sw_control() - - mock_get_logical.return_value = 'Ethernet0' - mock_db = mock.MagicMock() - mock_get_db.return_value = mock_db - mock_db.exists = mock.MagicMock(return_value=False) - with pytest.raises(Exception): - sfp.is_sw_control() - - mock_db.exists.return_value = True mock_read.return_value = 0 assert not sfp.is_sw_control() mock_read.return_value = 1 assert sfp.is_sw_control() - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=False)) + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=False)) @mock.patch('sonic_platform.sfp.SFP.is_sw_control', mock.MagicMock(return_value=False)) @mock.patch('sonic_platform.utils.is_host', mock.MagicMock(side_effect = [True, True, False, False])) @mock.patch('subprocess.check_output', mock.MagicMock(side_effect = ['True', 'False'])) @@ -383,7 +362,7 @@ def test_get_lpmode(self): assert sfp.get_lpmode() assert not sfp.get_lpmode() - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=False)) + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=False)) 
@mock.patch('sonic_platform.sfp.SFP.is_sw_control', mock.MagicMock(return_value=False)) @mock.patch('sonic_platform.utils.is_host', mock.MagicMock(side_effect = [True, True, False, False])) @mock.patch('subprocess.check_output', mock.MagicMock(side_effect = ['True', 'False'])) @@ -396,7 +375,7 @@ def test_set_lpmode(self): assert sfp.set_lpmode(False) assert not sfp.set_lpmode(False) - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=True)) + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=True)) @mock.patch('sonic_platform.utils.read_int_from_file') @mock.patch('sonic_platform.sfp.SFP.is_sw_control') def test_get_lpmode_cmis_host_mangagement(self, mock_control, mock_read): @@ -420,7 +399,7 @@ def test_get_lpmode_cmis_host_mangagement(self, mock_control, mock_read): mock_read.return_value = 2 assert not sfp.get_lpmode() - @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=True)) + @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode', mock.MagicMock(return_value=True)) @mock.patch('sonic_platform.sfp.SFP.is_sw_control') def test_set_lpmode_cmis_host_mangagement(self, mock_control): mock_control.return_value = True @@ -437,3 +416,115 @@ def test_set_lpmode_cmis_host_mangagement(self, mock_control): mock_control.return_value = False assert not sfp.set_lpmode(True) assert not sfp.set_lpmode(False) + + def test_determine_control_type(self): + sfp = SFP(0) + sfp.get_xcvr_api = mock.MagicMock(return_value=None) + assert sfp.determine_control_type() == 0 + + sfp.get_xcvr_api.return_value = 1 # Just make it not None + sfp.is_supported_for_software_control = mock.MagicMock(return_value=True) + assert sfp.determine_control_type() == 1 + + sfp.is_supported_for_software_control.return_value = False + assert sfp.determine_control_type() == 0 + + def 
test_check_power_capability(self): + sfp = SFP(0) + sfp.get_module_max_power = mock.MagicMock(return_value=-1) + assert not sfp.check_power_capability() + + sfp.get_module_max_power.return_value = 48 + sfp.get_power_limit = mock.MagicMock(return_value=48) + assert sfp.check_power_capability() + + sfp.get_power_limit.return_value = 1 + assert not sfp.check_power_capability() + + def test_get_module_max_power(self): + sfp = SFP(0) + sfp.is_cmis_api = mock.MagicMock(return_value=True) + sfp.read_eeprom = mock.MagicMock(return_value=bytearray([48])) + assert sfp.get_module_max_power() == 48 + + sfp.is_cmis_api.return_value = False + sfp.is_sff_api = mock.MagicMock(return_value=True) + sfp.read_eeprom.return_value = bytearray([128]) + assert sfp.get_module_max_power() == 2.5 * 4 + + sfp.read_eeprom.return_value = bytearray([32]) + assert sfp.get_module_max_power() == 3.2 * 4 + + # Simulate invalid value + sfp.read_eeprom.return_value = bytearray([33]) + assert sfp.get_module_max_power() == -1 + + # Simulate unsupported module type + sfp.is_sff_api .return_value = False + assert sfp.get_module_max_power() == -1 + + def test_update_i2c_frequency(self): + sfp = SFP(0) + sfp.get_frequency_support = mock.MagicMock(return_value=False) + sfp.set_frequency = mock.MagicMock() + sfp.update_i2c_frequency() + sfp.set_frequency.assert_not_called() + + sfp.get_frequency_support.return_value = True + sfp.update_i2c_frequency() + sfp.set_frequency.assert_not_called() + + sfp.is_cmis_api = mock.MagicMock(return_value=True) + sfp.read_eeprom = mock.MagicMock(return_value=bytearray([0])) + sfp.update_i2c_frequency() + sfp.set_frequency.assert_called_with(0) + + sfp.is_cmis_api.return_value = False + sfp.is_sff_api = mock.MagicMock(return_value=True) + sfp.update_i2c_frequency() + sfp.set_frequency.assert_called_with(0) + + def test_disable_tx_for_sff_optics(self): + sfp = SFP(0) + mock_api = mock.MagicMock() + sfp.get_xcvr_api = mock.MagicMock(return_value=mock_api) + mock_api.tx_disable 
= mock.MagicMock() + sfp.disable_tx_for_sff_optics() + mock_api.tx_disable.assert_not_called() + + sfp.is_sff_api = mock.MagicMock(return_value=True) + mock_api.get_tx_disable_support = mock.MagicMock(return_value=True) + sfp.disable_tx_for_sff_optics() + mock_api.tx_disable.assert_called_with(True) + + @mock.patch('sonic_platform.utils.read_int_from_file') + def test_get_error_info_from_sdk_error_type(self, mock_read): + sfp = SFP(0) + # Unknown error + mock_read.return_value = -1 + sfp_state, error_desc = sfp.get_error_info_from_sdk_error_type() + assert sfp_state == '2' + assert 'Unknown error' in error_desc + + mock_read.return_value = 2 + sfp_state, error_desc = sfp.get_error_info_from_sdk_error_type() + assert sfp_state == '11' + assert error_desc is None + + @mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[])) + @mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=1)) + def test_initialize_sfp_modules(self): + c = Chassis() + c.initialize_sfp() + s = c._sfp_list[0] + s.get_hw_present = mock.MagicMock(return_value=True) + s.get_power_on = mock.MagicMock(return_value=False) + s.get_reset_state = mock.MagicMock(return_value=True) + s.get_power_good = mock.MagicMock(return_value=True) + s.determine_control_type = mock.MagicMock(return_value=1) # software control + s.set_control_type = mock.MagicMock() + SFP.initialize_sfp_modules(c._sfp_list) + assert s.in_stable_state() + SFP.wait_ready_task.stop() + SFP.wait_ready_task.join() + SFP.wait_ready_task = None diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py deleted file mode 100644 index ef4820ecfd8f..000000000000 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py +++ /dev/null @@ -1,61 +0,0 @@ -# -# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. 
-# Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import sys - -from mock import MagicMock, patch - -test_path = os.path.dirname(os.path.abspath(__file__)) -modules_path = os.path.dirname(test_path) -sys.path.insert(0, modules_path) - -from sonic_platform_base.sfp_base import SfpBase - -class TestSfpEvent(object): - @classmethod - def setup_class(cls): - os.environ["MLNX_PLATFORM_API_UNIT_TESTING"] = "1" - - @patch('select.select', MagicMock(return_value=([99], None, None))) - def test_check_sfp_status(self): - from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR - from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS - - self.executor(SDK_SFP_STATE_IN, None, SfpBase.SFP_STATUS_BIT_INSERTED) - self.executor(SDK_SFP_STATE_OUT, None, SfpBase.SFP_STATUS_BIT_REMOVED) - for error_type, error_status in SDK_ERRORS_TO_ERROR_BITS.items(): - description = SDK_ERRORS_TO_DESCRIPTION.get(error_type) - if error_type in SDK_SFP_BLOCKING_ERRORS: - error_status |= SfpBase.SFP_ERROR_BIT_BLOCKING - error_status |= SfpBase.SFP_STATUS_BIT_INSERTED - self.executor(SDK_SFP_STATE_ERR, error_type, error_status, description) - - def executor(self, mock_module_state, mock_error_type, expect_status, description=None): - from sonic_platform.sfp_event import sfp_event - - event = sfp_event() - event.on_pmpe = MagicMock(return_value=(True, [0,1], mock_module_state, 
mock_error_type)) - port_change = {} - error_dict = {} - found = event.check_sfp_status(port_change, error_dict, 0) - assert found - expect_status_str = str(expect_status) - assert 1 in port_change and port_change[1] == expect_status_str - assert 2 in port_change and port_change[2] == expect_status_str - if description: - assert 1 in error_dict and error_dict[1] == description - assert 2 in error_dict and error_dict[2] == description diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp_sm.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp_sm.py new file mode 100644 index 000000000000..684fa3af11f8 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp_sm.py @@ -0,0 +1,156 @@ +# +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import sys +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +from sonic_platform import sfp +from sonic_platform import utils + +origin_read = utils.read_from_file +origin_write = utils.write_file + + +class TestSfpStateMachine: + PATH_PREFIX = '/sys/module/sx_core/asic0/module0' + mock_file_content = {} + + @classmethod + def setup_class(cls): + utils.read_from_file = cls.mock_read + utils.write_file = cls.mock_write + + @classmethod + def teardown_class(cls): + utils.read_from_file = origin_read + utils.write_file = origin_write + + @classmethod + def mock_value(cls, file_name, value): + cls.mock_file_content[f'{cls.PATH_PREFIX}/{file_name}'] = value + + @classmethod + def get_value(cls, file_name): + return cls.mock_file_content[f'{cls.PATH_PREFIX}/{file_name}'] + + @classmethod + def mock_write(cls, file_path, value, *args, **kwargs): + cls.mock_file_content[file_path] = value + + @classmethod + def mock_read(cls, file_path, *args, **kwargs): + return cls.mock_file_content[file_path] + + def test_no_hw_present(self): + self.mock_value('hw_present', 0) + s = sfp.SFP(0) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_NOT_PRESENT + + def test_not_powered(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 0) + s = sfp.SFP(0) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_RESETTING + assert self.get_value('power_on') == 1 + assert self.get_value('hw_reset') == 1 + assert 0 in sfp.SFP.get_wait_ready_task()._wait_dict + sfp.SFP.get_wait_ready_task()._wait_dict.pop(0) + + def test_in_reset_state(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 0) + s = sfp.SFP(0) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_RESETTING + assert self.get_value('hw_reset') 
== 1 + assert 0 in sfp.SFP.get_wait_ready_task()._wait_dict + s.on_event(sfp.EVENT_NOT_PRESENT) + assert s.get_state() == sfp.STATE_NOT_PRESENT + assert 0 not in sfp.SFP.get_wait_ready_task()._wait_dict + + def test_reset_done(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 0) + self.mock_value('power_good', 1) + s = sfp.SFP(0) + s.determine_control_type = mock.MagicMock(return_value=sfp.SFP_FW_CONTROL) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_RESETTING + s.on_event(sfp.EVENT_RESET_DONE) + assert s.get_state() == sfp.STATE_FW_CONTROL + + def test_no_power_good(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 1) + self.mock_value('power_good', 0) + s = sfp.SFP(0) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_POWER_BAD + s.on_event(sfp.EVENT_NOT_PRESENT) + assert s.get_state() == sfp.STATE_NOT_PRESENT + + def test_fw_control(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 1) + self.mock_value('power_good', 1) + s = sfp.SFP(0) + s.determine_control_type = mock.MagicMock(return_value=sfp.SFP_FW_CONTROL) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_FW_CONTROL + assert self.get_value('control') == sfp.SFP_FW_CONTROL + + def test_power_exceed(self): + self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 1) + self.mock_value('power_good', 1) + s = sfp.SFP(0) + s.determine_control_type = mock.MagicMock(return_value=sfp.SFP_SW_CONTROL) + s.check_power_capability = mock.MagicMock(return_value=False) + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_POWER_LIMIT_ERROR + assert self.get_value('power_on') == 0 + assert self.get_value('hw_reset') == 0 + s.on_event(sfp.EVENT_NOT_PRESENT) + assert s.get_state() == sfp.STATE_NOT_PRESENT + + def test_sw_control(self): + 
self.mock_value('hw_present', 1) + self.mock_value('power_on', 1) + self.mock_value('hw_reset', 1) + self.mock_value('power_good', 1) + s = sfp.SFP(0) + s.determine_control_type = mock.MagicMock(return_value=sfp.SFP_SW_CONTROL) + s.check_power_capability = mock.MagicMock(return_value=True) + s.update_i2c_frequency = mock.MagicMock() + s.disable_tx_for_sff_optics = mock.MagicMock() + s.on_event(sfp.EVENT_START) + assert s.get_state() == sfp.STATE_SW_CONTROL \ No newline at end of file diff --git a/platform/mellanox/mlnx-platform-api/tests/test_statemachine.py b/platform/mellanox/mlnx-platform-api/tests/test_statemachine.py new file mode 100644 index 000000000000..f2193a6866d4 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_statemachine.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import pytest +import sys + +from mock import MagicMock +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +from sonic_platform import state_machine + +STATE_DOWN = 'Down' +STATE_INIT = 'Initializing' +STATE_UP = 'Up' + +ACTION_LEAVE_DOWN = 'Leave Down' +ACTION_INIT = 'Initializing' +ACTION_UP = 'Up' + +EVENT_START = 'Start' +EVENT_INIT_DONE = 'Initialize Done' +EVENT_STOP = 'Stop' + +class StateEntity: + def __init__(self): + self.state = STATE_DOWN + self.current_action = None + self.triggered_actions = [] + + def get_state(self): + return self.state + + def change_state(self, new_state): + self.state = new_state + + def on_event(self, event): + pass + + def on_action(self, action_name): + self.current_action = action_name + self.triggered_actions.append(action_name) + + +class TestStateMachine: + sm = None + @classmethod + def setup_class(cls): + sm = state_machine.StateMachine() + sm.add_state(STATE_DOWN).set_leave_action(ACTION_LEAVE_DOWN) \ + .add_transition(EVENT_START, STATE_INIT) + sm.add_state(STATE_INIT).set_entry_action(ACTION_INIT) \ + .add_transition(EVENT_INIT_DONE, STATE_UP) \ + .add_transition(EVENT_STOP, STATE_DOWN) + sm.add_state(STATE_UP).set_entry_action(ACTION_UP) \ + .add_transition(EVENT_STOP, STATE_DOWN) + cls.sm = sm + + def test_state_machine(self): + state_entity = StateEntity() + + # Start + self.sm.on_event(state_entity, EVENT_START) + assert state_entity.triggered_actions == [ACTION_LEAVE_DOWN, ACTION_INIT] + assert state_entity.get_state() == STATE_INIT + + # Initialize done + self.sm.on_event(state_entity, EVENT_INIT_DONE) + assert state_entity.current_action == ACTION_UP + assert state_entity.get_state() == STATE_UP + + # Stop + self.sm.on_event(state_entity, EVENT_STOP) + assert state_entity.get_state() == STATE_DOWN + + # Quick start/stop + 
self.sm.on_event(state_entity, EVENT_START) + self.sm.on_event(state_entity, EVENT_STOP) + assert state_entity.get_state() == STATE_DOWN + + # Event not defined for this state, state machine should ignore it + self.sm.on_event(state_entity, EVENT_STOP) + assert state_entity.get_state() == STATE_DOWN + + def test_unknown_state(self): + state_entity = StateEntity() + state_entity.state = 'unknown' + with pytest.raises(RuntimeError): + # Trigger unknown event + self.sm.on_event(state_entity, EVENT_START) + + def test_duplicate_state(self): + sm = state_machine.StateMachine() + sm.add_state(STATE_DOWN) + with pytest.raises(RuntimeError): + # Add duplicate state + sm.add_state(STATE_DOWN) + + def test_duplicate_transition(self): + sm = state_machine.StateMachine() + with pytest.raises(RuntimeError): + # Add duplicate transition + sm.add_state(STATE_DOWN) \ + .add_transition(EVENT_START, STATE_INIT) \ + .add_transition(EVENT_START, STATE_INIT) + + def test_unknown_transition_target(self): + sm = state_machine.StateMachine() + # Add unknown transition target + sm.add_state(STATE_DOWN) \ + .add_transition(EVENT_START, 'unknown') + + state_entity = StateEntity() + with pytest.raises(RuntimeError): + sm.on_event(state_entity, EVENT_START) + \ No newline at end of file diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py index 8e7509ce9b69..c135395c363b 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal_updater.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. 
# Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,10 +61,8 @@ def test_load_tc_config_mocked(self): @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_asic', mock.MagicMock()) @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.update_module', mock.MagicMock()) - @mock.patch('sonic_platform.thermal_updater.ThermalUpdater.wait_all_sfp_ready') @mock.patch('sonic_platform.utils.write_file') - def test_start_stop(self, mock_write, mock_wait): - mock_wait.return_value = True + def test_start_stop(self, mock_write): mock_sfp = mock.MagicMock() mock_sfp.sdk_index = 1 updater = ThermalUpdater([mock_sfp]) @@ -77,21 +75,6 @@ def test_start_stop(self, mock_write, mock_wait): assert not updater._timer.is_alive() mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1) - mock_wait.return_value = False - mock_write.reset_mock() - updater.start() - mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1) - updater.stop() - - @mock.patch('sonic_platform.thermal_updater.time.sleep', mock.MagicMock()) - def test_wait_all_sfp_ready(self): - mock_sfp = mock.MagicMock() - mock_sfp.is_sw_control = mock.MagicMock(return_value=True) - updater = ThermalUpdater([mock_sfp]) - assert updater.wait_all_sfp_ready() - mock_sfp.is_sw_control.side_effect = Exception('') - assert not updater.wait_all_sfp_ready() - @mock.patch('sonic_platform.utils.read_int_from_file') def test_update_asic(self, mock_read): mock_read.return_value = 8 diff --git a/platform/mellanox/mlnx-platform-api/tests/test_wait_sfp_ready_task.py b/platform/mellanox/mlnx-platform-api/tests/test_wait_sfp_ready_task.py new file mode 100644 index 000000000000..16e361f09327 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_wait_sfp_ready_task.py @@ -0,0 +1,51 @@ +# +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import sys + +if sys.version_info.major == 3: + from unittest import mock +else: + import mock + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +from sonic_platform import wait_sfp_ready_task +from sonic_platform import utils + + +class TestWaitSfpReadyTask: + def test_schedule(self): + task = wait_sfp_ready_task.WaitSfpReadyTask() + task.schedule_wait(0) + assert not task.empty() + task.cancel_wait(0) + assert task.empty() + + def test_run(self): + task = wait_sfp_ready_task.WaitSfpReadyTask() + task.WAIT_TIME = 1 # Fast the test + task.start() + task.schedule_wait(0) + assert utils.wait_until(lambda: 0 in task.get_ready_set(), 4, 0.5), 'sfp does not reach ready in 4 seconds' + assert 0 not in task._wait_dict + assert len(task._ready_set) == 0 + task.stop() + task.join()