From b39e03033e710b80b525eab31c442c905e0a963b Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox Date: Mon, 30 Oct 2023 07:46:00 +0200 Subject: [PATCH] Update sysfs node name/path according to production code change Change-Id: If3524367576c95e6596779ca5c388832bac4f6d5 --- tests/common/mellanox_data.py | 26 ++++ tests/platform_tests/mellanox/check_sysfs.py | 25 ++-- .../mellanox_thermal_control_test_helper.py | 139 +++++++++--------- .../mellanox/mellanox_device_mocker.py | 10 +- 4 files changed, 117 insertions(+), 83 deletions(-) diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index 0e09a5415b2..d6fd570756c 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -1,4 +1,6 @@ +import functools + SPC1_HWSKUS = ["ACS-MSN2700", "Mellanox-SN2700", "Mellanox-SN2700-D48C8", "ACS-MSN2740", "ACS-MSN2100", "ACS-MSN2410", "ACS-MSN2010", "ACS-SN2201"] SPC2_HWSKUS = ["ACS-MSN3700", "ACS-MSN3700C", "ACS-MSN3800", "Mellanox-SN3800-D112C8", "ACS-MSN3420"] @@ -901,3 +903,27 @@ def get_platform_data(dut): def get_chip_type(dut): platform_data = get_platform_data(dut) return platform_data.get("chip_type") + + +def read_only_cache(): + """Decorator to cache return value for a method/function once. + This decorator should be used for method/function when: + 1. Executing the method/function takes time. e.g. reading sysfs. + 2. The return value of this method/function never changes. + """ + def decorator(method): + method.return_value = None + + @functools.wraps(method) + def _impl(*args, **kwargs): + if not method.return_value: + method.return_value = method(*args, **kwargs) + return method.return_value + return _impl + return decorator + + +@read_only_cache() +def get_hw_management_version(duthost): + full_version = duthost.shell('dpkg-query --showformat=\'${Version}\' --show hw-management')['stdout'] + return full_version[len('1.mlnx.'):] diff --git a/tests/platform_tests/mellanox/check_sysfs.py b/tests/platform_tests/mellanox/check_sysfs.py index 167a13e13a9..82a192d26de 100644 --- a/tests/platform_tests/mellanox/check_sysfs.py +++ b/tests/platform_tests/mellanox/check_sysfs.py @@ -5,7 +5,8 @@ """ import logging import re -from tests.common.mellanox_data import get_platform_data +from pkg_resources import parse_version +from tests.common.mellanox_data import get_hw_management_version, get_platform_data from tests.common.utilities import wait_until MAX_FAN_SPEED_THRESHOLD = 0.15 @@ -41,7 +42,7 @@ def check_sysfs(dut): @summary: Check various hw-management related sysfs under /var/run/hw-management """ platform_data = get_platform_data(dut) - sysfs_config = generate_sysfs_config(platform_data) + sysfs_config = generate_sysfs_config(dut, platform_data) logging.info("Collect mellanox sysfs facts") sysfs_facts = dut.sysfs_facts(config=sysfs_config)['ansible_facts'] @@ -247,7 +248,7 @@ def _is_fan_speed_in_range(sysfs_facts): return True -def generate_sysfs_config(platform_data): +def generate_sysfs_config(dut, platform_data): config = list() config.append(generate_sysfs_symbolink_config()) config.append(generate_sysfs_asic_config()) @@ -256,7 +257,7 @@ def generate_sysfs_config(platform_data): config.append(generate_sysfs_cpu_core_config(platform_data)) config.append(generate_sysfs_fan_config(platform_data)) if platform_data['psus']['hot_swappable']: - config.append(generate_sysfs_psu_config(platform_data)) + config.append(generate_sysfs_psu_config(dut, platform_data)) config.append(generate_sysfs_sfp_config(platform_data)) return config @@ -369,8 +370,8 @@ def generate_sysfs_cpu_core_config(platform_data): } -def generate_sysfs_psu_config(platform_data): - return { +def generate_sysfs_psu_config(dut, platform_data): + data = { 'name': 'psu_info', 'start': 1, 'count': platform_data['psus']['number'], @@ -386,15 +387,15 @@ def generate_sysfs_psu_config(platform_data): }, { 'name': 'temp', - 'cmd_pattern': 'cat /var/run/hw-management/thermal/psu{}_temp', + 'cmd_pattern': 'cat /var/run/hw-management/thermal/psu{}_temp1', }, { 'name': 'max_temp', - 'cmd_pattern': 'cat /var/run/hw-management/thermal/psu{}_temp_max', + 'cmd_pattern': 'cat /var/run/hw-management/thermal/psu{}_temp1_max', }, { 'name': 'max_temp_alarm', - 'cmd_pattern': 'cat /var/run/hw-management/thermal/psu{}_temp_max_alarm', + 'cmd_pattern': 'cat /var/run/hw-management/alarm/psu{}_temp1_max_alarm', }, { 'name': 'fan_speed', @@ -402,6 +403,12 @@ def generate_sysfs_psu_config(platform_data): } ] } + hw_mgmt_version = get_hw_management_version(dut) + if parse_version(hw_mgmt_version) < parse_version('7.0030.2003'): + data['properties'][2]['cmd_pattern'] = 'cat /var/run/hw-management/thermal/psu{}_temp' + data['properties'][3]['cmd_pattern'] = 'cat /var/run/hw-management/thermal/psu{}_temp_max' + data['properties'][4]['cmd_pattern'] = 'cat /var/run/hw-management/thermal/psu{}_temp_max_alarm' + return data def generate_sysfs_sfp_config(platform_data): diff --git a/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py b/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py index a48eac11de3..e8d2177e266 100644 --- a/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py +++ b/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py @@ -6,7 +6,7 @@ from pkg_resources import parse_version from tests.platform_tests.thermal_control_test_helper import mocker, FanStatusMocker, ThermalStatusMocker, \ SingleFanMocker -from tests.common.mellanox_data import get_platform_data +from tests.common.mellanox_data import get_hw_management_version, get_platform_data from .minimum_table import get_min_table from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert @@ -29,8 +29,8 @@ }, "psu": { "name": "PSU-{} Temp", - "temperature": "psu{}_temp", - "high_threshold": "psu{}_temp_max" + "temperature": "psu{}_temp1", + "high_threshold": "psu{}_temp1_max" }, "cpu_pack": { "name": "CPU Pack Temp", @@ -41,14 +41,14 @@ "gearbox": { "name": "Gearbox {} Temp", "temperature": "gearbox{}_temp_input", - "high_threshold": "mlxsw-gearbox{}/temp_trip_hot", - "high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit" + "high_threshold": "gearbox{}_temp_emergency", + "high_critical_threshold": "gearbox{}_temp_trip_crit" }, "asic_ambient": { "name": "ASIC", "temperature": "asic", - "high_threshold": "mlxsw/temp_trip_hot", - "high_critical_threshold": "mlxsw/temp_trip_crit" + "high_threshold": "asic_temp_emergency", + "high_critical_threshold": "asic_temp_trip_crit" }, "port_ambient": { "name": "Ambient Port Side Temp", @@ -78,15 +78,67 @@ } } -ASIC_THERMAL_RULE_201911 = { - "name": "Ambient ASIC Temp", - "temperature": "asic" +thermal_rule_patched = False + +THERMAL_RULE_PATCHES = { + "201911": { + "asic_ambient": { + "name": "Ambient ASIC Temp", + "temperature": "asic" + }, + "gearbox": { + "name": "Gearbox {} Temp", + "temperature": "gearbox{}_temp_input" + } + }, + "hw-mgmt.7.0030.2003.before": { + "asic_ambient": { + "name": "ASIC", + "temperature": "asic", + "high_threshold": "mlxsw/temp_trip_hot", + "high_critical_threshold": "mlxsw/temp_trip_crit" + }, + "gearbox": { + "name": "Gearbox {} Temp", + "temperature": "gearbox{}_temp_input", + "high_threshold": "mlxsw-gearbox{}/temp_trip_hot", + "high_critical_threshold": "mlxsw-gearbox{}/temp_trip_crit" + }, + "psu": { + "name": "PSU-{} Temp", + "temperature": "psu{}_temp", + "high_threshold": "psu{}_temp_max" + } + } } -GEARBOX_THERMAL_RULE_201911 = { - "name": "Gearbox {} Temp", - "temperature": "gearbox{}_temp_input" -} + +def patch_thermal_rule(mock_helper): + """ + Patch thermal rule for different sonic version/kernel version/hw-management version. + This function is mainly for backward compatible. + :param mock_helper: Mock helper. + """ + global thermal_rule_patched + global THERMAL_NAMING_RULE + + if thermal_rule_patched: + return + + patch = None + if mock_helper.is_201911(): + patch = THERMAL_RULE_PATCHES['201911'] + else: + hw_mgmt_version = get_hw_management_version(mock_helper.dut) + if parse_version(hw_mgmt_version) < parse_version('7.0030.2003'): + patch = THERMAL_RULE_PATCHES['hw-mgmt.7.0030.2003.before'] + + if patch is not None: + for key, rule in patch.items(): + THERMAL_NAMING_RULE[key] = rule + + thermal_rule_patched = True + FAN_NAMING_RULE = { "fan": { @@ -675,12 +727,7 @@ def __init__(self, mock_helper, naming_rule, index): :param index: Thermal index. """ self.helper = mock_helper - if self.helper.is_201911(): - if 'ASIC' in naming_rule['name']: - naming_rule = ASIC_THERMAL_RULE_201911 - elif 'Gearbox' in naming_rule['name']: - naming_rule = GEARBOX_THERMAL_RULE_201911 - + patch_thermal_rule(mock_helper) self.name = naming_rule['name'] self.temperature_file = naming_rule['temperature'] self.high_threshold_file = naming_rule['high_threshold'] if 'high_threshold' in naming_rule else None @@ -1210,56 +1257,6 @@ def mock_normal_speed(self): self.expect_led_color = 'green' -@mocker('MinTableMocker') -class MinTableMocker(object): - FAN_AMB_PATH = 'fan_amb' - PORT_AMB_PATH = 'port_amb' - TRUST_PATH = 'module1_temp_fault' - LIST_THERMAL_ZONE_TEMPERATURE_FILE = 'ls /run/hw-management/thermal/mlxsw*/thermal_zone_temp' - NORMAL_TEMPERATURE = 40000 - - def __init__(self, dut): - self.mock_helper = MockerHelper(dut) - - def get_expect_cooling_level(self, temperature, trust_state): - minimum_table = get_min_table(self.mock_helper.dut) - row = minimum_table['unk_{}'.format( - 'trust' if trust_state else 'untrust')] - temperature = temperature / 1000 - for range_str, cooling_level in list(row.items()): - range_str_list = range_str.split(':') - min_temp = int(range_str_list[0]) - max_temp = int(range_str_list[1]) - if min_temp <= temperature <= max_temp: - return cooling_level - 10 - - return None - - def mock_min_table(self, temperature, trust_state): - trust_value = '0' if trust_state else '1' - fan_temp = temperature - port_temp = temperature - - self.mock_helper.mock_thermal_value(self.FAN_AMB_PATH, str(fan_temp)) - self.mock_helper.mock_thermal_value(self.PORT_AMB_PATH, str(port_temp)) - self.mock_helper.mock_thermal_value(self.TRUST_PATH, str(trust_value)) - - def mock_normal_temperature(self): - output = self.mock_helper.dut.shell( - self.LIST_THERMAL_ZONE_TEMPERATURE_FILE) - for thermal_file in output['stdout_lines']: - if self.mock_helper.read_value(thermal_file) != '0': - self.mock_helper.mock_value( - thermal_file, self.NORMAL_TEMPERATURE) - - def deinit(self): - """ - Destructor of MinTableMocker. - :return: - """ - self.mock_helper.deinit() - - @mocker('PsuMocker') class PsuMocker(object): PSU_PRESENCE = 'psu{}_status' @@ -1269,7 +1266,7 @@ def __init__(self, dut): def deinit(self): """ - Destructor of MinTableMocker. + Destructor of PsuMocker. :return: """ self.mock_helper.deinit() diff --git a/tests/system_health/mellanox/mellanox_device_mocker.py b/tests/system_health/mellanox/mellanox_device_mocker.py index 9a3ae6d6beb..de09985abef 100644 --- a/tests/system_health/mellanox/mellanox_device_mocker.py +++ b/tests/system_health/mellanox/mellanox_device_mocker.py @@ -1,12 +1,12 @@ from ..device_mocker import DeviceMocker -from tests.common.mellanox_data import get_platform_data +from pkg_resources import parse_version +from tests.common.mellanox_data import get_platform_data, get_hw_management_version from tests.platform_tests.mellanox.mellanox_thermal_control_test_helper import MockerHelper, FanDrawerData, FanData, \ FAN_NAMING_RULE class AsicData(object): TEMPERATURE_FILE = '/run/hw-management/thermal/asic' - THRESHOLD_FILE = '/run/hw-management/thermal/mlxsw/temp_trip_hot' def __init__(self, mock_helper): self.helper = mock_helper @@ -15,7 +15,11 @@ def mock_asic_temperature(self, value): self.helper.mock_value(AsicData.TEMPERATURE_FILE, str(value)) def get_asic_temperature_threshold(self): - value = self.helper.read_value(AsicData.THRESHOLD_FILE) + threshold_file = '/run/hw-management/thermal/asic_temp_emergency' + hw_mgmt_version = get_hw_management_version(self.helper.dut) + if parse_version(hw_mgmt_version) < parse_version('7.0030.2003'): + threshold_file = '/run/hw-management/thermal/mlxsw/temp_trip_hot' + value = self.helper.read_value(threshold_file) return int(value)