Skip to content

Commit

Permalink
[Mellanox] Add test case for CPU thermal control algorithm for SN4800 (sonic-net#5309)
Browse files: browse the repository at this point in the history

Add test case for PR sonic-net/sonic-buildimage#10202

- How did you do it?
Mock changes to the CPU temperature and verify that the algorithm updates the cooling level accordingly.

- How did you verify/test it?
Ran the new test; it passed.

- Any platform specific information?
Mellanox/Nvidia SN4800 platform
  • Loading branch information
Junchao-Mellanox authored and xwjiang-ms committed Apr 13, 2022
1 parent ff535c5 commit 6353942
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1154,3 +1154,29 @@ def deinit(self):

def mock_psu_status(self, psu_index, status):
    """
    Mock the status of one PSU.

    Formats ``self.PSU_PRESENCE`` with ``psu_index`` and hands it to
    ``self.mock_helper.mock_thermal_value`` together with the string '1'
    (truthy ``status``) or '0' (falsy ``status``).

    :param psu_index: Value substituted into the ``PSU_PRESENCE`` template.
    :param status: Truthy to mock '1', falsy to mock '0'.
    :return:
    """
    # NOTE(review): PSU_PRESENCE is presumably a file-path template -- confirm
    # against the enclosing class.
    presence_target = self.PSU_PRESENCE.format(psu_index)
    if status:
        presence_value = '1'
    else:
        presence_value = '0'
    self.mock_helper.mock_thermal_value(presence_target, presence_value)


@mocker('CpuThermalMocker')
class CpuThermalMocker(object):
    """
    Mocker used to drive the CPU thermal control algorithm.

    It writes a mocked CPU pack temperature and reads back the current CPU
    cooling state, both through a ``MockerHelper`` created for the DUT.
    """

    # CPU pack temperature thresholds.
    # NOTE(review): unit is presumably millidegrees Celsius -- confirm.
    LOW_THRESHOLD = 80000
    HIGH_THRESHOLD = 95000

    # Lower and upper bound of the CPU cooling state.
    # NOTE(review): presumably the range the thermal algorithm may set -- confirm.
    MIN_COOLING_STATE = 2
    MAX_COOLING_STATE = 10

    # Files read/written through the mock helper.
    CPU_COOLING_STATE_FILE = '/var/run/hw-management/thermal/cooling2_cur_state'
    CPU_PACK_TEMP_FILE = '/var/run/hw-management/thermal/cpu_pack'

    def __init__(self, dut):
        """
        Create the mocker and its ``MockerHelper``.

        :param dut: DUT object passed through to ``MockerHelper``.
        """
        self.mock_helper = MockerHelper(dut)

    def deinit(self):
        """
        Tear down this mocker by de-initializing its mock helper.
        :return:
        """
        self.mock_helper.deinit()

    def mock_cpu_pack_temperature(self, temperature):
        """
        Mock the CPU pack temperature.

        :param temperature: Value to write to ``CPU_PACK_TEMP_FILE`` via the
                            mock helper.
        :return:
        """
        self.mock_helper.mock_value(self.CPU_PACK_TEMP_FILE, temperature)

    def get_cpu_cooling_state(self):
        """
        Read the current CPU cooling state.

        :return: Content of ``CPU_COOLING_STATE_FILE`` converted to int.
        """
        raw_state = self.mock_helper.read_value(self.CPU_COOLING_STATE_FILE)
        return int(raw_state)
40 changes: 40 additions & 0 deletions tests/platform_tests/mellanox/test_thermal_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import operator
import pytest
import random
from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common.mellanox_data import get_platform_data
from tests.common.utilities import wait_until
from tests.platform_tests.thermal_control_test_helper import *
Expand Down Expand Up @@ -159,6 +160,39 @@ def test_psu_absence_policy(duthosts, rand_one_dut_hostname, mocker_factory):
assert check_fan_speed(duthost, MAX_PWM), 'Fan speed is not turn to {}'.format(MAX_PWM)


@pytest.mark.disable_loganalyzer
def test_cpu_thermal_control(rand_selected_dut, mocker_factory):
    """
    Verify the CPU cooling state follows the mocked CPU pack temperature.

    The test only runs on platform x86_64-nvidia_sn4800-r0 and is skipped via
    ``pytest_require`` elsewhere. It mocks four CPU pack temperatures in
    sequence and, after each one, waits for the CPU cooling state to reach the
    expected value:

    1. below the low threshold    -> MIN_COOLING_STATE
    2. at the low threshold       -> MIN_COOLING_STATE + 1
    3. above the high threshold   -> MAX_COOLING_STATE
    4. at the high threshold      -> MAX_COOLING_STATE - 1

    :param rand_selected_dut: DUT fixture the test runs against.
    :param mocker_factory: Fixture used to create the 'CpuThermalMocker'.
    """
    duthost = rand_selected_dut
    dut_platform = duthost.facts["platform"]
    pytest_require(
        dut_platform == "x86_64-nvidia_sn4800-r0",
        'This test case is only for platform x86_64-nvidia_sn4800-r0, skipping...'
    )
    mocker = mocker_factory(duthost, 'CpuThermalMocker')

    temp_step = 1000
    # (mocked temperature, expected cooling state, failure message).
    # The order matters: each expected state builds on the one reached by the
    # previous step (e.g. MIN + 1 is expected right after MIN was reached).
    steps = (
        # CPU temperature is lower than the low threshold
        (mocker.LOW_THRESHOLD - temp_step,
         mocker.MIN_COOLING_STATE,
         'CPU cooling state is not MIN when temperature is below low threshold'),
        # CPU temperature is rising
        (mocker.LOW_THRESHOLD,
         mocker.MIN_COOLING_STATE + 1,
         'CPU cooling state is not increasing when temperature is rising'),
        # CPU temperature is higher than the high threshold
        (mocker.HIGH_THRESHOLD + temp_step,
         mocker.MAX_COOLING_STATE,
         'CPU cooling state is not MAX when temperature is beyond high threshold'),
        # CPU temperature is decreasing
        (mocker.HIGH_THRESHOLD,
         mocker.MAX_COOLING_STATE - 1,
         'CPU cooling state is not decreasing when temperature is decreasing'),
    )

    for temperature, expected_state, failure_message in steps:
        mocker.mock_cpu_pack_temperature(temperature)
        wait_result = wait_until(10, 3, 0, check_cpu_cooling_state, mocker, expected_state)
        pytest_assert(wait_result, failure_message)


def _check_psu_fan_speed_in_range(actual_speed, max_speed, cooling_level):
expect_speed = max_speed * cooling_level / 10.0
logger.info('Expect speed: {}, actual speed: {}'.format(expect_speed, actual_speed))
Expand Down Expand Up @@ -270,3 +304,9 @@ def check_fan_speed(duthost, expect_value):
logging.error('For file {}, Expect speed {}, but actual is {}'.format(file, expect_value, actual_speed))
return False
return True


def check_cpu_cooling_state(mocker, expect_value):
    """
    Check whether the current CPU cooling state equals the expected value.

    :param mocker: Object providing ``get_cpu_cooling_state()``.
    :param expect_value: Cooling state the caller expects.
    :return: True if the value returned by ``mocker.get_cpu_cooling_state()``
             equals ``expect_value``, otherwise False.
    """
    actual_value = mocker.get_cpu_cooling_state()
    # Pass the values as logging arguments so the message is only formatted
    # when the DEBUG level is actually enabled.
    logging.debug('Expect cpu cooling value is %s, actual value is %s', expect_value, actual_value)
    return actual_value == expect_value
2 changes: 1 addition & 1 deletion tests/platform_tests/thermal_control_test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _create_mocker(dut, mocker_name):
platform = dut.facts['platform']
mocker_object = None

if 'mlnx' in platform:
if 'mlnx' in platform or 'nvidia' in platform:
from tests.platform_tests.mellanox import mellanox_thermal_control_test_helper
mocker_type = BaseMocker.get_mocker_type(mocker_name)
if mocker_type:
Expand Down

0 comments on commit 6353942

Please sign in to comment.