-
Notifications
You must be signed in to change notification settings - Fork 763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added DPU platform test cases #14152
Changes from all commits
e0ebe52
8dc74e7
3f8d0d8
8f724d5
4c51d33
0c972b3
a821082
dcccc94
ce6b013
49564d1
e07e31d
3c490e9
e8edb18
8e7fbdd
88a93ff
2ce111f
4bf5489
00089af
c63d7c0
99f8bf5
c18e7dd
f1cd717
362ffd3
d8bbb79
8f34419
df7d03a
4324009
b36ca03
899710c
55926b1
953626e
6e4a41b
63d2ee6
76b6c4e
edb52e4
9af649d
ca45309
c5c7614
0964a5c
2113129
0c8cdfc
893e984
01b7620
d2dd500
bf9c574
ecc60e8
04d829c
2ab8055
e1c8039
562a595
2ad407b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
""" | ||
Helper script for DPU operations | ||
""" | ||
import logging | ||
import pytest | ||
from tests.common.devices.sonic import * # noqa: F401,F403 | ||
from tests.platform_tests.api.conftest import * # noqa: F401,F403 | ||
from tests.common.helpers.platform_api import chassis, module | ||
from tests.common.utilities import wait_until | ||
from tests.common.helpers.assertions import pytest_assert | ||
from pkg_resources import parse_version | ||
|
||
|
||
@pytest.fixture(scope='function')
def num_dpu_modules(platform_api_conn):
    """
    Fixture providing the module count of the chassis

    Args:
        platform_api_conn : Platform API connection handle
    Returns:
        int: value of chassis.get_num_modules() converted to an integer
    """
    module_count = int(chassis.get_num_modules(platform_api_conn))

    # Logged so the detected count is visible when troubleshooting a run
    # (requested during review).
    logging.info("Num of modules: '{}'".format(module_count))
    return module_count
|
||
|
||
@pytest.fixture(scope='function')
def check_smartswitch_and_dark_mode(duthosts,
                                    enum_rand_one_per_hwsku_hostname,
                                    platform_api_conn):
    """
    Gate fixture for the DPU test cases

    Skips the test when the DUT facts list no DPUs and its OS version
    parses as 202405 or older. Otherwise checks whether the DPUs are in
    dark mode and, if they are, powers them up before the test runs.

    Args:
        duthosts : Collection of host handles, indexed by hostname
        enum_rand_one_per_hwsku_hostname : Hostname of the selected DUT
        platform_api_conn : Platform API connection handle
    """
    dut = duthosts[enum_rand_one_per_hwsku_hostname]

    # Nested on purpose: the version comparison is only evaluated when
    # the DPUS fact is empty.
    if not dut.facts["DPUS"]:
        if parse_version(dut.os_version) <= parse_version("202405"):
            pytest.skip("Test is not supported for this testbed and os version")

    if is_dark_mode_enabled(dut, platform_api_conn):
        dpu_power_on(dut, platform_api_conn)
|
||
|
||
def is_dark_mode_enabled(duthost, platform_api_conn):
    """
    Checks whether the smartswitch is in dark mode

    For every module index reported by the platform API, reads the
    "CHASSIS_MODULE|<name>" hash from redis DB 4 on the DUT and looks
    for the text 'down' in the reply.

    Args:
        duthost : Host handle
        platform_api_conn : Platform API connection handle
    Returns:
        True if the redis output of every module contains 'down'
        (also the case when no modules are reported),
        False if any module is not down or a redis reply is empty
    """
    # Query the platform API directly: num_dpu_modules is a pytest
    # fixture and must not be invoked like a regular function.
    num_modules = int(chassis.get_num_modules(platform_api_conn))

    for index in range(num_modules):
        dpu = module.get_name(platform_api_conn, index)
        output_config_db = duthost.command(
            'redis-cli -p 6379 -h 127.0.0.1 '
            '-n 4 hgetall "CHASSIS_MODULE|{}"'.format(dpu))
        stdout = output_config_db['stdout']

        if not stdout:
            # Covers both None and an empty string. Without any state
            # we cannot claim dark mode, so report False (as agreed in
            # review) instead of silently leaving the loop.
            logging.warning(
                "redis cli output for chassis module state is empty")
            return False

        if 'down' not in stdout:
            logging.info("Smartswitch is in non-dark mode")
            return False

    logging.info("Smartswitch is in dark mode")
    return True
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens when only a few DPUs are in the power-on state? Do the tests proceed only on the enabled DPUs? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, the tests proceed with the enabled DPUs. |
||
|
||
|
||
def dpu_power_on(duthost, platform_api_conn):
    """
    Executes power on for all DPUs and waits until they answer ping

    Args:
        duthost : Host handle
        platform_api_conn : Platform API connection handle
    Returns:
        None. Fails the test through pytest_assert when the ping check
        does not succeed within the wait window.
    """
    # Query the platform API directly: num_dpu_modules is a pytest
    # fixture and must not be invoked like a regular function.
    num_modules = int(chassis.get_num_modules(platform_api_conn))
    ip_address_list = []

    for index in range(num_modules):
        dpu = module.get_name(platform_api_conn, index)
        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, index))
        duthost.shell("config chassis modules startup %s" % (dpu))

    # NOTE(review): presumably wait_until(timeout, interval, delay, ...),
    # i.e. poll every 60s for up to 180s - confirm against the helper.
    pytest_assert(wait_until(180, 60, 0, check_dpu_ping_status,
                             duthost, ip_address_list),
                  "Not all DPUs are operationally up")
|
||
|
||
def check_dpu_ping_status(duthost, ip_address_list):
    """
    Executes ping to all DPUs
    Args:
        duthost : Host handle
        ip_address_list (list): List of all DPU ip addresses
    Returns:
        Returns True when every address reports zero packet loss
        (also for an empty list), otherwise False
    """
    import re  # function-scope import keeps this helper self-contained

    # A plain '"0% packet loss" in stdout' test also matches
    # "100% packet loss" or "50% packet loss"; the look-behind rejects
    # any figure that merely ends in a zero.
    zero_loss = re.compile(r"(?<![\d.])0(?:\.0+)?% packet loss")

    all_reachable = True
    for ip_address in ip_address_list:
        output_ping = duthost.command("ping -c 3 %s" % (ip_address))
        # Every ping result is logged, even after a failure, so the
        # full picture is available when troubleshooting.
        logging.info("Ping output: '{}'".format(output_ping))
        if not zero_loss.search(output_ping["stdout"] or ""):
            all_reachable = False

    return all_reachable
|
||
|
||
def check_dpu_module_status(duthost, power_status, dpu_name):
    """
    Check status of given DPU module against given option on/off
    Args:
        duthost : Host handle
        power_status: expected state of the dpu, "on" or "off"
        dpu_name: name of the dpu module
    Returns:
        Returns True when the observed state matches power_status,
        False otherwise (including any value other than "on"/"off")
    """
    # The command line contains a pipe, so it has to go through a
    # shell; shell() is what this file already uses for config commands.
    output_dpu_status = duthost.shell(
        'show chassis module status | grep %s' % (dpu_name))

    is_offline = "Offline" in output_dpu_status["stdout"]

    # Log the state that was actually observed, not the expected one.
    observed = "offline" if is_offline else "online"
    logging.info("'{}' is {} ...".format(dpu_name, observed))

    if is_offline:
        return power_status == "off"
    return power_status == "on"
|
||
|
||
def check_dpu_reboot_cause(duthost, dpu_name):
    """
    Check reboot cause of the given DPU module
    Args:
        duthost : Host handle
        dpu_name: name of the dpu module
    Returns:
        Returns True when the reboot-cause line(s) for dpu_name contain
        'Unknown', otherwise False
    """
    # The command line contains a pipe, so it has to go through a
    # shell; shell() is what this file already uses for config commands.
    output_reboot_cause = duthost.shell(
        'show reboot-cause all | grep %s' % (dpu_name))

    if 'Unknown' in output_reboot_cause["stdout"]:
        # Checking for Unknown as of now and
        # implementation for other reasons are not in place now
        # TODO: Needs to be extend the function for other reasons
        logging.info("'{}' - reboot cause is Unknown...".format(dpu_name))
        return True

    return False
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
""" | ||
Tests for the `reboot and reload ...` commands in DPU | ||
""" | ||
|
||
import logging | ||
import pytest | ||
import time | ||
from tests.common.helpers.assertions import pytest_assert | ||
from tests.common.platform.interface_utils \ | ||
import check_interface_status_of_up_ports | ||
from tests.common.utilities import wait_until | ||
from tests.common.platform.processes_utils import wait_critical_processes | ||
from tests.common.reboot import reboot, wait_for_startup, REBOOT_TYPE_COLD | ||
from tests.common.config_reload import config_force_option_supported, config_system_checks_passed # noqa: F401, E501 | ||
from tests.smartswitch.common.device_utils_dpu import * # noqa: F401,F403,E501 | ||
from tests.common.helpers.platform_api import chassis, module # noqa: F401 | ||
from tests.platform_tests.api.conftest import * # noqa: F401,F403 | ||
|
||
# Module-level marks: every test in this file is tagged with topology 't1'.
pytestmark = [pytest.mark.topology('t1')]
|
||
|
||
def test_dpu_ping_after_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                               localhost, platform_api_conn, num_dpu_modules):
    """
    @summary: Verify that all DPUs answer ping after a cold reboot
              of the switch followed by a startup of every DPU module
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    ip_address_list = []

    # This reboots the entire switch, not a single DPU.
    logging.info("Starting switch reboot...")
    reboot(duthost, localhost, reboot_type=REBOOT_TYPE_COLD,
           wait_for_ssh=False)
    wait_for_startup(duthost, localhost, 10, 300)
    pytest_assert(wait_until(300, 5, 0, check_interface_status_of_up_ports,
                             duthost),
                  "Not all ports that are admin up on are operationally up")
    logging.info("Interfaces are up")

    # num_dpu_modules is a fixture, so the argument already holds the
    # integer count and must not be called.
    for index in range(num_dpu_modules):
        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, index))
        dpu = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only, like the rest of this test.
        duthost.shell("config chassis modules startup %s" % (dpu))
        time.sleep(2)

    pytest_assert(wait_until(120, 30, 0, check_dpu_ping_status,  # noqa: F405
                             duthost, ip_address_list),
                  "Not all DPUs operationally up")
|
||
|
||
def test_show_ping_int_after_reload(duthosts, enum_rand_one_per_hwsku_hostname,
                                    localhost, platform_api_conn,
                                    num_dpu_modules):
    """
    @summary: To Check Ping between NPU and DPU
              after configuration reload on NPU
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    # num_dpu_modules is a fixture, so the argument already holds the
    # integer count and must not be called. The midplane addresses are
    # collected before the reload is triggered.
    ip_address_list = [
        module.get_midplane_ip(platform_api_conn, index)
        for index in range(num_dpu_modules)
    ]

    logging.info("Reload configuration")
    # "&>" is a bash redirection, hence the explicit executable.
    duthost.shell("sudo config reload -y &>/dev/null", executable="/bin/bash")

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(duthost)

    pytest_assert(wait_until(30, 10, 0, check_dpu_ping_status,  # noqa: F405
                             duthost, ip_address_list),
                  "Not all DPUs operationally up")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
""" | ||
Tests for the `platform cli ...` commands in DPU | ||
""" | ||
|
||
import logging | ||
import pytest | ||
from tests.common.utilities import wait_until | ||
from tests.common.helpers.assertions import pytest_assert | ||
from tests.smartswitch.common.device_utils_dpu import * # noqa: F403,F401,E501 | ||
from tests.common.helpers.platform_api import chassis, module # noqa: F401 | ||
from tests.platform_tests.api.conftest import * # noqa: F401,F403 | ||
from tests.common.devices.sonic import * # noqa: 403 | ||
|
||
# Module-level marks: every test in this file is tagged with topology 't1'.
pytestmark = [pytest.mark.topology('t1')]
|
||
|
||
def test_midplane_ip(duthosts, enum_rand_one_per_hwsku_hostname,
                     platform_api_conn):
    """
    @summary: Verify `Midplane ip address between NPU and DPU`
              by pinging every DPU that is not reported as Offline
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    ip_address_list = []

    output_dpu_status = duthost.show_and_parse('show chassis module status')

    for entry in output_dpu_status:
        name = entry['name']
        if 'DPU' not in name or entry['oper-status'] == 'Offline':
            continue

        # Take the full trailing number so names such as DPU10 work,
        # and pass it as an int like the other platform API callers.
        # NOTE(review): assumes the numeric suffix of the module name
        # equals its platform API index - confirm.
        digits = name[len(name.rstrip('0123456789')):]
        if not digits:
            logging.warning(
                "Cannot derive module index from '{}'".format(name))
            continue

        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, int(digits)))

    ping_status = check_dpu_ping_status(duthost, ip_address_list)  # noqa: F405
    pytest_assert(ping_status, "Ping to one or more DPUs failed")
|
||
|
||
def test_shutdown_power_up_dpu(duthosts, enum_rand_one_per_hwsku_hostname,
                               platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `shut down and power up DPU`
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    # num_dpu_modules is a fixture, so the argument already holds the
    # integer count and must not be called.
    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only, matching the status check below.
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off", dpu_name),
                      "DPU is not operationally down")

    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "on", dpu_name),
                      "DPU is not operationally up")
|
||
|
||
def test_reboot_cause(duthosts, enum_rand_one_per_hwsku_hostname,
                      platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `Reboot Cause`
              Shuts every DPU down, starts it again and then checks
              the reboot cause reported for it.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    # num_dpu_modules is a fixture, so the argument already holds the
    # integer count and must not be called.
    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Same "modules" spelling as every other call site in the file;
        # the command output is not used.
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off", dpu_name),
                      "DPU is not operationally down")

    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only, matching the check below.
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_reboot_cause,  # noqa: F405
                                 duthost, dpu_name),
                      "DPU is not operationally up")
|
||
|
||
def _assert_pcie_check_passed(duthost, cmd):
    """Run the PCIe info command and assert its last line reports PASSED"""
    logging.info("Verifying output of '{}' on '{}'..."
                 .format(cmd, duthost.hostname))
    output_pcie_info = duthost.command(cmd)["stdout_lines"]
    # Guard against empty output so the failure is an assertion with a
    # readable message rather than an IndexError.
    last_line = output_pcie_info[-1] if output_pcie_info else ""
    pytest_assert(last_line ==
                  'PCIe Device Checking All Test ----------->>> PASSED',
                  "PCIe link check did not pass on '{}'"
                  .format(duthost.hostname))


def test_pcie_link(duthosts, enum_rand_one_per_hwsku_hostname,
                   platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `PCIe link`
              The PCIe check is run three times: initially, after all
              DPUs were shut down, and after they were started again.
    """
    CMD_PCIE_INFO = "show platform pcieinfo -c"

    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)

    # num_dpu_modules is a fixture, so the argument already holds the
    # integer count and must not be called.
    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only, matching the status check below.
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off", dpu_name),
                      "DPU is not operationally down")

    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)

    for index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "on", dpu_name),
                      "DPU is not operationally up")

    # Same full-banner comparison as the two checks above; comparing
    # the whole line against just "PASSED" could never succeed.
    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@vvolam can you review?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes reviewing!