Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added DPU platform test cases #14152

Merged
merged 51 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
e0ebe52
Added DPU platform test cases
nissampa Aug 16, 2024
8dc74e7
modified imports for flake8 test
nissampa Aug 16, 2024
3f8d0d8
Changed dirrectory structure for smartswitch test scripts
nissampa Sep 3, 2024
8f724d5
adderessed few comments
nissampa Sep 5, 2024
4c51d33
moved tests scripts to platform_tests dir under smartswitch
nissampa Sep 5, 2024
0c972b3
addressed few set of comments in PR
nissampa Sep 6, 2024
a821082
added logging info
nissampa Sep 9, 2024
dcccc94
changed ping check
nissampa Sep 10, 2024
ce6b013
addressed set of comments on PR
nissampa Sep 10, 2024
49564d1
flake8 errors resolved for smartswitch/common directory
nissampa Sep 10, 2024
e07e31d
flake8 error resolved for platform_tests/test_reload_dpu.py
nissampa Sep 10, 2024
3c490e9
flake8 error resolved for platform_tests/test_show_platform_dpu.py
nissampa Sep 10, 2024
e8edb18
flake8 error resolved for platform_tests/test_show_platform_dpu.py
nissampa Sep 11, 2024
8e7fbdd
flake8 error resolved for platform_tests/test_show_platform_dpu.py
nissampa Sep 11, 2024
88a93ff
added test skip fixture for smartswitch and non-dark mode
nissampa Sep 11, 2024
2ce111f
utils flak8 fix
nissampa Sep 11, 2024
4bf5489
added platform file as variable
nissampa Sep 11, 2024
00089af
missed the \ in added code for platform file
nissampa Sep 12, 2024
c63d7c0
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Sep 12, 2024
99f8bf5
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Sep 12, 2024
c18e7dd
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Sep 18, 2024
f1cd717
added smartswtich and darkmode detection and lighting up dpus
nissampa Sep 18, 2024
362ffd3
rectified the syntax error
nissampa Sep 19, 2024
d8bbb79
rectified the syntax error
nissampa Sep 19, 2024
8f34419
rectified the syntax error
nissampa Sep 19, 2024
df7d03a
rectified the syntax error
nissampa Sep 19, 2024
4324009
rectified the syntax error
nissampa Sep 19, 2024
b36ca03
rectified the syntax error
nissampa Sep 19, 2024
899710c
rectified the syntax error
nissampa Sep 19, 2024
55926b1
rectified the syntax error
nissampa Sep 19, 2024
953626e
rectified the syntax error
nissampa Sep 19, 2024
6e4a41b
resolved flake8 errors
nissampa Sep 19, 2024
63d2ee6
added comments for util functions
nissampa Sep 20, 2024
76b6c4e
Made clear with utility function descriptions
nissampa Sep 21, 2024
edb52e4
retified the error
nissampa Sep 21, 2024
9af649d
num of dpu modules fixtures
nissampa Sep 25, 2024
ca45309
resolved flake8 errors
nissampa Sep 25, 2024
c5c7614
resolved flake8 errors
nissampa Sep 25, 2024
0964a5c
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Sep 25, 2024
2113129
changed link flap case to accomodate bridge interface
nissampa Sep 25, 2024
0c8cdfc
removed link flap case
nissampa Oct 1, 2024
893e984
resolved flake8 error
nissampa Oct 1, 2024
01b7620
eof fixed
nissampa Oct 1, 2024
d2dd500
resolving pre-commit check
nissampa Oct 7, 2024
bf9c574
Addressed set of comments on PR
nissampa Oct 7, 2024
ecc60e8
minor mistakes
nissampa Oct 8, 2024
04d829c
added extra line to PR checker
nissampa Oct 9, 2024
2ab8055
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Oct 9, 2024
e1c8039
minor change for PR checker
nissampa Oct 10, 2024
562a595
removed functions parameter which are not fixtures
nissampa Oct 17, 2024
2ad407b
Merge branch 'sonic-net:master' into dpu_platform_test_cases
nissampa Oct 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 171 additions & 0 deletions tests/smartswitch/common/device_utils_dpu.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vvolam can you review?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes reviewing!

Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
"""
Helper script for DPU operations
"""
import logging
import pytest
from tests.common.devices.sonic import * # noqa: F401,F403
from tests.platform_tests.api.conftest import * # noqa: F401,F403
from tests.common.helpers.platform_api import chassis, module
from tests.common.utilities import wait_until
from tests.common.helpers.assertions import pytest_assert
from pkg_resources import parse_version


@pytest.fixture(scope='function')
def num_dpu_modules(platform_api_conn):
    """
    Function-scoped fixture that yields the module count reported by
    the chassis platform API, converted to an int.

    The value is also written to the log to ease troubleshooting.
    """
    raw_count = chassis.get_num_modules(platform_api_conn)
    module_count = int(raw_count)

    message = "Num of modules: '{}'".format(module_count)
    logging.info(message)

    return module_count


@pytest.fixture(scope='function')
def check_smartswitch_and_dark_mode(duthosts,
                                    enum_rand_one_per_hwsku_hostname,
                                    platform_api_conn):
    """
    Gate fixture for the DPU test cases.

    The test is skipped when the selected DUT reports no DPUs and its
    OS version compares at or below "202405". Otherwise the DPU admin
    state is inspected: if all DPUs are administratively down (dark
    mode) they are started before the test runs.

    NOTE(review): the skip needs BOTH conditions (no DPUs AND an old
    image) - confirm whether either one alone was meant to skip.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    has_dpus = bool(duthost.facts["DPUS"])
    if not has_dpus:
        # The version is only compared when no DPUs are reported
        minimum = parse_version("202405")
        if parse_version(duthost.os_version) <= minimum:
            pytest.skip("Test is not supported for this testbed and os version")

    if is_dark_mode_enabled(duthost, platform_api_conn):
        dpu_power_on(duthost, platform_api_conn)


def is_dark_mode_enabled(duthost, platform_api_conn):
    """
    Checks whether the smartswitch is in dark mode, i.e. every DPU
    module has "down" in its CHASSIS_MODULE entry.
    Args:
        duthost : Host handle
        platform_api_conn : Platform API connection handle
    Returns:
        True if all DPUs admin status are down
        False otherwise, including when the CHASSIS_MODULE entry of
        a DPU comes back empty
    """

    # num_dpu_modules is a pytest fixture and must not be called like a
    # plain function, so the count is read from the platform API here.
    num_modules = int(chassis.get_num_modules(platform_api_conn))
    count_admin_down = 0

    for index in range(num_modules):
        dpu = module.get_name(platform_api_conn, index)
        output_config_db = duthost.command(
            'redis-cli -p 6379 -h 127.0.0.1 '
            '-n 4 hgetall "CHASSIS_MODULE|{}"'.format(dpu))
        stdout = output_config_db['stdout']
        if not stdout:
            # Falsy check on purpose: an empty result is an empty
            # string, which an `is None` test would never catch.
            logging.warning(
                "redis cli output for chassis module state is empty")
            return False
        if 'down' in stdout:
            count_admin_down += 1

    if count_admin_down == num_modules:
        logging.info("Smartswitch is in dark mode")
        return True

    logging.info("Smartswitch is in non-dark mode")
    return False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will happen in case of only few DPUs are in power-on state. Does the tests proceed only on enabled DPUs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the test proceeds with enabled dpus.



def dpu_power_on(duthost, platform_api_conn):
    """
    Executes power on all DPUs and waits until every DPU midplane IP
    answers ping.
    Args:
        duthost : Host handle
        platform_api_conn : Platform API connection handle
    Returns:
        None. Fails the test through pytest_assert when not all DPUs
        become reachable within the wait window.
    """

    # num_dpu_modules is a pytest fixture and must not be called like a
    # plain function, so the count is read from the platform API here.
    num_modules = int(chassis.get_num_modules(platform_api_conn))
    ip_address_list = []

    for index in range(num_modules):
        dpu = module.get_name(platform_api_conn, index)
        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, index))
        duthost.shell("config chassis modules startup %s" % (dpu))

    pytest_assert(wait_until(180, 60, 0, check_dpu_ping_status,
                             duthost, ip_address_list),
                  "Not all DPUs are operationally up")


def check_dpu_ping_status(duthost, ip_address_list):
    """
    Executes ping to all DPUs
    Args:
        duthost : Host handle
        ip_address_list (list): List of all DPU ip addresses
    Returns:
        True if every address answered with 0% packet loss, else False.
        An empty address list yields True because nothing failed.
    """

    ping_count = 0
    for ip_address in ip_address_list:
        output_ping = duthost.command("ping -c 3 %s" % (ip_address))
        logging.info("Ping output: '{}'".format(output_ping))
        # The leading space is required: a bare "0% packet loss" is
        # also a substring of "100% packet loss" (and of "10%", "50%").
        if " 0% packet loss" in output_ping["stdout"]:
            ping_count += 1

    return ping_count == len(ip_address_list)


def check_dpu_module_status(duthost, power_status, dpu_name):
    """
    Check status of given DPU module against given option on/off
    Args:
        duthost : Host handle
        power_status: expected state of the dpu, "on" or "off"
        dpu_name: name of the dpu module
    Returns:
        True when the observed state matches power_status, else False.
        Any power_status other than "on"/"off" never matches.
    """

    # The pipe into grep needs a real shell; the Ansible command module
    # does not interpret '|'.
    output_dpu_status = duthost.shell(
        'show chassis module status | grep %s' % (dpu_name))

    is_offline = "Offline" in output_dpu_status["stdout"]
    # Log the state that was actually observed, not the expected one
    logging.info("'{}' is {} ...".format(
        dpu_name, "offline" if is_offline else "online"))

    observed_status = "off" if is_offline else "on"
    return power_status == observed_status


def check_dpu_reboot_cause(duthost, dpu_name):
    """
    Check reboot cause of the given DPU module
    Args:
        duthost : Host handle
        dpu_name: name of the dpu module
    Returns:
        True when the reboot-cause line of the DPU contains 'Unknown',
        else False
    """

    # The pipe into grep needs a real shell; the Ansible command module
    # does not interpret '|'.
    output_reboot_cause = duthost.shell(
        'show reboot-cause all | grep %s' % (dpu_name))

    if 'Unknown' in output_reboot_cause["stdout"]:
        # Checking for Unknown as of now and
        # implementation for other reasons are not in place now
        # TODO: Needs to be extend the function for other reasons
        logging.info("'{}' - reboot cause is Unknown...".format(dpu_name))
        return True

    return False
76 changes: 76 additions & 0 deletions tests/smartswitch/platform_tests/test_reload_dpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Tests for the `reboot and reload ...` commands in DPU
"""

import logging
import pytest
import time
from tests.common.helpers.assertions import pytest_assert
from tests.common.platform.interface_utils \
import check_interface_status_of_up_ports
from tests.common.utilities import wait_until
from tests.common.platform.processes_utils import wait_critical_processes
from tests.common.reboot import reboot, wait_for_startup, REBOOT_TYPE_COLD
from tests.common.config_reload import config_force_option_supported, config_system_checks_passed # noqa: F401, E501
from tests.smartswitch.common.device_utils_dpu import * # noqa: F401,F403,E501
from tests.common.helpers.platform_api import chassis, module # noqa: F401
from tests.platform_tests.api.conftest import * # noqa: F401,F403

# Module-level pytest marks: every test in this file carries the
# 't1' topology mark.
pytestmark = [
    pytest.mark.topology('t1')
]


def test_dpu_ping_after_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                               localhost, platform_api_conn, num_dpu_modules):
    """
    @summary: Verify that all DPUs answer ping after a cold reboot of
              the switch followed by
              `config chassis modules startup <DPU_Number>`
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    ip_address_list = []
    # The fixture already provides the count as an int; calling it
    # would raise TypeError.
    num_modules = num_dpu_modules

    logging.info("Starting switch reboot...")
    reboot(duthost, localhost, reboot_type=REBOOT_TYPE_COLD,
           wait_for_ssh=False)
    wait_for_startup(duthost, localhost, 10, 300)
    pytest_assert(wait_until(300, 5, 0, check_interface_status_of_up_ports,
                             duthost),
                  "Not all ports that are admin up on are operationally up")
    logging.info("Interfaces are up")

    for index in range(num_modules):
        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, index))
        dpu = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only; the DPU name was read from it
        duthost.shell("config chassis modules startup %s" % (dpu))
        time.sleep(2)

    pytest_assert(wait_until(120, 30, 0, check_dpu_ping_status,  # noqa: F405
                             duthost, ip_address_list),
                  "Not all DPUs operationally up")


def test_show_ping_int_after_reload(duthosts, enum_rand_one_per_hwsku_hostname,
                                    localhost, platform_api_conn, num_dpu_modules):
    """
    @summary: To Check Ping between NPU and DPU
              after configuration reload on NPU
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    # The fixture already provides the count as an int; calling it
    # would raise TypeError.
    num_modules = num_dpu_modules

    # Midplane IPs are collected before the reload is triggered
    ip_address_list = [
        module.get_midplane_ip(platform_api_conn, index)
        for index in range(num_modules)
    ]

    logging.info("Reload configuration")
    duthost.shell("sudo config reload -y &>/dev/null", executable="/bin/bash")

    logging.info("Wait until all critical services are fully started")
    wait_critical_processes(duthost)

    pytest_assert(wait_until(30, 10, 0, check_dpu_ping_status,  # noqa: F405
                             duthost, ip_address_list),
                  "Not all DPUs operationally up")
134 changes: 134 additions & 0 deletions tests/smartswitch/platform_tests/test_show_platform_dpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""
Tests for the `platform cli ...` commands in DPU
"""

import logging
import pytest
from tests.common.utilities import wait_until
from tests.common.helpers.assertions import pytest_assert
from tests.smartswitch.common.device_utils_dpu import * # noqa: F403,F401,E501
from tests.common.helpers.platform_api import chassis, module # noqa: F401
from tests.platform_tests.api.conftest import * # noqa: F401,F403
from tests.common.devices.sonic import * # noqa: 403

# Module-level pytest marks: every test in this file carries the
# 't1' topology mark.
pytestmark = [
    pytest.mark.topology('t1')
]


def test_midplane_ip(duthosts, enum_rand_one_per_hwsku_hostname,
                     platform_api_conn):
    """
    @summary: Verify `Midplane ip address between NPU and DPU`
              by pinging the midplane IP of every DPU that is not
              reported as Offline
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    ip_address_list = []

    output_dpu_status = duthost.show_and_parse('show chassis module status')

    for parse_output in output_dpu_status:
        dpu_name = parse_output['name']
        if 'DPU' not in dpu_name or parse_output['oper-status'] == 'Offline':
            continue
        # Use all trailing digits of the name as the module index so
        # that names such as "DPU10" are not truncated to "0". Fall
        # back to the last character when the name has no digits.
        dpu_index = dpu_name[len(dpu_name.rstrip('0123456789')):] \
            or dpu_name[-1]
        ip_address_list.append(
            module.get_midplane_ip(platform_api_conn, dpu_index))

    ping_status = check_dpu_ping_status(duthost, ip_address_list)  # noqa: F405
    # The message is shown on failure, so it has to describe the failure
    pytest_assert(ping_status, "Ping to one or more DPUs failed")


def test_shutdown_power_up_dpu(duthosts, enum_rand_one_per_hwsku_hostname,
                               platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `shut down and power up DPU`
              All DPUs are shut down one by one, then started one by
              one; the module status is polled after each command.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    # The fixture already provides the count as an int; calling it
    # would raise TypeError.
    num_modules = num_dpu_modules

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only; the DPU name was read from it
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off", dpu_name),
                      "DPU is not operationally down")

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "on", dpu_name),
                      "DPU is not operationally up")


def test_reboot_cause(duthosts, enum_rand_one_per_hwsku_hostname,
                      platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `Reboot Cause`
              Every DPU is shut down and started again; afterwards the
              reboot cause reported for it is checked.
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    # The fixture already provides the count as an int; calling it
    # would raise TypeError.
    num_modules = num_dpu_modules

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Same command spelling as the other DPU tests
        # (`modules`, single-line literal)
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off",
                                 dpu_name), "DPU is not operationally down")

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only; the DPU name was read from it
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_reboot_cause,  # noqa: F405
                                 duthost,
                                 dpu_name),
                      "DPU reboot cause is not reported as Unknown")


def _assert_pcie_check_passed(duthost, cmd_pcie_info):
    """
    Runs the PCIe check command on the DUT and asserts that the last
    output line reports the overall check as PASSED.
    """
    logging.info("Verifying output of '{}' on '{}'..."
                 .format(cmd_pcie_info, duthost.hostname))
    output_pcie_info = duthost.command(cmd_pcie_info)["stdout_lines"]
    # The message is shown on failure, so it has to describe the failure
    pytest_assert(output_pcie_info[-1] ==
                  'PCIe Device Checking All Test ----------->>> PASSED',
                  "PCIe link check failed on '{}'".format(duthost.hostname))


def test_pcie_link(duthosts, enum_rand_one_per_hwsku_hostname,
                   platform_api_conn, num_dpu_modules):
    """
    @summary: Verify `PCIe link`
              The PCIe check is run three times: before touching the
              DPUs, after all DPUs are shut down, and after all DPUs
              are started again.
    """
    CMD_PCIE_INFO = "show platform pcieinfo -c"

    duthost = duthosts[enum_rand_one_per_hwsku_hostname]

    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)

    # The fixture already provides the count as an int; calling it
    # would raise TypeError.
    num_modules = num_dpu_modules

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        # Run on the selected DUT only; the DPU name was read from it
        duthost.shell("config chassis modules shutdown %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "off", dpu_name),
                      "DPU is not operationally down")

    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)

    for index in range(num_modules):
        dpu_name = module.get_name(platform_api_conn, index)
        duthost.shell("config chassis modules startup %s" % (dpu_name))
        pytest_assert(wait_until(180, 60, 0,
                                 check_dpu_module_status,  # noqa: F405
                                 duthost, "on", dpu_name),
                      "DPU is not operationally up")

    # Same full-line comparison as the two checks above; comparing the
    # whole line against the bare word "PASSED" could never succeed.
    _assert_pcie_check_passed(duthost, CMD_PCIE_INFO)
Loading