Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dualtor] Add test to simulate server reboot #14690

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions tests/common/dualtor/dual_tor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,58 @@ def shutdown(dut_intfs=None):
fanout_intfs_to_recover.clear()


@pytest.fixture
def fanout_upper_tor_port_control(upper_tor_host, upper_tor_fanouthosts, tbinfo,
                                  cable_type, active_active_ports, active_standby_ports):     # noqa F811
    """
    Fixture returning callables to shut down and restore the fanout ports
    connected to the upper_tor_host.

    Yields:
        A ``(shutdown, restart)`` tuple:

        * ``shutdown(dut_intfs=None)`` delegates to ``_shutdown_fanout_tor_intfs``
          for the upper ToR; when ``dut_intfs`` is None the mux ports matching
          ``cable_type`` are used.
        * ``restart()`` calls ``no_shutdown`` for every entry of the shared
          ``fanout_intfs_to_recover`` mapping and then clears it.

    On teardown ``restart()`` is invoked once more so that fanout ports are not
    left shut when a test fails before restoring them. It is a no-op when the
    mapping is already empty.
    """
    shut_fanouts = []
    # Drop any stale entries so restart() only acts on ports recorded from now on.
    fanout_intfs_to_recover.clear()

    mux_ports = active_active_ports if cable_type == CableType.active_active else active_standby_ports

    def shutdown(dut_intfs=None):
        logger.info('Shutdown fanout ports connected to upper_tor')
        if dut_intfs is None:
            dut_intfs = mux_ports
        shut_fanouts.append(_shutdown_fanout_tor_intfs(upper_tor_host, upper_tor_fanouthosts, tbinfo, dut_intfs))

    def restart():
        # NOTE(review): fanout_intfs_to_recover is shared module state, so this
        # restores every recorded port, not only those of the upper ToR -
        # presumably it is filled in by _shutdown_fanout_tor_intfs; confirm.
        for fanout_host, intf_list in list(fanout_intfs_to_recover.items()):
            fanout_host.no_shutdown(intf_list)
        fanout_intfs_to_recover.clear()

    yield shutdown, restart

    # Teardown: recover anything the test left shut (e.g. after a failed assertion).
    logger.info('Recover fanout ports connected to upper_tor')
    restart()


@pytest.fixture
def fanout_lower_tor_port_control(lower_tor_host, lower_tor_fanouthosts, tbinfo,
                                  cable_type, active_active_ports, active_standby_ports):     # noqa F811
    """
    Fixture returning callables to shut down and restore the fanout ports
    connected to the lower_tor_host.

    Yields:
        A ``(shutdown, restart)`` tuple:

        * ``shutdown(dut_intfs=None)`` delegates to ``_shutdown_fanout_tor_intfs``
          for the lower ToR; when ``dut_intfs`` is None the mux ports matching
          ``cable_type`` are used.
        * ``restart()`` calls ``no_shutdown`` for every entry of the shared
          ``fanout_intfs_to_recover`` mapping and then clears it.

    On teardown ``restart()`` is invoked once more so that fanout ports are not
    left shut when a test fails before restoring them. It is a no-op when the
    mapping is already empty.
    """
    shut_fanouts = []
    # Drop any stale entries so restart() only acts on ports recorded from now on.
    fanout_intfs_to_recover.clear()

    mux_ports = active_active_ports if cable_type == CableType.active_active else active_standby_ports

    def shutdown(dut_intfs=None):
        logger.info('Shutdown fanout ports connected to lower_tor')
        if dut_intfs is None:
            dut_intfs = mux_ports
        shut_fanouts.append(_shutdown_fanout_tor_intfs(lower_tor_host, lower_tor_fanouthosts, tbinfo, dut_intfs))

    def restart():
        # NOTE(review): fanout_intfs_to_recover is shared module state, so this
        # restores every recorded port, not only those of the lower ToR -
        # presumably it is filled in by _shutdown_fanout_tor_intfs; confirm.
        for fanout_host, intf_list in list(fanout_intfs_to_recover.items()):
            fanout_host.no_shutdown(intf_list)
        fanout_intfs_to_recover.clear()

    yield shutdown, restart

    # Teardown: recover anything the test left shut (e.g. after a failed assertion).
    logger.info('Recover fanout ports connected to lower_tor')
    restart()


@pytest.fixture
def shutdown_fanout_tor_intfs(upper_tor_host, upper_tor_fanouthosts, lower_tor_host, lower_tor_fanouthosts,
tbinfo, cable_type, active_active_ports, active_standby_ports): # noqa F811
Expand Down
37 changes: 37 additions & 0 deletions tests/common/dualtor/icmp_responder_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,40 @@ def _pause_icmp_respond(mux_ports):
yield _pause_icmp_respond

ptfhost.shell("supervisorctl restart icmp_responder", module_ignore_errors=True)


def set_supervisorctl_status_icmp_responder(ptfhost, cmd, status):
    """
    Run ``supervisorctl <cmd> icmp_responder`` on the PTF host and confirm the
    service then reports the wanted status.

    Args:
        ptfhost: host object whose ``shell(command, module_ignore_errors=True)``
            returns a mapping with a ``"stdout"`` entry.
        cmd: supervisorctl action to run against icmp_responder (callers in
            this file pass ``'stop'`` or ``'start'``).
        status: text that must appear in the ``supervisorctl status`` output
            after the action (callers pass ``'STOPPED'`` or ``'RUNNING'``).

    Raises:
        RuntimeError: if the status output already contains ``status`` before
            the action, or still lacks it afterwards.
    """
    def _query_status():
        # Errors are ignored on purpose; only the captured stdout is inspected.
        result = ptfhost.shell("supervisorctl status icmp_responder",
                               module_ignore_errors=True)
        return result["stdout"]

    # Plain substring match against the status line(s).
    before = _query_status()
    if status in before:
        raise RuntimeError(f"icmp_responder is already in {status} state")

    ptfhost.shell(f'supervisorctl {cmd} icmp_responder', module_ignore_errors=True)

    after = _query_status()
    if status in after:
        return
    raise RuntimeError(f"could not set icmp_responder to {status} state")


@pytest.fixture
def shutdown_icmp_responder(ptfhost):   # noqa F811
    """
    Fixture yielding a callable that stops icmp_responder on the PTF host.

    The callable runs ``supervisorctl stop icmp_responder`` through
    ``set_supervisorctl_status_icmp_responder`` and therefore raises
    RuntimeError if the service is already STOPPED or does not reach STOPPED.

    On teardown, if the callable stopped the service, icmp_responder is started
    again so a test that fails midway does not leave it down for later tests.
    """
    stopped_by_fixture = False

    def _shutdown_icmp_responder():
        nonlocal stopped_by_fixture
        cmd = 'stop'
        status = 'STOPPED'
        set_supervisorctl_status_icmp_responder(ptfhost, cmd, status)
        # Only record the stop once the helper has confirmed it.
        stopped_by_fixture = True

    yield _shutdown_icmp_responder

    if stopped_by_fixture:
        # Best effort: errors are ignored, so this is harmless when the test
        # has already started the service again.
        ptfhost.shell("supervisorctl start icmp_responder", module_ignore_errors=True)


@pytest.fixture
def start_icmp_responder(ptfhost):   # noqa F811
    """
    Fixture yielding a callable that starts icmp_responder on the PTF host.

    The callable runs ``supervisorctl start icmp_responder`` through
    ``set_supervisorctl_status_icmp_responder`` and therefore raises
    RuntimeError if the service is already RUNNING or does not reach RUNNING.
    """

    def _start_icmp_responder():
        set_supervisorctl_status_icmp_responder(ptfhost, cmd='start', status='RUNNING')

    yield _start_icmp_responder
14 changes: 14 additions & 0 deletions tests/common/platform/interface_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,20 @@ def check_interface_status_of_up_ports(duthost):
return True


def expect_interface_status(dut, interface_name, expected_op_status):
    """
    Check whether an interface's operational status matches an expected value.

    Runs ``show interface description`` on ``dut``, parses the output with
    ``parse_intf_status`` and compares the ``'oper'`` field of
    ``interface_name`` against ``expected_op_status``.

    Returns:
        True when the two are equal, False otherwise.

    Raises:
        Exception: if ``interface_name`` is missing from the parsed output.
    """
    # The first two output lines are dropped before parsing (presumably the
    # table header and its separator).
    description_rows = dut.command("show interface description")["stdout_lines"][2:]
    entry = parse_intf_status(description_rows).get(interface_name)
    if entry is not None:
        return entry['oper'] == expected_op_status
    raise Exception(f'interface name {interface_name} does not exist')


def check_interface_status(dut, asic_index, interfaces, xcvr_skip_list):
"""
@summary: Check the admin and oper status of the specified interfaces on DUT.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,12 @@ dualtor_mgmt/test_dualtor_bgp_update_delay.py:
- asic_type in ['vs']
- https://github.com/sonic-net/sonic-mgmt/issues/14996

# Skip test_server_reboot when asic_type is 'vs': per the reason below, the KVM
# testbed has no fanout hosts.
dualtor_mgmt/test_server_failure.py::test_server_reboot:
skip:
reason: "KVM testbed does not have fanout hosts"
conditions:
- "asic_type in ['vs']"

#######################################
##### dut_console #####
#######################################
Expand Down
98 changes: 96 additions & 2 deletions tests/dualtor_mgmt/test_server_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import random

from tests.common.dualtor.mux_simulator_control import toggle_simulator_port_to_upper_tor, \
simulator_flap_counter, simulator_server_down # noqa F401
simulator_flap_counter, simulator_server_down, \
toggle_all_simulator_ports # noqa F401
from tests.common.helpers.assertions import pytest_assert
from tests.common.dualtor.dual_tor_utils import show_muxcable_status # noqa: F401
from tests.common.dualtor.dual_tor_common import active_active_ports # noqa F401
Expand All @@ -13,11 +14,17 @@
from tests.common.dualtor.dual_tor_utils import validate_active_active_dualtor_setup # noqa F401
from tests.common.dualtor.dual_tor_utils import upper_tor_host # noqa F401
from tests.common.dualtor.dual_tor_utils import lower_tor_host # noqa F401
from tests.common.dualtor.dual_tor_utils import lower_tor_fanouthosts, fanout_lower_tor_port_control # noqa F401
from tests.common.dualtor.dual_tor_utils import upper_tor_fanouthosts, fanout_upper_tor_port_control # noqa F401
from tests.common.dualtor.nic_simulator_control import simulator_server_down_active_active # noqa F401
from tests.common.fixtures.ptfhost_utils import change_mac_addresses, run_garp_service, \
run_icmp_responder # noqa: F401
from tests.common.utilities import wait_until

from tests.common.dualtor.icmp_responder_control import shutdown_icmp_responder # noqa: F401
from tests.common.dualtor.icmp_responder_control import start_icmp_responder # noqa: F401
from tests.common.dualtor.control_plane_utils import verify_tor_states
from tests.common.platform.interface_utils import expect_interface_status
from tests.common.dualtor.constants import UPPER_TOR

pytestmark = [
pytest.mark.topology('dualtor'),
Expand Down Expand Up @@ -90,3 +97,90 @@ def lower_tor_mux_state_verfication(state, health):
"mux_cable status is unexpected. Should be (standby, unhealthy)")
pytest_assert(wait_until(30, 1, 0, lower_tor_mux_state_verfication, 'standby', 'unhealthy'),
"mux_cable status is unexpected. Should be (standby, unhealthy)")


def _simulate_server_reboot(upper_tor, lower_tor, interface_name,
                            upper_port_control, lower_port_control):
    """
    Shut and then restore the fanout ports of both ToRs, asserting that
    ``interface_name`` goes operationally down and comes back up on each ToR.

    ``upper_port_control`` / ``lower_port_control`` are the ``(shutdown, restart)``
    tuples yielded by the fanout port control fixtures.
    """
    shutdown_upper, restart_upper = upper_port_control
    shutdown_lower, restart_lower = lower_port_control

    # simulate server reboot by turning off all fanout ports on both the ToRs
    shutdown_upper()
    shutdown_lower()
    pytest_assert(wait_until(30, 1, 0, expect_interface_status, upper_tor, interface_name, 'down'),
                  f'{interface_name} on upper ToR is expected to be down after server shutdown')
    pytest_assert(wait_until(30, 1, 0, expect_interface_status, lower_tor, interface_name, 'down'),
                  f'{interface_name} on lower ToR is expected to be down after server shutdown')
    restart_upper()
    restart_lower()

    # fanout ports are back on
    pytest_assert(wait_until(30, 1, 0, expect_interface_status, upper_tor, interface_name, 'up'),
                  f'{interface_name} on upper ToR is expected to be up after server restart')
    pytest_assert(wait_until(30, 1, 0, expect_interface_status, lower_tor, interface_name, 'up'),
                  f'{interface_name} on lower ToR is expected to be up after server restart')


@pytest.mark.enable_active_active
def test_server_reboot(request, cable_type, tbinfo,                     # noqa: F811
                       start_icmp_responder, shutdown_icmp_responder,   # noqa: F811
                       active_standby_ports, active_active_ports,       # noqa: F811
                       upper_tor_host, lower_tor_host,                  # noqa: F811
                       toggle_all_simulator_ports,                      # noqa: F811
                       fanout_upper_tor_port_control,                   # noqa: F811
                       fanout_lower_tor_port_control):                  # noqa: F811

    """
    Test verifies that TOR health returns back to healthy status after a server reboot.

    The reboot is simulated on one randomly chosen mux port: icmp_responder is
    stopped, the fanout ports of both ToRs are shut and restored, and
    icmp_responder is started again. The final ToR states are then verified:

    * active-standby: lower ToR active, upper ToR standby.
    * active-active: both ToRs active.
    """
    if cable_type == CableType.active_standby:
        interface_name = random.choice(active_standby_ports)
        # Set upper_tor as active
        toggle_all_simulator_ports(UPPER_TOR)
        verify_tor_states(expected_active_host=upper_tor_host,
                          expected_standby_host=lower_tor_host, cable_type=cable_type)

        pytest_assert(expect_interface_status(upper_tor_host, interface_name, 'up'),
                      f'{interface_name} on upper ToR must be up')
        pytest_assert(expect_interface_status(lower_tor_host, interface_name, 'up'),
                      f'{interface_name} on lower ToR must be up')
        shutdown_icmp_responder()

        _simulate_server_reboot(upper_tor_host, lower_tor_host, interface_name,
                                fanout_upper_tor_port_control, fanout_lower_tor_port_control)

        start_icmp_responder()
        # The ToRs must then reconcile to a consistent state
        # Upper ToR switches to standby and Lower to active.
        verify_tor_states(expected_active_host=lower_tor_host,
                          expected_standby_host=upper_tor_host, cable_type=cable_type)
    elif cable_type == CableType.active_active:
        interface_name = random.choice(active_active_ports)

        verify_tor_states(expected_active_host=[upper_tor_host, lower_tor_host],
                          expected_standby_host=None, cable_type=cable_type)

        pytest_assert(expect_interface_status(upper_tor_host, interface_name, 'up'),
                      f'{interface_name} on upper ToR must be up')
        pytest_assert(expect_interface_status(lower_tor_host, interface_name, 'up'),
                      f'{interface_name} on lower ToR must be up')
        shutdown_icmp_responder()

        # NOTE(review): unlike every other verify_tor_states call in this test,
        # cable_type is not passed here - confirm the default is intended.
        verify_tor_states(expected_active_host=None,
                          expected_standby_host=[upper_tor_host, lower_tor_host],
                          expected_standby_health='unhealthy')

        _simulate_server_reboot(upper_tor_host, lower_tor_host, interface_name,
                                fanout_upper_tor_port_control, fanout_lower_tor_port_control)

        start_icmp_responder()
        verify_tor_states(expected_active_host=[upper_tor_host, lower_tor_host],
                          expected_standby_host=None, cable_type=cable_type)
Loading