Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[warm-reboot] Preboot sad path automation for n vlan ports #1096

Merged
merged 1 commit into from
Sep 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions ansible/roles/test/files/ptftests/advanced-reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ def build_peer_mapping(self):
self.get_neigh_port_info()
self.get_portchannel_info()

def build_vlan_if_port_mapping(self):
    """Pair every member interface of the single vlan with its port index.

    The vlan description is obtained from ``self.read_json('vlan_ports_file')``
    and is expected to be a mapping whose values each carry a ``'members'``
    list of interface names (assumption based on how it is indexed here).

    Returns:
        list of ``(ifname, port_index)`` tuples, one per vlan member, where
        ``port_index`` is looked up in ``self.port_indices``.

    Raises:
        Exception: if more than one vlan entry is present.
        IndexError: if no vlan entry is present.
        KeyError: if a member interface is missing from ``self.port_indices``.
    """
    content = self.read_json('vlan_ports_file')
    if len(content) > 1:
        raise Exception("Too many vlans")
    # dict.values() is a non-subscriptable view on Python 3; materialise it so
    # the first (and only) vlan can be picked on both Python 2 and Python 3.
    vlan = list(content.values())[0]
    return [(ifname, self.port_indices[ifname]) for ifname in vlan['members']]

def populate_fail_info(self, fails):
for key in fails:
if key not in self.fails:
Expand Down Expand Up @@ -386,6 +392,7 @@ def setUp(self):
self.vlan_ports = self.read_vlan_ports()
if self.test_params['preboot_oper'] is not None:
self.build_peer_mapping()
self.test_params['vlan_if_port'] = self.build_vlan_if_port_mapping()

self.vlan_ip_range = self.test_params['vlan_ip_range']
self.default_ip_range = self.test_params['default_ip_range']
Expand All @@ -410,8 +417,8 @@ def setUp(self):
self.log("Converted addresses VMs: %s" % str(self.ssh_targets))
if self.preboot_oper is not None:
self.log("Preboot Operations:")
self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh)
(self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails) = self.pre_handle.setup()
self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports)
(self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.pre_handle.setup()
self.populate_fail_info(fails)
for log in log_info:
self.log(log)
Expand Down
100 changes: 86 additions & 14 deletions ansible/roles/test/files/ptftests/sad_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,30 @@


class PrebootTest(object):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports):
self.oper_type = oper_type
self.vm_list = vm_list
self.portchannel_ports = portchannel_ports
self.vm_dut_map = vm_dut_map
self.test_args = test_args
self.dut_ssh = dut_ssh
self.vlan_ports = vlan_ports
self.fails_vm = set()
self.fails_dut = set()
self.log = []
self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh)
self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh, self.vlan_ports)

def setup(self):
self.shandle.sad_setup(is_up=False)
return self.shandle.retreive_test_info(), self.shandle.retreive_logs()

def verify(self, pre_check=True):
self.shandle.sad_bgp_verify()
if 'lag' in self.oper_type:
self.shandle.sad_lag_verify(pre_check=pre_check)
if 'vlan' in self.oper_type:
self.shandle.verify_vlan_port_state(pre_check=pre_check)
else:
self.shandle.sad_bgp_verify()
if 'lag' in self.oper_type:
self.shandle.sad_lag_verify(pre_check=pre_check)
return self.shandle.retreive_logs()

def revert(self):
Expand All @@ -35,14 +39,16 @@ def revert(self):


class SadPath(object):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports):
self.oper_type = ''
self.cnt = 1
self.memb_cnt = 0
self.vm_list = vm_list
self.portchannel_ports = portchannel_ports
self.vm_dut_map = vm_dut_map
self.test_args = test_args
self.vlan_ports = vlan_ports
self.vlan_if_port = self.test_args['vlan_if_port']
self.neigh_vms = []
self.neigh_names = dict()
self.vm_handles = dict()
Expand All @@ -53,13 +59,16 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
self.fails['dut'] = set()
self.tot_memb_cnt = 0
self.memb_index = 0
self.if_port = []
self.down_vlan_info = []
self.extract_oper_info(oper_type)

def extract_oper_info(self, oper_type):
if oper_type and ':' in oper_type:
temp = oper_type.split(':')
self.oper_type = temp[0]
# get number of VMs where the sad pass oper needs to be done
# get number of VMs where the sad pass oper needs to be done. For vlan_member case,
# this will be the number of down vlan ports
self.cnt = int(temp[1])
if len(temp) > 2:
# get the number of lag members in a portchannel that should be brought down
Expand Down Expand Up @@ -122,6 +131,24 @@ def select_member(self):
if self.tot_memb_cnt != 0:
self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt

def select_vlan_ports(self):
    """Choose ``self.cnt`` vlan ports to bring down and record the remainder.

    The ``(ifname, port)`` tuples in ``self.vlan_if_port`` are ordered by
    interface name. The starting position is the current day of the month
    modulo the number of ports, and the selection wraps around to the start
    of the list when it runs past the end. The chosen tuples are appended to
    ``self.down_vlan_info``; the tuples left over replace ``self.if_port``.
    """
    ordered = sorted(self.vlan_if_port, key=lambda pair: pair[0])
    total = len(ordered)
    start = datetime.datetime.now().day % total if total > 0 else 0
    stop = start + self.cnt
    overflow = stop - total

    if overflow > 0:
        # selection runs off the end: take the tail, then wrap to the head
        chosen = ordered[start:] + ordered[0:overflow]
        remaining = ordered[overflow:start]
    else:
        chosen = ordered[start:stop]
        remaining = ordered[0:start] + ordered[stop:]

    self.down_vlan_info.extend(chosen)
    self.if_port = remaining

def down_vlan_ports(self):
    """Drop the port of every selected vlan entry from ``self.vlan_ports``.

    Each entry of ``self.down_vlan_info`` carries its port at position 1;
    that value is removed in place from ``self.vlan_ports`` (``list.remove``
    raises ``ValueError`` if the port is not present).
    """
    active_ports = self.vlan_ports
    for entry in self.down_vlan_info:
        active_ports.remove(entry[1])

def setup(self):
self.select_vm()
self.get_neigh_name()
Expand All @@ -146,15 +173,15 @@ def setup(self):
self.log.append('DUT BGP v6: %s' % self.dut_bgps[vm]['v6'])

# Return the VM list, portchannel ports and neighbor VMs held on this object.
# NOTE(review): two return statements follow each other below. As written only
# the first can execute, so the tuple that also carries self.vlan_ports is never
# returned. Presumably the first line is the superseded version and the second
# its replacement (a call site visible in this listing unpacks four values from
# the first element of PrebootTest.setup()) -- confirm and drop the stale line.
def retreive_test_info(self):
return self.vm_list, self.portchannel_ports, self.neigh_vms
return self.vm_list, self.portchannel_ports, self.neigh_vms, self.vlan_ports

def retreive_logs(self):
    """Return the pair ``(self.log, self.fails)`` without copying either."""
    logs_and_fails = (self.log, self.fails)
    return logs_and_fails


class SadOper(SadPath):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh):
super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports):
super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports)
self.dut_ssh = dut_ssh
self.dut_needed = dict()
self.lag_members_down = dict()
Expand Down Expand Up @@ -185,10 +212,14 @@ def sad_setup(self, is_up=True):
self.log = []

if not is_up:
self.setup()
self.populate_bgp_state()
if 'lag' in self.oper_type:
self.populate_lag_state()
if 'vlan' in self.oper_type:
self.select_vlan_ports()
self.down_vlan_ports()
else:
self.setup()
self.populate_bgp_state()
if 'lag' in self.oper_type:
self.populate_lag_state()

if 'bgp' in self.oper_type:
self.log.append('BGP state change will be for %s' % ", ".join(self.neigh_vms))
Expand Down Expand Up @@ -220,6 +251,47 @@ def sad_setup(self, is_up=True):
# wait for sometime for lag members state to sync
time.sleep(120)

elif 'vlan' in self.oper_type:
self.change_vlan_port_state(is_up=is_up)

def change_vlan_port_state(self, is_up=True):
    """Shut down or start up the selected vlan interfaces from the DUT side.

    For every ``(intf, port)`` entry in ``self.down_vlan_info`` whose
    interface name starts with ``Ethernet`` followed by digits, runs
    ``sudo config interface <shutdown|startup> <intf>`` over ssh to
    ``self.dut_ssh`` via ``self.cmd``. Other entries are skipped.

    Args:
        is_up: when true the interfaces are started up, otherwise they are
            shut down. It is also used to index ``self.msg_prefix``
            (as ``1 - is_up``) for failure messages.

    Progress is appended to ``self.log``; a non-zero return code adds the
    failure details (message, return code, stderr) to ``self.fails['dut']``.
    """
    state = ['shutdown', 'startup']

    for intf, _port in self.down_vlan_info:
        # Raw string: '\d' in a plain literal is an invalid escape sequence
        # that newer Python versions warn about.
        if not re.match(r'Ethernet\d+', intf):
            continue
        action = state[is_up]
        self.log.append('Changing state of %s from DUT side to %s' % (intf, action))
        config_cmd = 'sudo config interface %s %s' % (action, intf)
        _stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, config_cmd])
        if return_code != 0:
            prefix = self.msg_prefix[1 - is_up]
            self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (prefix, intf))
            self.fails['dut'].add('%s: Return code: %d' % (prefix, return_code))
            self.fails['dut'].add('%s: Stderr: %s' % (prefix, stderr))
        else:
            self.log.append('State change successful on DUT for %s' % intf)

# Check on the DUT that every interface recorded in self.down_vlan_info reports
# the expected state, logging matches and recording mismatches or command
# failures in self.fails['dut'].
#   state     -- text expected in the eighth whitespace-separated column of the
#                'show interfaces status' line (default 'down')
#   pre_check -- used only to index self.msg_prefix for failure messages
def verify_vlan_port_state(self, state='down', pre_check=True):
# start from an empty log; earlier entries on self.log are discarded
self.log = []
# pattern match "Ethernet252 177,178,179,180 40G 9100 Ethernet64/1 routed down down QSFP28 off"
# extract the admin status
# i.e. seven whitespace-terminated fields followed by `state`; pat.match anchors
# at the start of the stripped line, so `state` must begin the eighth field
pat = re.compile('(\S+\s+){7}%s' % state)
# `port` is unpacked but not used in this method
for intf, port in self.down_vlan_info:
# query the DUT over ssh; self.cmd yields (stdout, stderr, return_code)
stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces status %s' % intf])
if return_code == 0:
for line in stdout.split('\n'):
# only lines that mention the interface name are examined
if intf in line:
is_match = pat.match(line.strip())
if is_match:
# NOTE(review): the message says "down" even when `state` is something else
self.log.append('Interface state is down as expected on the DUT for %s' % intf)
self.log.append('Pattern check: %s' % line)
break

# NOTE(review): indentation is not visible in this listing, so it is unclear
# whether this else pairs with `if is_match` (a failure per non-matching line)
# or with the `for` loop (a failure only when no line matched) -- confirm
else:
self.fails['dut'].add('%s: Interface state is not down on the DUT for %s' % (self.msg_prefix[pre_check], intf))
self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line))
# non-zero return code: the status command itself failed
else:
self.fails['dut'].add('%s: Retreiving interface %s info from DUT side failed' % (self.msg_prefix[pre_check], intf))
self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code))
self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[pre_check], stderr))

def change_bgp_dut_state(self, is_up=True):
state = ['shutdown', 'startup']
for vm in self.neigh_vms:
Expand Down
15 changes: 0 additions & 15 deletions ansible/roles/test/tasks/advanced-reboot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,6 @@
copy: src=roles/test/files/ptftests dest=/root
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder to the PTF container
copy: src=roles/test/files/helpers/arp_responder.py dest=/opt
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder supervisor configuration to the PTF container
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e'
delegate_to: "{{ ptf_host }}"

- name: Update supervisor configuration
include: "roles/test/tasks/common_tasks/update_supervisor.yml"
vars:
supervisor_host: "{{ ptf_host }}"

- name: Remove old keys
file:
path: "{{ item }}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
item_cnt: "{{ item.split(':')[-1]|int }}"
host_max_len: "{{ vm_hosts|length - 1 }}"
member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"
vlan_max_cnt: "{{ minigraph_vlans.values()[0]['members']|length - 1 }}"

- fail: msg="Bgp neigh down count is greater than or equal to number of VM hosts. Current val = {{ item_cnt }} Max val = {{ host_max_len }}"
when: "{{ 'bgp_down' in item and item_cnt > host_max_len }}"
Expand All @@ -11,3 +12,6 @@

- fail: msg="Lag member count is greater than available number of lag members. Current val = {{ item_cnt }} Available cnt = {{ member_max_cnt }}"
when: "{{ 'lag_member_down' in item and item_cnt > member_max_cnt }}"

- fail: msg="Vlan count is greater than or equal to number of Vlan interfaces. Current val = {{ item_cnt }} Max val = {{ vlan_max_cnt }}"
when: "{{ 'vlan_port_down' in item and item_cnt|int > vlan_max_cnt|int }}"
27 changes: 26 additions & 1 deletion ansible/roles/test/tasks/ptf_runner_reboot.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
- block:
- name: Copy arp responder to the PTF container
copy: src=roles/test/files/helpers/arp_responder.py dest=/opt
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder supervisor configuration to the PTF container. No args when there is no preboot type
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e'
delegate_to: "{{ ptf_host }}"
when: not item or item == 'None'

- name: Copy arp responder supervisor configuration to the PTF container. Specifying args when there is a preboot type
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e -c /tmp/from_t1_{{ item }}.json'
delegate_to: "{{ ptf_host }}"
when: item and item != 'None'

- name: Update supervisor configuration
include: "roles/test/tasks/common_tasks/update_supervisor.yml"
vars:
supervisor_host: "{{ ptf_host }}"

- name: Clear FDB entries on the DUT
command: sonic-clear fdb all

- include: ptf_runner.yml
vars:
ptf_test_name: Advanced-reboot test
Expand Down Expand Up @@ -101,7 +127,6 @@
dest: '/tmp/'
flat: yes


- name: Wait for the DUT to be ready for the next test
pause: seconds=420
when: preboot_list|length > 1
2 changes: 1 addition & 1 deletion ansible/roles/test/tasks/warm-reboot-multi-sad.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# preboot_list format is 'preboot oper type:number of VMS down:number of lag members down'. for non lag member cases, this parameter will be skipped
- name: Set vars
set_fact:
pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1']
pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1', 'vlan_port_down:4']
lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"

- name: Add all lag member down case
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/test/tasks/warm-reboot-sad.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
include: advanced-reboot.yml
vars:
reboot_type: warm-reboot
preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down']
preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down', 'dut_lag_member_down:1:1', 'neigh_lag_member_down:1:1', 'vlan_port_down']
preboot_files: "peer_dev_info,neigh_port_info"