Skip to content

Commit

Permalink
Preboot sad path automation for n vlan ports (#1096)
Browse files Browse the repository at this point in the history
Signed-off-by: Neetha John <nejo@microsoft.com>
  • Loading branch information
neethajohn authored Sep 10, 2019
1 parent 29cb3fc commit 4b2bafc
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 34 deletions.
11 changes: 9 additions & 2 deletions ansible/roles/test/files/ptftests/advanced-reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ def build_peer_mapping(self):
self.get_neigh_port_info()
self.get_portchannel_info()

def build_vlan_if_port_mapping(self):
    """Build the list of ``(interface name, port index)`` pairs for the VLAN members.

    Reads the JSON referenced by ``'vlan_ports_file'`` through ``self.read_json``
    and maps every member of the single VLAN it contains to its entry in
    ``self.port_indices``.

    Returns:
        list of ``(ifname, port_index)`` tuples, in the order the members are listed.

    Raises:
        Exception: if the file describes more than one VLAN.
        IndexError: if the file describes no VLAN at all.
        KeyError: if a member has no entry in ``self.port_indices``.
    """
    content = self.read_json('vlan_ports_file')
    if len(content) > 1:
        raise Exception("Too many vlans")
    if not content:
        # Same exception type the original indexing produced, but with a usable message.
        raise IndexError("No vlans found in vlan_ports_file")
    # dict.values() is a non-indexable view on Python 3; next(iter(...)) works on 2 and 3.
    vlan = next(iter(content.values()))
    return [(ifname, self.port_indices[ifname]) for ifname in vlan['members']]

def populate_fail_info(self, fails):
for key in fails:
if key not in self.fails:
Expand Down Expand Up @@ -386,6 +392,7 @@ def setUp(self):
self.vlan_ports = self.read_vlan_ports()
if self.test_params['preboot_oper'] is not None:
self.build_peer_mapping()
self.test_params['vlan_if_port'] = self.build_vlan_if_port_mapping()

self.vlan_ip_range = self.test_params['vlan_ip_range']
self.default_ip_range = self.test_params['default_ip_range']
Expand All @@ -410,8 +417,8 @@ def setUp(self):
self.log("Converted addresses VMs: %s" % str(self.ssh_targets))
if self.preboot_oper is not None:
self.log("Preboot Operations:")
self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh)
(self.ssh_targets, self.portchannel_ports, self.neigh_vm), (log_info, fails) = self.pre_handle.setup()
self.pre_handle = sp.PrebootTest(self.preboot_oper, self.ssh_targets, self.portchannel_ports, self.vm_dut_map, self.test_params, self.dut_ssh, self.vlan_ports)
(self.ssh_targets, self.portchannel_ports, self.neigh_vm, self.vlan_ports), (log_info, fails) = self.pre_handle.setup()
self.populate_fail_info(fails)
for log in log_info:
self.log(log)
Expand Down
100 changes: 86 additions & 14 deletions ansible/roles/test/files/ptftests/sad_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,30 @@


class PrebootTest(object):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports):
self.oper_type = oper_type
self.vm_list = vm_list
self.portchannel_ports = portchannel_ports
self.vm_dut_map = vm_dut_map
self.test_args = test_args
self.dut_ssh = dut_ssh
self.vlan_ports = vlan_ports
self.fails_vm = set()
self.fails_dut = set()
self.log = []
self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh)
self.shandle = SadOper(self.oper_type, self.vm_list, self.portchannel_ports, self.vm_dut_map, self.test_args, self.dut_ssh, self.vlan_ports)

def setup(self):
self.shandle.sad_setup(is_up=False)
return self.shandle.retreive_test_info(), self.shandle.retreive_logs()

def verify(self, pre_check=True):
self.shandle.sad_bgp_verify()
if 'lag' in self.oper_type:
self.shandle.sad_lag_verify(pre_check=pre_check)
if 'vlan' in self.oper_type:
self.shandle.verify_vlan_port_state(pre_check=pre_check)
else:
self.shandle.sad_bgp_verify()
if 'lag' in self.oper_type:
self.shandle.sad_lag_verify(pre_check=pre_check)
return self.shandle.retreive_logs()

def revert(self):
Expand All @@ -35,14 +39,16 @@ def revert(self):


class SadPath(object):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports):
self.oper_type = ''
self.cnt = 1
self.memb_cnt = 0
self.vm_list = vm_list
self.portchannel_ports = portchannel_ports
self.vm_dut_map = vm_dut_map
self.test_args = test_args
self.vlan_ports = vlan_ports
self.vlan_if_port = self.test_args['vlan_if_port']
self.neigh_vms = []
self.neigh_names = dict()
self.vm_handles = dict()
Expand All @@ -53,13 +59,16 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
self.fails['dut'] = set()
self.tot_memb_cnt = 0
self.memb_index = 0
self.if_port = []
self.down_vlan_info = []
self.extract_oper_info(oper_type)

def extract_oper_info(self, oper_type):
if oper_type and ':' in oper_type:
temp = oper_type.split(':')
self.oper_type = temp[0]
# get number of VMs where the sad pass oper needs to be done
# get the number of VMs on which the sad path operation needs to be done. For the vlan_port_down case,
# this is the number of vlan ports to bring down
self.cnt = int(temp[1])
if len(temp) > 2:
# get the number of lag members in a portchannel that should be brought down
Expand Down Expand Up @@ -122,6 +131,24 @@ def select_member(self):
if self.tot_memb_cnt != 0:
self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt

def select_vlan_ports(self):
    """Choose which VLAN ports will be brought down and which stay up.

    The ``(ifname, port)`` pairs in ``self.vlan_if_port`` are sorted by
    interface name. Starting at an index derived from the current day of the
    month, ``self.cnt`` consecutive entries (wrapping around the end of the
    list) are appended to ``self.down_vlan_info``; the remaining entries are
    stored, in sorted order, in ``self.if_port``.

    ``self.cnt`` is clamped to the range ``0..len(ports)`` so that a count
    larger than the number of ports can never select the same port twice
    (which would later make removing it from ``self.vlan_ports`` fail).
    """
    self.if_port = sorted(self.vlan_if_port, key=lambda tup: tup[0])
    vlan_len = len(self.if_port)
    if vlan_len == 0:
        # Nothing to select from; if_port is already the empty remainder.
        return

    down_cnt = max(0, min(self.cnt, vlan_len))
    vlan_index = datetime.datetime.now().day % vlan_len
    # Circular window of down_cnt indices starting at vlan_index.
    picked = [(vlan_index + offset) % vlan_len for offset in range(down_cnt)]
    picked_lookup = set(picked)

    self.down_vlan_info.extend(self.if_port[idx] for idx in picked)
    self.if_port = [entry for idx, entry in enumerate(self.if_port)
                    if idx not in picked_lookup]

def down_vlan_ports(self):
    """Drop the ports chosen in ``self.down_vlan_info`` from ``self.vlan_ports``.

    Each entry's second element is the port to remove; the list is modified in place.
    """
    # the selected vlan ports are treated as down, so take them out of the active list
    ports_going_down = [entry[1] for entry in self.down_vlan_info]
    for port in ports_going_down:
        self.vlan_ports.remove(port)

def setup(self):
self.select_vm()
self.get_neigh_name()
Expand All @@ -146,15 +173,15 @@ def setup(self):
self.log.append('DUT BGP v6: %s' % self.dut_bgps[vm]['v6'])

def retreive_test_info(self):
return self.vm_list, self.portchannel_ports, self.neigh_vms
return self.vm_list, self.portchannel_ports, self.neigh_vms, self.vlan_ports

def retreive_logs(self):
    """Return the collected log messages together with the recorded failures."""
    collected_logs = self.log
    recorded_fails = self.fails
    return collected_logs, recorded_fails


class SadOper(SadPath):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh):
super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, dut_ssh, vlan_ports):
super(SadOper, self).__init__(oper_type, vm_list, portchannel_ports, vm_dut_map, test_args, vlan_ports)
self.dut_ssh = dut_ssh
self.dut_needed = dict()
self.lag_members_down = dict()
Expand Down Expand Up @@ -185,10 +212,14 @@ def sad_setup(self, is_up=True):
self.log = []

if not is_up:
self.setup()
self.populate_bgp_state()
if 'lag' in self.oper_type:
self.populate_lag_state()
if 'vlan' in self.oper_type:
self.select_vlan_ports()
self.down_vlan_ports()
else:
self.setup()
self.populate_bgp_state()
if 'lag' in self.oper_type:
self.populate_lag_state()

if 'bgp' in self.oper_type:
self.log.append('BGP state change will be for %s' % ", ".join(self.neigh_vms))
Expand Down Expand Up @@ -220,6 +251,47 @@ def sad_setup(self, is_up=True):
# wait for some time for the lag member state to sync
time.sleep(120)

elif 'vlan' in self.oper_type:
self.change_vlan_port_state(is_up=is_up)

def change_vlan_port_state(self, is_up=True):
    """Start up or shut down the selected VLAN ports from the DUT side.

    For every ``(interface, port)`` pair in ``self.down_vlan_info`` whose
    interface name begins with ``Ethernet<digits>``, runs
    ``sudo config interface <startup|shutdown> <interface>`` on the DUT over
    ssh via ``self.cmd``. Progress is appended to ``self.log``; a non-zero
    return code is recorded in ``self.fails['dut']`` together with the
    return code and stderr.

    Args:
        is_up: True to start the interfaces up, False to shut them down.
    """
    state = ['shutdown', 'startup']
    # Raw string: '\d' in a plain literal is an invalid escape sequence on Python 3.
    # Compiled once instead of on every loop iteration.
    ethernet_intf = re.compile(r'Ethernet\d+')

    for intf, _port in self.down_vlan_info:
        if not ethernet_intf.match(intf):
            continue
        action = state[is_up]
        self.log.append('Changing state of %s from DUT side to %s' % (intf, action))
        stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (action, intf)])
        if return_code == 0:
            self.log.append('State change successful on DUT for %s' % intf)
            continue
        # msg_prefix is indexed with the inverse of is_up (1 - is_up).
        prefix = self.msg_prefix[1 - is_up]
        self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (prefix, intf))
        self.fails['dut'].add('%s: Return code: %d' % (prefix, return_code))
        self.fails['dut'].add('%s: Stderr: %s' % (prefix, stderr))

def verify_vlan_port_state(self, state='down', pre_check=True):
# Check on the DUT the state of every interface listed in self.down_vlan_info.
# Runs `show interfaces status <intf>` over ssh (via self.cmd) for each interface;
# successful checks are appended to self.log, problems are added to self.fails['dut'].
#   state:     text that the 8th whitespace-separated field of the status line must start with
#   pre_check: index into self.msg_prefix selecting the prefix used in failure messages
# self.log is reset before checking; nothing is returned.
self.log = []
# pattern match "Ethernet252 177,178,179,180 40G 9100 Ethernet64/1 routed down down QSFP28 off"
# extract the admin status
# i.e. skip seven whitespace-separated fields, then expect `state` at the start of the next one
pat = re.compile('(\S+\s+){7}%s' % state)
# `port` is not used below; only the interface name is needed for the query
for intf, port in self.down_vlan_info:
stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'show interfaces status %s' % intf])
if return_code == 0:
# only output lines that mention the interface are tested against the pattern
for line in stdout.split('\n'):
if intf in line:
is_match = pat.match(line.strip())
if is_match:
# NOTE(review): the message text says "down" even if a different `state` is passed
self.log.append('Interface state is down as expected on the DUT for %s' % intf)
self.log.append('Pattern check: %s' % line)
break

# NOTE(review): indentation is not visible in this view; presumably this `else` pairs with
# the `for` loop (i.e. runs when no line matched) - confirm against the real file
else:
self.fails['dut'].add('%s: Interface state is not down on the DUT for %s' % (self.msg_prefix[pre_check], intf))
self.fails['dut'].add('%s: Obtained: %s' % (self.msg_prefix[pre_check], line))
else:
# the ssh command returned a non-zero code: record the return code and stderr
self.fails['dut'].add('%s: Retreiving interface %s info from DUT side failed' % (self.msg_prefix[pre_check], intf))
self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[pre_check], return_code))
self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[pre_check], stderr))

def change_bgp_dut_state(self, is_up=True):
state = ['shutdown', 'startup']
for vm in self.neigh_vms:
Expand Down
15 changes: 0 additions & 15 deletions ansible/roles/test/tasks/advanced-reboot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,6 @@
copy: src=roles/test/files/ptftests dest=/root
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder to the PTF container
copy: src=roles/test/files/helpers/arp_responder.py dest=/opt
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder supervisor configuration to the PTF container
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e'
delegate_to: "{{ ptf_host }}"

- name: Update supervisor configuration
include: "roles/test/tasks/common_tasks/update_supervisor.yml"
vars:
supervisor_host: "{{ ptf_host }}"

- name: Remove old keys
file:
path: "{{ item }}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
item_cnt: "{{ item.split(':')[-1]|int }}"
host_max_len: "{{ vm_hosts|length - 1 }}"
member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"
vlan_max_cnt: "{{ minigraph_vlans.values()[0]['members']|length - 1 }}"

- fail: msg="Bgp neigh down count is greater than or equal to number of VM hosts. Current val = {{ item_cnt }} Max val = {{ host_max_len }}"
when: "{{ 'bgp_down' in item and item_cnt > host_max_len }}"
Expand All @@ -11,3 +12,6 @@

- fail: msg="Lag member count is greater than available number of lag members. Current val = {{ item_cnt }} Available cnt = {{ member_max_cnt }}"
when: "{{ 'lag_member_down' in item and item_cnt > member_max_cnt }}"

- fail: msg="Vlan count is greater than or equal to number of Vlan interfaces. Current val = {{ item_cnt }} Max val = {{ vlan_max_cnt }}"
when: "{{ 'vlan_port_down' in item and item_cnt|int > vlan_max_cnt|int }}"
27 changes: 26 additions & 1 deletion ansible/roles/test/tasks/ptf_runner_reboot.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
- block:
- name: Copy arp responder to the PTF container
copy: src=roles/test/files/helpers/arp_responder.py dest=/opt
delegate_to: "{{ ptf_host }}"

- name: Copy arp responder supervisor configuration to the PTF container. No args when there is no preboot type
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e'
delegate_to: "{{ ptf_host }}"
when: not item or item == 'None'

- name: Copy arp responder supervisor configuration to the PTF container. Specifying args when there is a preboot type
template: src=arp_responder.conf.j2 dest=/etc/supervisor/conf.d/arp_responder.conf
vars:
- arp_responder_args: '-e -c /tmp/from_t1_{{ item }}.json'
delegate_to: "{{ ptf_host }}"
when: item and item != 'None'

- name: Update supervisor configuration
include: "roles/test/tasks/common_tasks/update_supervisor.yml"
vars:
supervisor_host: "{{ ptf_host }}"

- name: Clear FDB entries on the DUT
command: sonic-clear fdb all

- include: ptf_runner.yml
vars:
ptf_test_name: Advanced-reboot test
Expand Down Expand Up @@ -101,7 +127,6 @@
dest: '/tmp/'
flat: yes


- name: Wait for the DUT to be ready for the next test
pause: seconds=420
when: preboot_list|length > 1
2 changes: 1 addition & 1 deletion ansible/roles/test/tasks/warm-reboot-multi-sad.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# preboot_list format is 'preboot oper type:number of VMs down:number of lag members down'. For non lag member cases, the last parameter is omitted; for vlan_port_down, the count is the number of vlan ports to bring down
- name: Set vars
set_fact:
pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1']
pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1', 'vlan_port_down:4']
lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"

- name: Add all lag member down case
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/test/tasks/warm-reboot-sad.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
include: advanced-reboot.yml
vars:
reboot_type: warm-reboot
preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down']
preboot_list: ['neigh_bgp_down', 'dut_bgp_down', 'dut_lag_down', 'neigh_lag_down', 'dut_lag_member_down:1:1', 'neigh_lag_member_down:1:1', 'vlan_port_down']
preboot_files: "peer_dev_info,neigh_port_info"

0 comments on commit 4b2bafc

Please sign in to comment.