diff --git a/debian/swss.install b/debian/swss.install index e04f63269be3..d61bf3faa78a 100644 --- a/debian/swss.install +++ b/debian/swss.install @@ -6,3 +6,4 @@ swssconfig/sample/th.64ports.buffers.json etc/swss/config.d swssconfig/sample/th.64ports.qos.json etc/swss/config.d swssconfig/sample/th2.118ports.buffers.json etc/swss/config.d swssconfig/sample/th2.118ports.qos.json etc/swss/config.d +neighsyncd/restore_neighbors.py usr/bin diff --git a/doc/swss-schema.md b/doc/swss-schema.md index 9d02908e766f..4cf3681c3763 100644 --- a/doc/swss-schema.md +++ b/doc/swss-schema.md @@ -745,6 +745,11 @@ Stores information for physical switch ports managed by the switch chip. Ports t ; dynanic data like port state, neighbor, routes ; and so on. +### NEIGH_RESTORE_TABLE + ;State for neighbor table restoring process during warm reboot + key = NEIGH_RESTORE_TABLE|Flags + restored = "true" / "false" ; restored state + ## Configuration files What configuration files should we have? Do apps, orch agent each need separate files? 
diff --git a/neighsyncd/neighsync.cpp b/neighsyncd/neighsync.cpp
index d0e36cc5fffe..f441208900d8 100644
--- a/neighsyncd/neighsync.cpp
+++ b/neighsyncd/neighsync.cpp
@@ -16,12 +16,31 @@
 using namespace std;
 using namespace swss;
 
-NeighSync::NeighSync(RedisPipeline *pipelineAppDB) :
+// Member initializers are listed in declaration order (see neighsync.h):
+// m_stateNeighRestoreTable is declared before m_neighTable.
+NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) :
+    m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
     m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME),
     m_AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", &m_neighTable, DEFAULT_NEIGHSYNC_WARMSTART_TIMER)
 {
 }
 
+// Check whether the neighbor table has been restored in the kernel.
+// Reads the "restored" field of the "Flags" entry in the state-DB
+// neighbor-restore table and returns true only when it equals "true".
+bool NeighSync::isNeighRestoreDone()
+{
+    string value;
+
+    m_stateNeighRestoreTable.hget("Flags", "restored", value);
+    if (value == "true")
+    {
+        SWSS_LOG_NOTICE("neighbor table restore to kernel is done");
+        return true;
+    }
+    return false;
+}
+
 void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
 {
     char ipStr[MAX_ADDR_SIZE + 1] = {0};
diff --git a/neighsyncd/neighsync.h b/neighsyncd/neighsync.h
index 1889e0cadabb..aefe9bdeb7c6 100644
--- a/neighsyncd/neighsync.h
+++ b/neighsyncd/neighsync.h
@@ -8,6 +8,11 @@
 
 #define DEFAULT_NEIGHSYNC_WARMSTART_TIMER 5
 
+// Time (in seconds) that neighsyncd waits for the restore_neighbors service to finish.
+// It must be longer than the restore_neighbors timeout value (60). Hitting this timeout
+// is not expected; if it happens the system is in an unknown state and neighsyncd exits.
+#define RESTORE_NEIGH_WAIT_TIME_OUT 70
+
 namespace swss {
 
 class NeighSync : public NetMsg
@@ -15,16 +20,19 @@ class NeighSync : public NetMsg
 public:
     enum { MAX_ADDR_SIZE = 64 };
 
-    NeighSync(RedisPipeline *pipelineAppDB);
+    NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb);
 
     virtual void onMsg(int nlmsg_type, struct nl_object *obj);
 
+    bool isNeighRestoreDone();
+
     AppRestartAssist *getRestartAssist()
     {
         return &m_AppRestartAssist;
     }
 
 private:
+    Table m_stateNeighRestoreTable;
     ProducerStateTable m_neighTable;
     AppRestartAssist m_AppRestartAssist;
 };
diff --git a/neighsyncd/neighsyncd.cpp b/neighsyncd/neighsyncd.cpp
index 07236a8515e3..cce6487ca463 100644
--- a/neighsyncd/neighsyncd.cpp
+++ b/neighsyncd/neighsyncd.cpp
@@ -1,4 +1,7 @@
 #include <iostream>
+#include <stdlib.h>
+#include <unistd.h>
+#include <chrono>
 #include "logger.h"
 #include "select.h"
 #include "netdispatcher.h"
@@ -14,8 +17,9 @@ int main(int argc, char **argv)
 
     DBConnector appDb(APPL_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
     RedisPipeline pipelineAppDB(&appDb);
+    DBConnector stateDb(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
 
-    NeighSync sync(&pipelineAppDB);
+    NeighSync sync(&pipelineAppDB, &stateDb);
 
     NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync);
     NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync);
@@ -27,16 +31,36 @@ int main(int argc, char **argv)
     NetLink netlink;
     Select s;
 
-    netlink.registerGroup(RTNLGRP_NEIGH);
-    cout << "Listens to neigh messages..." 
<< endl;
-    netlink.dumpRequest(RTM_GETNEIGH);
+    using namespace std::chrono;
 
-    s.addSelectable(&netlink);
     if (sync.getRestartAssist()->isWarmStartInProgress())
     {
         sync.getRestartAssist()->readTableToMap();
+
+        steady_clock::time_point starttime = steady_clock::now();
+        while (!sync.isNeighRestoreDone())
+        {
+            duration<double> time_span =
+                duration_cast<duration<double>>(steady_clock::now() - starttime);
+            int pasttime = int(time_span.count());
+            SWSS_LOG_INFO("waited neighbor table to be restored to kernel"
+                          " for %d seconds", pasttime);
+            if (pasttime > RESTORE_NEIGH_WAIT_TIME_OUT)
+            {
+                SWSS_LOG_ERROR("neighbor table restore is not finished"
+                               " after timed-out, exit!!!");
+                exit(EXIT_FAILURE);
+            }
+            sleep(1);
+        }
         sync.getRestartAssist()->startReconcileTimer(s);
     }
+
+    netlink.registerGroup(RTNLGRP_NEIGH);
+    cout << "Listens to neigh messages..." << endl;
+    netlink.dumpRequest(RTM_GETNEIGH);
+
+    s.addSelectable(&netlink);
     while (true)
     {
         Selectable *temps;
diff --git a/neighsyncd/restore_neighbors.py b/neighsyncd/restore_neighbors.py
new file mode 100755
index 000000000000..ceafa2ecc729
--- /dev/null
+++ b/neighsyncd/restore_neighbors.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python
+
+"""
+Description: restore_neighbors.py -- restoring neighbor table into kernel during system warm reboot.
+    The script is started by supervisord in swss docker when the docker is started.
+    It does not do anything in case warm restart is not enabled.
+    In case system warm reboot is enabled, it will try to restore the neighbor table into kernel
+    through netlink API calls and update the neigh table by sending arp/ns requests to all neighbor
+    entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process.
+    In case docker restart enabled only, it sets the stateDB flag so neighsyncd can follow
+    the same logic.
+"""
+
+import sys
+import swsssdk
+import netifaces
+import time
+import monotonic
+from pyroute2 import IPRoute, NetlinkError
+from pyroute2.netlink.rtnl import ndmsg
+from socket import AF_INET,AF_INET6
+import logging
+logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
+from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr
+from swsscommon import swsscommon
+import errno
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+logger.addHandler(logging.NullHandler())
+
+# timeout the restore process in 1 min if not finished
+# This is mostly to wait for interfaces to be created and up after warm-reboot
+# It would be good to keep that below routing reconciliation time-out.
+TIME_OUT = 60
+
+# every 5 seconds to check interfaces state
+CHECK_INTERVAL = 5
+
+ip_family = {"IPv4": AF_INET, "IPv6": AF_INET6}
+
+# return the first ipv4/ipv6 address assigned on intf, or None if there is none
+def first_ip_on_intf(intf, family):
+    if intf in netifaces.interfaces():
+        ipaddresses = netifaces.ifaddresses(intf)
+        if ip_family[family] in ipaddresses:
+            # cover link local address as well
+            return ipaddresses[ip_family[family]][0]['addr'].split("%")[0]
+    return None
+
+# check if the intf is operational up
+# reads /sys/class/net/<intf>/carrier; any read error is treated as "down"
+def is_intf_oper_state_up(intf):
+    oper_file = '/sys/class/net/{0}/carrier'
+    try:
+        # 'with' closes the file even if readline() raises
+        with open(oper_file.format(intf), 'r') as state_file:
+            state = state_file.readline().rstrip()
+    except Exception as e:
+        logger.info('Error: {}'.format(str(e)))
+        return False
+    return state == '1'
+
+# read the neigh table from AppDB to memory, format as below
+# build map as below, this can efficiently access intf and family groups later
+#       { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] }
+#                    { family2 -> [[ipM, macM], [ipN, macN] ...] } },
+#        ...
+#         intfA -> { { family1 -> [[ipW, macW], [ipX, macX] ...] }
+#                    { family2 -> [[ipY, macY], [ipZ, macZ] ...] } }
+#       }
+#
+# Alternatively:
+#  1, we can build:
+#       { intf1 ->  [[family1, ip1, mac1], [family2, ip2, mac2] ...]},
+#       ...
+#       { intfA ->  [[family1, ipX, macX], [family2, ipY, macY] ...]}
+#
+#  2, Or simply build two maps based on families
+# These alternative solutions would have worse performance because:
+#  1, need iterate the whole list if only one family is up.
+#  2, need check interface state twice due to the split map
+
+def read_neigh_table_to_maps():
+    db = swsssdk.SonicV2Connector(host='127.0.0.1')
+    db.connect(db.APPL_DB, False)
+
+    intf_neigh_map = {}
+
+    keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*')
+    keys = [] if keys is None else keys
+    for key in keys:
+        key_split = key.split(':', 2)
+        intf_name = key_split[1]
+        if intf_name == 'lo':
+            continue
+        dst_ip = key_split[2]
+        value = db.get_all(db.APPL_DB, key)
+        if 'neigh' in value and 'family' in value:
+            dmac = value['neigh']
+            family = value['family']
+        else:
+            raise RuntimeError('Neigh table format is incorrect')
+
+        if family not in ip_family:
+            raise RuntimeError('Neigh table format is incorrect')
+
+        ip_mac_pair = []
+        ip_mac_pair.append(dst_ip)
+        ip_mac_pair.append(dmac)
+
+        intf_neigh_map.setdefault(intf_name, {}).setdefault(family, []).append(ip_mac_pair)
+    db.close(db.APPL_DB)
+    return intf_neigh_map
+
+
+# Use netlink to set neigh table into kernel, not overwrite the existing ones
+def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac):
+    logger.info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
+    family, intf_idx, dst_ip, dmac))
+
+    if family not in ip_family:
+        return
+
+    family_af_inet = ip_family[family]
+    try:
+        ipclass.neigh('add',
+            family=family_af_inet,
+            dst=dst_ip,
+            lladdr=dmac,
+            ifindex=intf_idx,
+            state=ndmsg.states['reachable'])
+    # If neigh exists, log it but no exception raise, other exceptions, raise
+    except NetlinkError as e:
+        # NetlinkError carries the errno in .code; indexing the exception only works on python2
+        if e.code == errno.EEXIST:
+            logger.warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
+            family, intf_idx, dst_ip, dmac))
+        else:
+            raise
+
+# build ARP or NS packets depending on family
+def build_arp_ns_pkt(family, smac, src_ip, dst_ip):
+    if family == 'IPv4':
+        eth = Ether(src=smac, dst='ff:ff:ff:ff:ff:ff')
+        pkt = eth/ARP(op=ARP.who_has, pdst=dst_ip)
+    elif family == 'IPv6':
+        nsma = in6_getnsma(inet_pton(AF_INET6, dst_ip))
+        mcast_dst_ip = inet_ntop(AF_INET6, nsma)
+        dmac = in6_getnsmac(nsma)
+        eth = Ether(src=smac,dst=dmac)
+        ipv6 = IPv6(src=src_ip, dst=mcast_dst_ip)
+        ns = ICMPv6ND_NS(tgt=dst_ip)
+        ns_opt = ICMPv6NDOptSrcLLAddr(lladdr=smac)
+        pkt = eth/ipv6/ns/ns_opt
+    return pkt
+
+# Set the statedb "NEIGH_RESTORE_TABLE|Flags", so neighsyncd can start reconciliation
+def set_statedb_neigh_restore_done():
+    db = swsssdk.SonicV2Connector(host='127.0.0.1')
+    db.connect(db.STATE_DB, False)
+    db.set(db.STATE_DB, 'NEIGH_RESTORE_TABLE|Flags', 'restored', 'true')
+    db.close(db.STATE_DB)
+    return
+
+# restore the neighbor entries in intf_neigh_map to kernel and refresh them with arp/ns;
+# retries every CHECK_INTERVAL seconds for interfaces that are not up yet, until TIME_OUT
+def restore_update_kernel_neighbors(intf_neigh_map):
+    # create object for netlink calls to kernel
+    ipclass = IPRoute()
+    # monotonic clock, so wall-clock adjustments during boot cannot distort the timeout
+    mtime = monotonic.monotonic
+    start_time = mtime()
+    try:
+        while (mtime() - start_time) < TIME_OUT:
+            # iterate over a snapshot since restored interfaces are deleted from the map
+            for intf, family_neigh_map in list(intf_neigh_map.items()):
+                # only try to restore to kernel when link is up
+                if is_intf_oper_state_up(intf):
+                    src_mac = get_if_hwaddr(intf)
+                    intf_idx = ipclass.link_lookup(ifname=intf)[0]
+                    # create socket per intf to send packets
+                    s = conf.L2socket(iface=intf)
+                    try:
+                        # Only two families: 'IPv4' and 'IPv6'
+                        for family in ip_family.keys():
+                            # if ip address assigned and if we have neighs in this family, restore them
+                            src_ip = first_ip_on_intf(intf, family)
+                            if src_ip and (family in family_neigh_map):
+                                neigh_list = family_neigh_map[family]
+                                for dst_ip, dmac in neigh_list:
+                                    # use netlink to set neighbor entries
+                                    set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac)
+
+                                    # best effort to update kernel neigh info
+                                    # this will be updated by arp_update later too
+                                    s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip))
+                                # delete this family on the intf
+                                del intf_neigh_map[intf][family]
+                    finally:
+                        # close the pkt socket even if restoring failed
+                        s.close()
+
+                    # if all families are deleted, remove the key
+                    if len(intf_neigh_map[intf]) == 0:
+                        del intf_neigh_map[intf]
+            # map is empty, all neigh entries are restored
+            if not intf_neigh_map:
+                break
+            time.sleep(CHECK_INTERVAL)
+    finally:
+        # release the netlink socket
+        ipclass.close()
+
+
+def main():
+
+    print("restore_neighbors service is started")
+
+    # Use warmstart python binding
+    warmstart = swsscommon.WarmStart()
+    warmstart.initialize("neighsyncd", "swss")
+    warmstart.checkWarmStart("neighsyncd", "swss", False)
+
+    # if swss or system warm reboot not enabled, don't run
+    if not warmstart.isWarmStart():
+        print("restore_neighbors service is skipped as warm restart not enabled")
+        return
+
+    # swss restart not system warm reboot
+    if not warmstart.isSystemWarmRebootEnabled():
+        set_statedb_neigh_restore_done()
+        print("restore_neighbors service is done as system warm reboot not enabled")
+        return
+
+    # read the neigh table from appDB to internal map
+    try:
+        intf_neigh_map = read_neigh_table_to_maps()
+    except RuntimeError as e:
+        logger.exception(str(e))
+        sys.exit(1)
+
+    try:
+        restore_update_kernel_neighbors(intf_neigh_map)
+    except Exception as e:
+        logger.exception(str(e))
+        sys.exit(1)
+
+    # set statedb to signal other processes like neighsyncd
+    set_statedb_neigh_restore_done()
+    print("restore_neighbor service is done for system warmreboot")
+    return
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/conftest.py b/tests/conftest.py
index d8f5e2f082f6..220beb6d55dc 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -135,6 +135,9 @@ def runcmd(self, cmd):
 
     def runcmd_async(self, cmd):
         return subprocess.Popen("ip netns exec %s %s" % (self.nsname, cmd), shell=True)
 
+    def runcmd_output(self, cmd):
+        return subprocess.check_output("ip netns exec %s %s" % (self.nsname, cmd), shell=True)
+
 class DockerVirtualSwitch(object):
     def __init__(self, 
name=None, keeptb=False): self.basicd = ['redis-server', diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index ace175872258..2d2ba7af1fa9 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -293,6 +293,12 @@ def stop_neighsyncd(dvs): def start_neighsyncd(dvs): dvs.runcmd(['sh', '-c', 'supervisorctl start neighsyncd']) +def stop_restore_neighbors(dvs): + dvs.runcmd(['sh', '-c', 'pkill -x restore_neighbors']) + +def start_restore_neighbors(dvs): + dvs.runcmd(['sh', '-c', 'supervisorctl start restore_neighbors']) + def check_no_neighsyncd_timer(dvs): (exitcode, string) = dvs.runcmd(['sh', '-c', 'grep getWarmStartTimer /var/log/syslog | grep neighsyncd | grep invalid']) assert string.strip() != "" @@ -301,13 +307,37 @@ def check_neighsyncd_timer(dvs, timer_value): (exitcode, num) = dvs.runcmd(['sh', '-c', "grep getWarmStartTimer /var/log/syslog | grep neighsyncd | tail -n 1 | rev | cut -d ' ' -f 1 | rev"]) assert num.strip() == timer_value +def check_redis_neigh_entries(dvs, neigh_tbl, number): + (exitcode, lb_output) = dvs.runcmd(['sh', '-c', "redis-cli keys NEIGH_TABLE:lo* | grep NEI | wc -l"]) + lb_num = int(lb_output.strip()) + assert len(neigh_tbl.getKeys()) == number + lb_num + +def check_kernel_reachable_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip neigh show nud reachable| grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def check_kernel_reachable_v4_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip -4 neigh show nud reachable | grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def check_kernel_reachable_v6_neigh_num(dvs, number): + (exitcode, output) = dvs.runcmd(['sh', '-c', "ip -6 neigh show nud reachable | grep -v 'dev lo' | wc -l"]) + neigh_num = int(output.strip()) + assert neigh_num == number + +def kernel_restore_neighs_done(restoretbl): + keys = restoretbl.getKeys() + 
return (len(keys) > 0) + # function to check neighbor entry reconciliation status written in syslog def check_syslog_for_neighbor_entry(dvs, marker, new_cnt, delete_cnt, iptype): # check reconciliation results (new or delete entries) for ipv4 and ipv6 if iptype == "ipv4" or iptype == "ipv6": - (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep cache-state:NEW | grep -i %s | wc -l" % (marker, iptype)]) + (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep cache-state:NEW | grep -i %s | grep -v 'lo:'| wc -l" % (marker, iptype)]) assert num.strip() == str(new_cnt) - (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep -E \"cache-state:(DELETE|STALE)\" | grep -i %s | wc -l" % (marker, iptype)]) + (exitcode, num) = dvs.runcmd(['sh', '-c', "awk \'/%s/,ENDFILE {print;}\' /var/log/syslog | grep neighsyncd | grep -E \"cache-state:(DELETE|STALE)\" | grep -i %s | grep -v 'lo:' | wc -l" % (marker, iptype)]) assert num.strip() == str(delete_cnt) else: assert "iptype is unknown" == "" @@ -327,6 +357,12 @@ def test_swss_neighbor_syncup(dvs, testlog): # create neighbor entries (4 ipv4 and 4 ip6, two each on each interface) in linux kernel intfs = ["Ethernet24", "Ethernet28"] + + for intf in intfs: + # set timeout to be the same as real HW + dvs.runcmd("sysctl -w net.ipv4.neigh.{}.base_reachable_time_ms=1800000".format(intf)) + dvs.runcmd("sysctl -w net.ipv6.neigh.{}.base_reachable_time_ms=1800000".format(intf)) + #enable ipv6 on docker dvs.runcmd("sysctl net.ipv6.conf.all.disable_ipv6=0") @@ -342,10 +378,10 @@ def test_swss_neighbor_syncup(dvs, testlog): macs = ["00:00:00:00:24:02", "00:00:00:00:24:03", "00:00:00:00:28:02", "00:00:00:00:28:03"] for i in range(len(ips)): - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip neigh add {} dev {} lladdr 
{} nud reachable".format(ips[i], intfs[i/2], macs[i])) for i in range(len(v6ips)): - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip -6 neigh add {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) time.sleep(1) @@ -354,7 +390,7 @@ def test_swss_neighbor_syncup(dvs, testlog): tbl = swsscommon.Table(db, "NEIGH_TABLE") for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: @@ -364,7 +400,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: @@ -384,14 +420,16 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd and sairedis.rec stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() pubsub = dvs.SubscribeAsicDbObject("SAI_OBJECT_TYPE_NEIGHBOR_ENTRY") start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # Check the neighbor entries are still in appDB correctly for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: @@ -401,7 +439,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: @@ -433,24 +471,26 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # delete even nummber of ipv4/ipv6 neighbor entries from each interface 
for i in range(0, len(ips), 2): - dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i%2])) + dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i/2])) for i in range(0, len(v6ips), 2): - dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i%2])) + dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i/2])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # check ipv4 and ipv6 neighbors for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) #should not see deleted neighbor entries - if i %2 == 0: + if i % 2 == 0: assert status == False continue else: @@ -464,9 +504,9 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) #should not see deleted neighbor entries - if i %2 == 0: + if i % 2 == 0: assert status == False continue else: @@ -504,6 +544,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # add even nummber of ipv4/ipv6 neighbor entries to each interface @@ -511,21 +552,22 @@ def test_swss_neighbor_syncup(dvs, testlog): for i in range(0, len(ips), 2): (rc, output) = dvs.runcmd(['sh', '-c', "ip -4 neigh | grep {}".format(ips[i])]) print output - if rc == 0: - dvs.runcmd("ip neigh change {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + if output: + dvs.runcmd("ip neigh change {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], macs[i])) else: - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip neigh add {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], macs[i])) for i in range(0, len(v6ips), 2): (rc, output) = 
dvs.runcmd(['sh', '-c', "ip -6 neigh | grep {}".format(v6ips[i])]) print output - if rc == 0: - dvs.runcmd("ip -6 neigh change {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + if output: + dvs.runcmd("ip -6 neigh change {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) else: - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], macs[i])) + dvs.runcmd("ip -6 neigh add {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], macs[i])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # no neighsyncd timer configured @@ -533,7 +575,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # check ipv4 and ipv6 neighbors, should see all neighbors for i in range(len(ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -542,7 +584,7 @@ def test_swss_neighbor_syncup(dvs, testlog): assert v[1] == "IPv4" for i in range(len(v6ips)): - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -565,7 +607,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # # Testcase 5: # Even number of ip4/6 neigbors updated with new mac. - # Odd number of ipv4/6 neighbors removed and added to different interfaces. + # Odd number of ipv4/6 neighbors removed # neighbor syncd should sync it up after warm restart # include the timer settings in this testcase @@ -579,28 +621,28 @@ def test_swss_neighbor_syncup(dvs, testlog): # stop neighsyncd stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") marker = dvs.add_log_marker() # Even number of ip4/6 neigbors updated with new mac. - # Odd number of ipv4/6 neighbors removed and added to different interfaces. 
+ # Odd number of ipv4/6 neighbors removed newmacs = ["00:00:00:01:12:02", "00:00:00:01:12:03", "00:00:00:01:16:02", "00:00:00:01:16:03"] for i in range(len(ips)): if i % 2 == 0: - dvs.runcmd("ip neigh change {} dev {} lladdr {}".format(ips[i], intfs[i%2], newmacs[i])) + dvs.runcmd("ip neigh change {} dev {} lladdr {} nud reachable".format(ips[i], intfs[i/2], newmacs[i])) else: - dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i%2])) - dvs.runcmd("ip neigh add {} dev {} lladdr {}".format(ips[i], intfs[1-i%2], macs[i])) + dvs.runcmd("ip neigh del {} dev {}".format(ips[i], intfs[i/2])) for i in range(len(v6ips)): if i % 2 == 0: - dvs.runcmd("ip -6 neigh change {} dev {} lladdr {}".format(v6ips[i], intfs[i%2], newmacs[i])) + dvs.runcmd("ip -6 neigh change {} dev {} lladdr {} nud reachable".format(v6ips[i], intfs[i/2], newmacs[i])) else: - dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i%2])) - dvs.runcmd("ip -6 neigh add {} dev {} lladdr {}".format(v6ips[i], intfs[1-i%2], macs[i])) + dvs.runcmd("ip -6 neigh del {} dev {}".format(v6ips[i], intfs[i/2])) # start neighsyncd again start_neighsyncd(dvs) + start_restore_neighbors(dvs) time.sleep(10) # timer is not expired yet, state should be "restored" @@ -613,7 +655,7 @@ def test_swss_neighbor_syncup(dvs, testlog): # check ipv4 and ipv6 neighbors, should see all neighbors with updated info for i in range(len(ips)): if i % 2 == 0: - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -621,17 +663,12 @@ def test_swss_neighbor_syncup(dvs, testlog): if v[0] == "family": assert v[1] == "IPv4" else: - (status, fvs) = tbl.get("{}:{}".format(intfs[1-i%2], ips[i])) - assert status == True - for v in fvs: - if v[0] == "neigh": - assert v[1] == macs[i] - if v[0] == "family": - assert v[1] == "IPv4" + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], ips[i])) + assert status == False 
for i in range(len(v6ips)): if i % 2 == 0: - (status, fvs) = tbl.get("{}:{}".format(intfs[i%2], v6ips[i])) + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) assert status == True for v in fvs: if v[0] == "neigh": @@ -639,23 +676,18 @@ def test_swss_neighbor_syncup(dvs, testlog): if v[0] == "family": assert v[1] == "IPv6" else: - (status, fvs) = tbl.get("{}:{}".format(intfs[1-i%2], v6ips[i])) - assert status == True - for v in fvs: - if v[0] == "neigh": - assert v[1] == macs[i] - if v[0] == "family": - assert v[1] == "IPv6" + (status, fvs) = tbl.get("{}:{}".format(intfs[i/2], v6ips[i])) + assert status == False time.sleep(2) # check syslog and asic db for activities - # 4 news, 2 deletes for ipv4 and ipv6 each - # 4 create, 4 set, 4 removes for neighbor in asic db - check_syslog_for_neighbor_entry(dvs, marker, 4, 2, "ipv4") - check_syslog_for_neighbor_entry(dvs, marker, 4, 2, "ipv6") + # 2 news, 2 deletes for ipv4 and ipv6 each + # 4 set, 4 removes for neighbor in asic db + check_syslog_for_neighbor_entry(dvs, marker, 2, 2, "ipv4") + check_syslog_for_neighbor_entry(dvs, marker, 2, 2, "ipv6") (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) - assert nadd == 8 + assert nadd == 4 assert ndel == 4 # check restore Count @@ -787,6 +819,10 @@ def test_swss_port_state_syncup(dvs, testlog): assert oper_status == "down" else: assert oper_status == "up" + #clean up arp + dvs.runcmd("arp -d 10.0.0.1") + dvs.runcmd("arp -d 10.0.0.3") + dvs.runcmd("arp -d 10.0.0.5") ############################################################################# @@ -1442,3 +1478,285 @@ def test_routing_WarmRestart(dvs, testlog): assert rt_key['dest'] == "192.168.100.0/24" +# 'ip neigh flush all' won't remove failed entries if number of neighs less than gc_threshold1 +# Also it takes time to remove them completly. 
+# We use arp off/on to do it +def flush_neigh_entries(dvs): + dvs.runcmd("ip link set group default arp off") + dvs.runcmd("ip link set group default arp on") + +def test_system_warmreboot_neighbor_syncup(dvs, testlog): + + appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0) + conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0) + + #enable ipv6 on docker + dvs.runcmd("sysctl net.ipv6.conf.all.disable_ipv6=0") + + # flush all neighs first + flush_neigh_entries(dvs) + time.sleep(5) + + dvs.runcmd("config warm_restart enable system") + + # Test neighbors on NUM_INTF (e,g 8) interfaces + # Ethernet32/36/.../60, with ip: 32.0.0.1/24... 60.0.0.1/24 + # ipv6: 3200::1/64...6000::1/64 + # bring up the servers'interfaces and assign NUM_NEIGH_PER_INTF (e,g 128) ips per interface + # TBD: NUM_NEIGH_PER_INTF >= 128 ips will cause test framework to hang by default settings + # TBD: Need tune gc_thresh1/2/3 at host side of vs docker to support this. 
+ NUM_INTF = 8 + NUM_NEIGH_PER_INTF = 64 #128 + NUM_OF_NEIGHS = (NUM_INTF*NUM_NEIGH_PER_INTF) + macs = [] + for i in range(8, 8+NUM_INTF): + # set timeout to be the same as real HW + # set ip on server facing interfaces + # bring servers' interface up, save the macs + dvs.runcmd("sysctl -w net.ipv4.neigh.Ethernet{}.base_reachable_time_ms=1800000".format(i*4)) + dvs.runcmd("sysctl -w net.ipv6.neigh.Ethernet{}.base_reachable_time_ms=1800000".format(i*4)) + dvs.runcmd("ip addr flush dev Ethernet{}".format(i*4)) + dvs.runcmd("ifconfig Ethernet{} {}.0.0.1/24 up".format(i*4, i*4)) + dvs.runcmd("ip -6 addr add {}00::1/64 dev Ethernet{}".format(i*4,i*4)) + dvs.servers[i].runcmd("ip link set up dev eth0") + dvs.servers[i].runcmd("ip addr flush dev eth0") + result = dvs.servers[i].runcmd_output("ifconfig eth0 | grep HWaddr | awk '{print $NF}'") + macs.append(result.strip()) + + # + # Testcase 1: + # Add neighbor entries on servers connecting to SONiC ports + # 128 ipv4 and 128 ipv6 on each server + # total: 1024 ipv4 and 1024 ipv6 + # ping them to get the neighbor entries + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF): + dvs.servers[i].runcmd("ip addr add {}.0.0.{}/24 dev eth0".format(i*4, j+2)) + dvs.servers[i].runcmd("ip -6 addr add {}00::{}/64 dev eth0".format(i*4,j+2)) + + time.sleep(1) + + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF): + dvs.runcmd(['sh', '-c', "ping -c 1 -W 0 -q {}.0.0.{} > /dev/null 2>&1".format(i*4,j+2)]) + dvs.runcmd(['sh', '-c', "ping6 -c 1 -W 0 -q {}00::{} > /dev/null 2>&1".format(i*4,j+2)]) + + # Check the neighbor entries are inserted correctly + db = swsscommon.DBConnector(0, dvs.redis_sock, 0) + tbl = swsscommon.Table(db, "NEIGH_TABLE") + + # number of neighbors should match what we configured + # ipv4/ipv6 entries and loopback + check_redis_neigh_entries(dvs, tbl, 2*NUM_OF_NEIGHS) + + # All neighbor entries should match + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF): + 
(status, fvs) = tbl.get("Ethernet{}:{}.0.0.{}".format(i*4, i*4, j+2)) + assert status == True + for v in fvs: + if v[0] == "family": + assert v[1] == "IPv4" + if v[0] == "neigh": + assert v[1] == macs[i-8] + + (status, fvs) = tbl.get("Ethernet{}:{}00::{}".format(i*4, i*4, j+2)) + assert status == True + for v in fvs: + if v[0] == "family": + assert v[1] == "IPv6" + if v[0] == "neigh": + assert v[1] == macs[i-8] + + # + # Testcase 2: + # Stop neighsyncd, appDB entries should be reserved + # flush kernel neigh table to simulate warm reboot + # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel + # check all neighbors learned in kernel + # no changes should be there in syslog and sairedis.rec + + # get restore_count + restore_count = swss_get_RestoreCount(dvs, state_db) + + # stop neighsyncd and sairedis.rec + stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") + time.sleep(3) + flush_neigh_entries(dvs) + time.sleep(3) + + # check neighbors are gone + check_kernel_reachable_neigh_num(dvs, 0) + + # start neighsyncd and restore_neighbors + marker = dvs.add_log_marker() + pubsub = dvs.SubscribeAsicDbObject("SAI_OBJECT_TYPE_NEIGHBOR_ENTRY") + start_neighsyncd(dvs) + start_restore_neighbors(dvs) + + # should finish the store within 10 seconds + time.sleep(10) + + check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS) + check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS) + + # check syslog and sairedis.rec file for activities + check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv4") + check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv6") + (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) + assert nadd == 0 + assert ndel == 0 + + # check restore Count + swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd") + + # + # Testcase 3: + # Stop neighsyncd, appDB entries should be reserved + # flush kernel neigh table to simulate warm reboot + # Remove half of ips of servers' 
interfaces, add new half of ips + # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel + # check all new neighbors learned in kernel + # no changes should be there in syslog and sairedis.rec + + # get restore_count + restore_count = swss_get_RestoreCount(dvs, state_db) + + # stop neighsyncd and sairedis.rec + stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") + + # Del half of the ips and a new half of the ips + # note: the first ipv4 can not be deleted only + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF/2): + dvs.servers[i].runcmd("ip addr del {}.0.0.{}/24 dev eth0".format(i*4, j+NUM_NEIGH_PER_INTF/2+2)) + dvs.servers[i].runcmd("ip -6 addr del {}00::{}/64 dev eth0".format(i*4,j+NUM_NEIGH_PER_INTF/2+2)) + dvs.servers[i].runcmd("ip addr add {}.0.0.{}/24 dev eth0".format(i*4, j+NUM_NEIGH_PER_INTF+2)) + dvs.servers[i].runcmd("ip -6 addr add {}00::{}/64 dev eth0".format(i*4,j+NUM_NEIGH_PER_INTF+2)) + + flush_neigh_entries(dvs) + time.sleep(3) + + # check neighbors are gone + check_kernel_reachable_neigh_num(dvs, 0) + + # start neighsyncd and restore_neighbors + marker = dvs.add_log_marker() + start_neighsyncd(dvs) + start_restore_neighbors(dvs) + + # should finish the store within 10 seconds + time.sleep(10) + + check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS) + check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS) + + # check syslog and sairedis.rec file for activities + check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv4") + check_syslog_for_neighbor_entry(dvs, marker, 0, 0, "ipv6") + (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) + assert nadd == 0 + assert ndel == 0 + + # check restore Count + swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd") + + # Test case 4: + # ping the new ips, should get it into appDB + marker = dvs.add_log_marker() + + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF/2): + dvs.runcmd(['sh', '-c', 
"ping -c 1 -W 0 -q {}.0.0.{} > /dev/null 2>&1".format(i*4,j+NUM_NEIGH_PER_INTF+2)]) + dvs.runcmd(['sh', '-c', "ping6 -c 1 -W 0 -q {}00::{} > /dev/null 2>&1".format(i*4,j+NUM_NEIGH_PER_INTF+2)]) + + + check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS+NUM_OF_NEIGHS/2) + check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS+NUM_OF_NEIGHS/2) + check_redis_neigh_entries(dvs, tbl, 2*(NUM_OF_NEIGHS+NUM_OF_NEIGHS/2)) + + (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) + assert nadd == NUM_OF_NEIGHS #ipv4 and ipv6 + assert ndel == 0 + + # Remove stale entries manually + for i in range(8, 8+NUM_INTF): + for j in range(NUM_NEIGH_PER_INTF/2): + dvs.runcmd(['sh', '-c', "ip neigh del {}.0.0.{} dev Ethernet{}".format(i*4,j+NUM_NEIGH_PER_INTF/2+2, i*4)]) + dvs.runcmd(['sh', '-c', "ip -6 neigh del {}00::{} dev Ethernet{}".format(i*4,j+NUM_NEIGH_PER_INTF/2+2, i*4)]) + + time.sleep(5) + + check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS) + check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS) + check_redis_neigh_entries(dvs, tbl, 2*NUM_OF_NEIGHS) + + (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) + assert nadd == 0 + assert ndel == NUM_OF_NEIGHS #ipv4 and ipv6 + + # + # Testcase 5: + # Stop neighsyncd, appDB entries should be reserved + # flush kernel neigh table to simulate warm reboot + # keep half of the interface down + # start neighsyncd, start restore_neighbors service to restore the neighbor table in kernel + # check all new neighbors with interface up to be learned in kernel + # syslog/sai log should show half of the entries stale/deleted + + # get restore_count + restore_count = swss_get_RestoreCount(dvs, state_db) + + # stop neighsyncd and sairedis.rec + stop_neighsyncd(dvs) + del_entry_tbl(state_db, "NEIGH_RESTORE_TABLE", "Flags") + time.sleep(3) + + flush_neigh_entries(dvs) + time.sleep(3) + + # check neighbors are gone + check_kernel_reachable_neigh_num(dvs, 0) + + # bring down half of the links + for i in range(8, 8+NUM_INTF/2): + dvs.runcmd("ip link 
set down dev Ethernet{}".format(i*4)) + + # start neighsyncd and restore_neighbors + marker = dvs.add_log_marker() + start_neighsyncd(dvs) + start_restore_neighbors(dvs) + + # restore for up interfaces should be done within 10 seconds + time.sleep(10) + + check_kernel_reachable_v4_neigh_num(dvs, NUM_OF_NEIGHS/2) + check_kernel_reachable_v6_neigh_num(dvs, NUM_OF_NEIGHS/2) + + restoretbl = swsscommon.Table(state_db, swsscommon.STATE_NEIGH_RESTORE_TABLE_NAME) + + # waited 10 above already + i = 10 + while (not kernel_restore_neighs_done(restoretbl)): + print "Waiting for kernel neighbors restore process done: {} seconds".format(i) + time.sleep(10) + i += 10 + + time.sleep(10) + + # check syslog and sairedis.rec file for activities + check_syslog_for_neighbor_entry(dvs, marker, 0, NUM_OF_NEIGHS/2, "ipv4") + check_syslog_for_neighbor_entry(dvs, marker, 0, NUM_OF_NEIGHS/2, "ipv6") + (nadd, ndel) = dvs.CountSubscribedObjects(pubsub) + assert nadd == 0 + assert ndel == NUM_OF_NEIGHS + + # check restore Count + swss_app_check_RestoreCount_single(state_db, restore_count, "neighsyncd") + + # disable system warm restart + dvs.runcmd("config warm_restart disable system") +