diff --git a/dockers/docker-orchagent/Dockerfile.j2 b/dockers/docker-orchagent/Dockerfile.j2 index 81b1d78cd410..43900d29c7d4 100755 --- a/dockers/docker-orchagent/Dockerfile.j2 +++ b/dockers/docker-orchagent/Dockerfile.j2 @@ -17,9 +17,9 @@ RUN apt-get -y install libjemalloc1 RUN apt-get install -y libelf1 libmnl0 -RUN pip install --upgrade pip RUN pip install setuptools -RUN pip install pyroute2 netifaces +RUN pip install pyroute2==0.5.3 netifaces==0.10.7 +RUN pip install monotonic==1.5 COPY \ {% for deb in docker_orchagent_debs.split(' ') -%} @@ -37,7 +37,6 @@ RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y RUN rm -rf /debs COPY ["files/arp_update", "/usr/bin"] -COPY ["files/restore_neighbors.py", "/usr/bin"] COPY ["enable_counters.py", "/usr/bin"] COPY ["start.sh", "orchagent.sh", "swssconfig.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 52f703d1085f..13806a83ff09 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -122,6 +122,8 @@ command=/usr/bin/restore_neighbors.py priority=15 autostart=false autorestart=false +startsecs=0 +startretries=0 stdout_logfile=syslog stderr_logfile=syslog diff --git a/files/scripts/restore_neighbors.py b/files/scripts/restore_neighbors.py deleted file mode 100755 index 0689f8e5e415..000000000000 --- a/files/scripts/restore_neighbors.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python - -"""" -Description: restore_neighbors.py -- restoring neighbor table into kernel during system warm reboot. - The script is started by supervisord in swss docker when the docker is started. - If does not do anything in case warm restart is not enabled. - In case system warm reboot is enabled, it will try to restore the neighbor table into kernel - through netlink API calls and update the neigh table by sending arp/ns requests to all neighbor - entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process. - In case docker restart enabled only, it sets the stateDB flag so neighsyncd can follow - the same logic. -""" - -import sys -import swsssdk -import netifaces -import time -from pyroute2 import IPRoute -from pyroute2.netlink.rtnl import ndmsg -from socket import AF_INET,AF_INET6 -import logging -logging.getLogger("scapy.runtime").setLevel(logging.ERROR) -from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr - -logger = logging.getLogger(__name__) -logger.setLevel(logging.WARNING) -logger.addHandler(logging.NullHandler()) - -TIME_OUT = 300 # timeout the restore process in 5 mins if not finished -CHECK_INTERVAL = 5 # every 5 seconds to check interfaces state - -ip_family = {"IPv4": AF_INET, "IPv6": AF_INET6} - -# return the first ipv4/ipv6 address assigned on intf -def first_ip_on_intf(intf, family): - if intf in netifaces.interfaces(): - ipaddresses = netifaces.ifaddresses(intf) - if ip_family[family] in ipaddresses: - # cover link local address as well - return ipaddresses[ip_family[family]][0]['addr'].split("%")[0] - return None - -# check if the intf is operational up -def is_intf_oper_state_up(intf): - oper_file = '/sys/class/net/{0}/carrier' - try: - state_file = open(oper_file.format(intf), 'r') - state = state_file.readline().rstrip() - except IOError as e: - logger.info('Error: {}'.format(str(e))) - return False - - if state == '1': - return True - return False - -# read the neigh table from AppDB to memory, format as below -# build map as below, this can efficiently access intf and family groups later -# { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] } -# { family2 -> [[ipM, macM], [ipN, macN] ...] } }, -# ... -# intfA -> { { family1 -> [[ipW, macW], [ipX, macX] ...] } -# { family2 -> [[ipY, macY], [ipZ, macZ] ...] } } -# } -# -# Alternatively: -# 1, we can build: -# { intf1 -> [[family1, ip1, mac1], [family2, ip2, mac2] ...]}, -# ... -# { intfA -> [[family1, ipX, macX], [family2, ipY, macY] ...]} -# -# 2, Or simply build two maps based on families -# These alternative solutions would have worse performance because: -# 1, need iterate the whole list if only one family is up. -# 2, need check interface state twice due to the split map - -def read_neigh_table_to_maps(): - db = swsssdk.SonicV2Connector(host='127.0.0.1') - db.connect(db.APPL_DB, False) - - intf_neigh_map = {} - - keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*') - keys = [] if keys is None else keys - for key in keys: - key_split = key.split(':', 2) - intf_name = key_split[1] - if intf_name == 'lo': - continue - dst_ip = key_split[2] - value = db.get_all(db.APPL_DB, key) - if 'neigh' in value and 'family' in value: - dmac = value['neigh'] - family = value['family'] - else: - raise RuntimeError('Neigh table format is incorrect') - - if family not in ip_family: - raise RuntimeError('Neigh table format is incorrect') - - ip_mac_pair = [] - ip_mac_pair.append(dst_ip) - ip_mac_pair.append(dmac) - - intf_neigh_map.setdefault(intf_name, {}).setdefault(family, []).append(ip_mac_pair) - - db.close(db.APPL_DB) - - return intf_neigh_map - -# check if the swss warm restart is enabled. -def is_swss_warm_restart_enabled(): - db = swsssdk.SonicV2Connector(host='127.0.0.1') - db.connect(db.CONFIG_DB, False) - - keys = db.keys(db.CONFIG_DB, 'WARM_RESTART|swss') - if keys : - value = db.get_all(db.CONFIG_DB, keys[0]) - if 'enable' in value: - if (value['enable']) == 'true': - db.close(db.STATE_DB) - return True - db.close(db.STATE_DB) - return False - -# check if the system warm reboot is enabled. -# The restore process only happens when system warm reboot enabled and during the bootup -def is_system_warm_reboot_enabled(): - db = swsssdk.SonicV2Connector(host='127.0.0.1') - db.connect(db.CONFIG_DB, False) - - keys = db.keys(db.CONFIG_DB, 'WARM_RESTART|system') - if keys : - value = db.get_all(db.CONFIG_DB, keys[0]) - if 'enable' in value: - if (value['enable']) == 'true': - db.close(db.STATE_DB) - return True - db.close(db.STATE_DB) - return False - -# Use netlink to set neigh table into kernel -def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac): - logging.info('set neighbor entries: family: {}, intf_idx: {}, ip: {}, mac {}'.format( - family, intf_idx, dst_ip, dmac)) - - if family not in ip_family: - return - - family_af_inet = ip_family[family] - - ipclass.neigh('set', - family=family_af_inet, - dst=dst_ip, - lladdr=dmac, - ifindex=intf_idx, - state=ndmsg.states['reachable']) - -# build ARP or NS packets depending on family -def build_arp_ns_pkt(family, smac, src_ip, dst_ip): - if family == 'IPv4': - eth = Ether(src=smac, dst='ff:ff:ff:ff:ff:ff') - pkt = eth/ARP(op=ARP.who_has, pdst=dst_ip) - elif family == 'IPv6': - nsma = in6_getnsma(inet_pton(AF_INET6, dst_ip)) - mcast_dst_ip = inet_ntop(AF_INET6, nsma) - dmac = in6_getnsmac(nsma) - eth = Ether(src=smac,dst=dmac) - ipv6 = IPv6(src=src_ip, dst=mcast_dst_ip) - ns = ICMPv6ND_NS(tgt=dst_ip) - ns_opt = ICMPv6NDOptSrcLLAddr(lladdr=smac) - pkt = eth/ipv6/ns/ns_opt - return pkt - -# Set the statedb "NEIGH_RESTORE_TABLE|Flags", so neighsyncd can start reconciliation -def set_statedb_neigh_restore_done(): - db = swsssdk.SonicV2Connector(host='127.0.0.1') - db.connect(db.STATE_DB, False) - db.set(db.STATE_DB, 'NEIGH_RESTORE_TABLE|Flags', 'restored', 'true') - db.close(db.STATE_DB) - return - - -def main(): - - print "restore_neighbors service is started" - # if swss or system warm reboot not enabled, don't run - if not is_system_warm_reboot_enabled() and not is_swss_warm_restart_enabled(): - print "restore_neighbors service is skipped as warm restart not enabled" - # make supervisord happy - time.sleep(2) - return - - if not is_system_warm_reboot_enabled(): - set_statedb_neigh_restore_done() - print "restore_neighbors service is done as system warm reboot not enabled" - # make supervisord happy - time.sleep(2) - return - - # read the neigh table from appDB to internal map - try: - intf_neigh_map = read_neigh_table_to_maps() - except RuntimeError, e: - logger.exception(str(e)) - # make supervisord happy - time.sleep(2) - sys.exit(1) - - # create object for netlink calls to kernel - ipclass = IPRoute() - - start_time = time.time() - while (time.time() - start_time) < TIME_OUT: - for intf, family_neigh_map in intf_neigh_map.items(): - # only try to restore to kernel when link is up - if is_intf_oper_state_up(intf): - src_mac = get_if_hwaddr(intf) - intf_idx = ipclass.link_lookup(ifname=intf)[0] - # create socket per intf to send packets - s = conf.L2socket(iface=intf) - - # Only two families: 'IPv4' and 'IPv6' - for family in ip_family.keys(): - # if ip address assigned and if we have neighs in this family, restore them - src_ip = first_ip_on_intf(intf, family) - if src_ip and (family in family_neigh_map): - neigh_list = family_neigh_map[family] - for dst_ip, dmac in neigh_list: - # use netlink to set neighbor entries - set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac) - - # best effort to update kernel neigh info - # this will be updated by arp_update later too - s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip)) - # delete this family on the intf - del family_neigh_map[family] - # close the pkt socket - s.close() - - # if all families are deleted, remove the key - if len(intf_neigh_map[intf]) == 0: - del intf_neigh_map[intf] - # map is empty, all neigh entries are restored - if not intf_neigh_map: - break - time.sleep(CHECK_INTERVAL) - - # set statedb to signal other processes like neighsyncd - set_statedb_neigh_restore_done() - print "restore_neighbor service is done for system warmreboot" - # make supervisord happy - time.sleep(2) - return - -if __name__ == '__main__': - main() diff --git a/platform/barefoot/docker-orchagent-bfn.mk b/platform/barefoot/docker-orchagent-bfn.mk index 34a8f59b6bd2..fefd39691855 100644 --- a/platform/barefoot/docker-orchagent-bfn.mk +++ b/platform/barefoot/docker-orchagent-bfn.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_BFN)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_BFN)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_BFN)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_BFN)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_BFN)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/broadcom/docker-orchagent-brcm.mk b/platform/broadcom/docker-orchagent-brcm.mk index 4d17fdf2040b..dc3c31f43d5b 100644 --- a/platform/broadcom/docker-orchagent-brcm.mk +++ b/platform/broadcom/docker-orchagent-brcm.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_BRCM)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_BRCM)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_BRCM)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_BRCM)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_BRCM)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/cavium/docker-orchagent-cavm.mk b/platform/cavium/docker-orchagent-cavm.mk index 18cf32a9320d..282cdaa9172d 100644 --- a/platform/cavium/docker-orchagent-cavm.mk +++ b/platform/cavium/docker-orchagent-cavm.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_CAVM)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_CAVM)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_CAVM)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_CAVM)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_CAVM)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/centec/docker-orchagent-centec.mk b/platform/centec/docker-orchagent-centec.mk index 02965691b1c2..891f5550bac6 100644 --- a/platform/centec/docker-orchagent-centec.mk +++ b/platform/centec/docker-orchagent-centec.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_CENTEC)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_CENTEC)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_CENTEC)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_CENTEC)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_CENTEC)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/marvell/docker-orchagent-mrvl.mk b/platform/marvell/docker-orchagent-mrvl.mk index 8262749465d0..9c3c4c13a8f9 100644 --- a/platform/marvell/docker-orchagent-mrvl.mk +++ b/platform/marvell/docker-orchagent-mrvl.mk @@ -20,4 +20,4 @@ $(DOCKER_ORCHAGENT_MRVL)_RUN_OPT += -v /host/machine.conf:/host/machine.conf $(DOCKER_ORCHAGENT_MRVL)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_MRVL)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_MRVL)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_MRVL)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/mellanox/docker-orchagent-mlnx.mk b/platform/mellanox/docker-orchagent-mlnx.mk index c782ea1c278e..5e72beb4c300 100644 --- a/platform/mellanox/docker-orchagent-mlnx.mk +++ b/platform/mellanox/docker-orchagent-mlnx.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_MLNX)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_MLNX)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_MLNX)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_MLNX)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_MLNX)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/nephos/docker-orchagent-nephos.mk b/platform/nephos/docker-orchagent-nephos.mk index 176903835501..f7719b3df5dd 100644 --- a/platform/nephos/docker-orchagent-nephos.mk +++ b/platform/nephos/docker-orchagent-nephos.mk @@ -21,4 +21,4 @@ $(DOCKER_ORCHAGENT_NEPHOS)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_NEPHOS)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_NEPHOS)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_NEPHOS)_FILES += $(ARP_UPDATE_SCRIPT) $(RESTORE_NEIGHS_SCRIPT) +$(DOCKER_ORCHAGENT_NEPHOS)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/vs/docker-sonic-vs.mk b/platform/vs/docker-sonic-vs.mk index 8c6f188354b4..ea0d15134f5a 100644 --- a/platform/vs/docker-sonic-vs.mk +++ b/platform/vs/docker-sonic-vs.mk @@ -30,8 +30,7 @@ $(DOCKER_SONIC_VS)_DEPENDS += $(GOBGP) endif $(DOCKER_SONIC_VS)_FILES += $(CONFIGDB_LOAD_SCRIPT) \ - $(ARP_UPDATE_SCRIPT) \ - $(RESTORE_NEIGHS_SCRIPT) + $(ARP_UPDATE_SCRIPT) $(DOCKER_SONIC_VS)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE) SONIC_DOCKER_IMAGES += $(DOCKER_SONIC_VS) diff --git a/platform/vs/docker-sonic-vs/Dockerfile.j2 b/platform/vs/docker-sonic-vs/Dockerfile.j2 index 515f44184239..71caf92e531b 100644 --- a/platform/vs/docker-sonic-vs/Dockerfile.j2 +++ b/platform/vs/docker-sonic-vs/Dockerfile.j2 @@ -52,8 +52,8 @@ RUN apt-get install -y net-tools \ RUN pip install setuptools RUN pip install py2_ipaddress RUN pip install six -RUN pip install pyroute2 -RUN pip install netifaces +RUN pip install pyroute2==0.5.3 netifaces==0.10.7 +RUN pip install monotonic==1.5 {% if docker_sonic_vs_debs.strip() -%} # Copy locally-built Debian package dependencies @@ -90,7 +90,6 @@ COPY ["pg_profile_lookup.ini", "/usr/share/sonic/device/vswitch/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] COPY ["files/configdb-load.sh", "/usr/bin/"] COPY ["files/arp_update", "/usr/bin"] -COPY ["files/restore_neighbors.py", "/usr/bin"] # Workaround the tcpdump issue RUN mv /usr/sbin/tcpdump /usr/bin/tcpdump diff --git a/platform/vs/docker-sonic-vs/supervisord.conf b/platform/vs/docker-sonic-vs/supervisord.conf index e6cd5fe8d14f..c4ec73b41fad 100644 --- a/platform/vs/docker-sonic-vs/supervisord.conf +++ b/platform/vs/docker-sonic-vs/supervisord.conf @@ -160,5 +160,7 @@ command=/usr/bin/restore_neighbors.py priority=19 autostart=false autorestart=false +startsecs=0 +startretries=0 stdout_logfile=syslog stderr_logfile=syslog diff --git a/rules/scripts.mk b/rules/scripts.mk index 210e8eb6cafe..fbefdd68d2cd 100644 --- a/rules/scripts.mk +++ b/rules/scripts.mk @@ -5,11 +5,7 @@ $(ARP_UPDATE_SCRIPT)_PATH = files/scripts CONFIGDB_LOAD_SCRIPT = configdb-load.sh $(CONFIGDB_LOAD_SCRIPT)_PATH = files/scripts -RESTORE_NEIGHS_SCRIPT = restore_neighbors.py -$(RESTORE_NEIGHS_SCRIPT)_PATH = files/scripts - SONIC_COPY_FILES += $(CONFIGDB_LOAD_SCRIPT) \ - $(ARP_UPDATE_SCRIPT) \ - $(RESTORE_NEIGHS_SCRIPT) + $(ARP_UPDATE_SCRIPT)