From 70f9210fcad9b0c97ca4427972f11bd2167a1c44 Mon Sep 17 00:00:00 2001
From: Lawrence Lee
Date: Tue, 14 Nov 2023 20:42:30 +0000
Subject: [PATCH 1/5] [arp_update]: Flush MAC mismatch neighbors

- Check for MAC mismatch between neighbor entries in the kernel and APPL_DB
- Flush any entries with a mismatch

Signed-off-by: Lawrence Lee
---
 files/scripts/arp_update | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/files/scripts/arp_update b/files/scripts/arp_update
index 14a82ebe4da3..1fa34133ec87 100755
--- a/files/scripts/arp_update
+++ b/files/scripts/arp_update
@@ -9,6 +9,11 @@
 
 ARP_UPDATE_VARS_FILE="/usr/share/sonic/templates/arp_update_vars.j2"
 
+# Overload `logger` command to include arp_update tag
+logger () {
+    command logger -t "arp_update" "$@"
+}
+
 while /bin/true; do
   # find L3 interfaces which are UP, send ipv6 multicast pings
   ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
@@ -70,6 +75,19 @@ while /bin/true; do
       fi
   done
 
+  # Flush neighbor entries with MAC mismatch between kernel and APPL_DB
+  KERNEL_NEIGH=$(ip neigh show | grep -v "FAILED\|INCOMPLETE" | cut -d ' ' -f 1,3,5 --output-delimiter=',' | tr -d ' ')
+  for neigh in $KERNEL_NEIGH; do
+      ip="$( cut -d ',' -f 1 <<< "$neigh" )"
+      intf="$( cut -d ',' -f 2 <<< "$neigh" )"
+      kernel_mac="$( cut -d ',' -f 3 <<< "$neigh" )"
+      appl_db_mac="$(sonic-db-cli APPL_DB hget NEIGH_TABLE:$intf:$ip neigh)"
+      if [[ $kernel_mac != $appl_db_mac ]]; then
+          logger -p warning "MAC mismatch for ${ip} on ${intf} - kernel: ${kernel_mac}, APPL_DB: ${appl_db_mac}"
+          ip neigh flush $ip
+      fi
+  done
+
   VLAN=$(echo $ARP_UPDATE_VARS | jq -r '.vlan')
   SUBTYPE=$(sonic-db-cli CONFIG_DB hget 'DEVICE_METADATA|localhost' 'subtype' | tr '[:upper:]' '[:lower:]')
   for vlan in $VLAN; do

From fba877634b521912ab9ef2755cbef936f5ad4fd8 Mon Sep 17 00:00:00 2001
From: Lawrence Lee
Date: Wed, 15 Nov 2023 00:34:50 +0000
Subject: [PATCH 2/5] [arp_update]: Use overloaded logger command

- Remove manual log info (such as severity
  and process name) from logger calls since these are now automatically
  added by the overloaded logger function

Signed-off-by: Lawrence Lee
---
 files/scripts/arp_update | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/files/scripts/arp_update b/files/scripts/arp_update
index 1fa34133ec87..7e53e257390a 100755
--- a/files/scripts/arp_update
+++ b/files/scripts/arp_update
@@ -24,7 +24,7 @@ while /bin/true; do
       STATIC_ROUTE_IFNAMES=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_ifnames'))
       # on supervisor/rp exit the script gracefully
       if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]] || [[ -z "$STATIC_ROUTE_IFNAMES" ]]; then
-          logger "arp_update: exiting as no static route in packet based chassis"
+          logger "exiting as no static route in packet based chassis"
           exit 0
       fi
       for i in ${!STATIC_ROUTE_NEXTHOPS[@]}; do
@@ -43,7 +43,7 @@ while /bin/true; do
               interface="${STATIC_ROUTE_IFNAMES[i]}"
               if [[ -z "$interface" ]]; then
                   # should never be here, handling just in case
-                  logger "ERR: arp_update: missing interface entry for static route $nexthop"
+                  logger -p error "missing interface entry for static route $nexthop"
                   continue
               fi
               intf_up=$(ip link show $interface | grep "state UP")
@@ -52,7 +52,7 @@ while /bin/true; do
                   eval $pingcmd
                   # STALE entries may appear more often, not logging to prevent periodic syslogs
                   if [[ -z $(echo ${neigh_state} | grep 'STALE') ]]; then
-                      logger "arp_update: static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface"
+                      logger "static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface"
                   fi
               fi
           fi
@@ -176,11 +176,11 @@ while /bin/true; do
           if [[ $ip == *"."* ]] && [[ ! $KERNEIGH4 =~ "${ip},${intf}" ]]; then
               pingcmd="timeout 0.2 ping -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
               eval $pingcmd
-              logger "arp_update: mismatch arp entry, pinging ${ip} on ${intf}"
+              logger "mismatch arp entry, pinging ${ip} on ${intf}"
           elif [[ $ip == *":"* ]] && [[ ! $KERNEIGH6 =~ "${ip},${intf}" ]]; then
               ping6cmd="timeout 0.2 ping6 -I $intf -n -q -i 0 -c 1 -W 1 $ip >/dev/null"
               eval $ping6cmd
-              logger "arp_update: mismatch v6 nbr entry, pinging ${ip} on ${intf}"
+              logger "mismatch v6 nbr entry, pinging ${ip} on ${intf}"
           fi
       fi
   done

From fd21923ae900939dd9b959e29721093f29a00525 Mon Sep 17 00:00:00 2001
From: Lawrence Lee
Date: Wed, 15 Nov 2023 01:55:45 +0000
Subject: [PATCH 3/5] [arp_update]: Flush neighs w/ stale MACs

Signed-off-by: Lawrence Lee
---
 files/scripts/arp_update | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/files/scripts/arp_update b/files/scripts/arp_update
index 7e53e257390a..202fca186a70 100755
--- a/files/scripts/arp_update
+++ b/files/scripts/arp_update
@@ -75,6 +75,16 @@ while /bin/true; do
       fi
   done
 
+  # find neighbor entries with aged MAC and flush/relearn them
+  STALE_NEIGHS=$(ip neigh show | grep -v "fe80" | grep "STALE" | awk '{print $1 "," $5}' | tr [:lower:] [:upper:])
+  for neigh in $STALE_NEIGHS; do
+      ip="$( cut -d ',' -f 1 <<< "$neigh" )"
+      mac="$( cut -d ',' -f 2 <<< "$neigh" )"
+      if [[ -z $(sonic-db-cli ASIC_DB keys "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY*${mac}*") ]]; then
+          timeout 1 ping -c1 -w1 $ip > /dev/null
+      fi
+  done
+
   # Flush neighbor entries with MAC mismatch between kernel and APPL_DB
   KERNEL_NEIGH=$(ip neigh show | grep -v "FAILED\|INCOMPLETE" | cut -d ' ' -f 1,3,5 --output-delimiter=',' | tr -d ' ')
   for neigh in $KERNEL_NEIGH; do

From 42fb294051e4f95792d158a8b8669c6332bc48f4 Mon Sep 17 00:00:00 2001
From: Lawrence Lee
Date: Mon, 20 Nov 2023 21:26:10 +0000
Subject: [PATCH 4/5] exclude ipv6 link local from MAC flush

Signed-off-by: Lawrence Lee
---
 files/scripts/arp_update | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/files/scripts/arp_update b/files/scripts/arp_update
index 202fca186a70..66757c015f99 100755
--- a/files/scripts/arp_update
+++ b/files/scripts/arp_update
@@ -86,7 +86,7 @@ while /bin/true; do
   done
 
   # Flush neighbor entries with MAC mismatch between kernel and APPL_DB
-  KERNEL_NEIGH=$(ip neigh show | grep -v "FAILED\|INCOMPLETE" | cut -d ' ' -f 1,3,5 --output-delimiter=',' | tr -d ' ')
+  KERNEL_NEIGH=$(ip neigh show | grep -v "fe80" | grep -v "FAILED\|INCOMPLETE" | cut -d ' ' -f 1,3,5 --output-delimiter=',' | tr -d ' ')
   for neigh in $KERNEL_NEIGH; do
       ip="$( cut -d ',' -f 1 <<< "$neigh" )"
       intf="$( cut -d ',' -f 2 <<< "$neigh" )"

From 5bc1b39e7b88ec64af45d924eb0b9f2a86405364 Mon Sep 17 00:00:00 2001
From: Lawrence Lee
Date: Wed, 29 Nov 2023 23:46:53 +0000
Subject: [PATCH 5/5] ping flushed neigh IPs

Signed-off-by: Lawrence Lee
---
 files/scripts/arp_update | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/files/scripts/arp_update b/files/scripts/arp_update
index 66757c015f99..f402d53dced3 100755
--- a/files/scripts/arp_update
+++ b/files/scripts/arp_update
@@ -81,7 +81,7 @@ while /bin/true; do
       ip="$( cut -d ',' -f 1 <<< "$neigh" )"
       mac="$( cut -d ',' -f 2 <<< "$neigh" )"
       if [[ -z $(sonic-db-cli ASIC_DB keys "ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY*${mac}*") ]]; then
-          timeout 1 ping -c1 -w1 $ip > /dev/null
+          timeout 0.2 ping -c1 -w1 $ip > /dev/null
       fi
   done
 
@@ -95,6 +95,7 @@ while /bin/true; do
       if [[ $kernel_mac != $appl_db_mac ]]; then
           logger -p warning "MAC mismatch for ${ip} on ${intf} - kernel: ${kernel_mac}, APPL_DB: ${appl_db_mac}"
           ip neigh flush $ip
+          timeout 0.2 ping -c1 -w1 $ip > /dev/null
       fi
   done
 