From 95d7d440c21f7e7b7c891a49273e43d44de0a76a Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 2 Sep 2023 00:10:11 +0800 Subject: [PATCH 1/5] [submodule] Update submodule sonic-linux-kernel to the latest HEAD automatically (#16331) src/sonic-linux-kernel * db00eb9 - (HEAD -> 202205, origin/202205) PATCH] net: allow user to set metric on default route learned via Router Advertisement (#326) (2 days ago) [abdosi] --- src/sonic-linux-kernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-linux-kernel b/src/sonic-linux-kernel index 836aedf152..db00eb9623 160000 --- a/src/sonic-linux-kernel +++ b/src/sonic-linux-kernel @@ -1 +1 @@ -Subproject commit 836aedf1528b427b7856b072b45b6cc7aebe9790 +Subproject commit db00eb96232204bcc4b764f8d5868f43a5a11630 From 896b8e720934c6d55a294aad48d3d66bfd11c9e1 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 2 Sep 2023 00:10:46 +0800 Subject: [PATCH 2/5] [submodule] Update submodule sonic-swss to the latest HEAD automatically (#16333) src/sonic-swss * d787d50d - (HEAD -> 202205, origin/202205) Remove fabric queue counters. 
(#2862) (2 days ago) [jfeng-arista] * 4579d43f - update portStatIds for cisco (#2876) (3 days ago) [Zhixin Zhu] --- src/sonic-swss | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sonic-swss b/src/sonic-swss index 3e2974df0e..d787d50d9f 160000 --- a/src/sonic-swss +++ b/src/sonic-swss @@ -1 +1 @@ -Subproject commit 3e2974df0e179aca67f4ec7c614b3876c2a4c836 +Subproject commit d787d50d9fcbb050c6a87a40faede655f7e95f0e From 35bb472601b3f23eaf81170e9fbd8b6987f1ec31 Mon Sep 17 00:00:00 2001 From: Xichen96 Date: Sat, 2 Sep 2023 02:05:12 +0800 Subject: [PATCH 3/5] [installer] add processor.max_cstate=1 to intel kernel cmdline for intel cpu (#16371) This is a fix for PR #6051. The original PR disables the intel_idle driver, but it cannot limit the max C-state to 1 because the system falls back to the ACPI idle driver. intel_idle.max_cstate=0 is already present, which disables the intel_idle driver. With the added option, the common idle driver is disabled as well, so there is no idle management. This prevents a bug that can be triggered by the idle instruction on Intel platforms. 
Work item tracking Microsoft ADO (number only): 24867921 How I did it Add the option to the installer file, next to intel_idle.max_cstate=0 Signed-off-by: Xichen Lin --- installer/x86_64/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/x86_64/install.sh b/installer/x86_64/install.sh index 2ad8993ca1..e8c3c7fe66 100755 --- a/installer/x86_64/install.sh +++ b/installer/x86_64/install.sh @@ -598,7 +598,7 @@ CPUVENDOR="$(cat /proc/cpuinfo | grep -m 1 vendor_id | awk '{print $3}')" echo "Switch CPU vendor is: $CPUVENDOR" if echo "$CPUVENDOR" | grep -i 'Intel' >/dev/null 2>&1; then echo "Switch CPU cstates are: disabled" - CSTATES="intel_idle.max_cstate=0" + CSTATES="processor.max_cstate=1 intel_idle.max_cstate=0" else CSTATES="" fi From 88d692f987a33f68545099db25cc27a34cf5bce0 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 2 Sep 2023 07:19:22 +0800 Subject: [PATCH 4/5] [Nokia][DeviceData] Update the Nokia platform IXR-7250E device data (#16028) (#16381) Why I did it Update the platform_reboot script of the Nokia platform IXR-7250E-36x400G to display the correct reboot-cause history when the reboot is issued from the supervisor card. Work item tracking Microsoft ADO (number only): How I did it Modify the platform_reboot script to copy the correct reboot-cause.txt file from NDK to the /host/reboot-cause directory at the down cycle when the reboot is issued from the Supervisor (both for a reboot right after installing a new image and for a normal reboot). Signed-off-by: mlok Co-authored-by: Marty Y. 
Lok <76118573+mlok-nokia@users.noreply.github.com> --- .../platform_reboot | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/device/nokia/x86_64-nokia_ixr7250e_36x400g-r0/platform_reboot b/device/nokia/x86_64-nokia_ixr7250e_36x400g-r0/platform_reboot index 180db164df..43aace70f7 100755 --- a/device/nokia/x86_64-nokia_ixr7250e_36x400g-r0/platform_reboot +++ b/device/nokia/x86_64-nokia_ixr7250e_36x400g-r0/platform_reboot @@ -1,23 +1,24 @@ #!/bin/bash -update_reboot_cause_for_supervisor_reboot() +update_reboot_cause() { DEVICE_MGR_REBOOT_FILE=/tmp/device_mgr_reboot REBOOT_CAUSE_FILE=/host/reboot-cause/reboot-cause.txt - TMP_REBOOT_CAUSE_FILE=/tmp/tmp-reboot-cause.txt - if [ -f $DEVICE_MGR_REBOOT_FILE ]; then - if [ -f $REBOOT_CAUSE_FILE ]; then - t1=`sudo grep "User: ," $REBOOT_CAUSE_FILE` - if [ ! -z "$t1" ]; then - echo $t1 | sed 's/reboot/reboot from Supervisor/g' | sed 's/User: /User: admin/g' > $TMP_REBOOT_CAUSE_FILE - cp $TMP_REBOOT_CAUSE_FILE $REBOOT_CAUSE_FILE - fi + DEVICE_REBOOT_CAUSE_FILE=/etc/opt/srlinux/reboot-cause.txt + if [ -e $DEVICE_MGR_REBOOT_FILE ]; then + if [ -e $DEVICE_REBOOT_CAUSE_FILE ]; then + cp -f $DEVICE_REBOOT_CAUSE_FILE $REBOOT_CAUSE_FILE fi + rm -f $DEVICE_MGR_REBOOT_FILE + else + touch /etc/opt/srlinux/devmgr_reboot_cause.done + rm -f $DEVICE_REBOOT_CAUSE_FILE &> /dev/null fi + sync } # update the reboot_cuase file when reboot is trigger by device-mgr -update_reboot_cause_for_supervisor_reboot +update_reboot_cause systemctl stop nokia-watchdog.service sleep 2 @@ -25,7 +26,5 @@ echo "w" > /dev/watchdog kick_date=`date -u` echo "last watchdog kick $kick_date" > /var/log/nokia-watchdog-last.log rm -f /sys/firmware/efi/efivars/dump-* -touch /etc/opt/srlinux/devmgr_reboot_cause.done -rm -f /etc/opt/srlinux/reboot-cause.txt sync exec /sbin/reboot $@ From f7f2e654c409d02adc1309809f7b0e453d687256 Mon Sep 17 00:00:00 2001 From: mssonicbld <79238446+mssonicbld@users.noreply.github.com> Date: Sat, 2 Sep 2023 
07:20:31 +0800 Subject: [PATCH 5/5] [chassis] Chassis DB cleanup when asic comes up (#16213) (#16378) * [chassis]Chassis DB cleanup when asic comes up Cleanup the entries from the following tables in chassis app db in redis_chassis server in the supervisor (1) SYSTEM_NEIGH (2) SYSTEM_INTERFACE (3) SYSTEM_LAG_MEMBER_TABLE (4) SYSTEM_LAG_TABLE As part of the clean up only those entries created by the asic that is coming up are deleted. The LAG IDs used by the asics are also de-allocated from SYSTEM_LAG_ID_TABLE and SYSTEM_LAG_ID_SET - Added check to run the chassis db clean up only for voq switches. Signed-off-by: vedganes Co-authored-by: vganesan-nokia <67648637+vganesan-nokia@users.noreply.github.com> --- files/scripts/swss.sh | 101 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index 6412debfe7..9e62106f28 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -107,6 +107,106 @@ function clean_up_tables() end" 0 } +# This function cleans up the chassis db table entries created ONLY by this asic +# This is used to do the clean up operation when the line card / asic reboots +# When the asic/lc is RE-booting, the chassis db server is supposed to be running +# in the supervisor. So the clean up is done only when the chassis db is connectable. +# Otherwise no need to do the clean up since both the supervisor and line card may be +# rebooting (the whole chassis scenario) +# The clean up operation is required to delete only those entries created by +# the asic that is rebooted. 
Entries from the following tables are deleted in the order +# given below +# (1) SYSTEM_NEIGH +# (2) SYSTEM_INTERFACE +# (3) SYSTEM_LAG_MEMBER_TABLE +# (4) SYSTEM_LAG_TABLE +# (5) The corresponding LAG IDs of the entries from SYSTEM_LAG_TABLE +# SYSTEM_LAG_ID_TABLE and SYSTEM_LAG_ID_SET are adjusted appropriately +function clean_up_chassis_db_tables() +{ + if [[ !($($SONIC_DB_CLI CHASSIS_APP_DB PING | grep -c True) -gt 0) ]]; then + return + fi + + lc=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'hostname'` + asic=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'asic_name'` + switch_type=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'switch_type'` + + # Run clean up only in swss running for voq switches + if is_chassis_supervisor || [[ $switch_type != 'voq' ]]; then + return + fi + + # First, delete SYSTEM_NEIGH entries + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_NEIGH*|' .. host .. '|' .. dev + local keylist = redis.call('KEYS', 'SYSTEM_NEIGH*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + end + end + return " 0 $lc $asic + + # Wait for some time before deleting system interface so that the system interface's "object in use" + # is cleared in both orchagent and in syncd. Without this delay, the orchagent clears the refcount + # but the syncd (meta) still has non-zero refcount. Because of this, orchagent gets "object still in use" + # error and aborts. + + sleep 30 + + # Next, delete SYSTEM_INTERFACE entries + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_INTERFACE*|' .. host .. '|' .. 
dev + local keylist = redis.call('KEYS', 'SYSTEM_INTERFACE*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + end + end + return " 0 $lc $asic + + # Next, delete SYSTEM_LAG_MEMBER_TABLE entries + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_LAG_MEMBER_TABLE*|' .. host .. '|' .. dev + local keylist = redis.call('KEYS', 'SYSTEM_LAG_MEMBER_TABLE*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + end + end + return " 0 $lc $asic + + # Wait for some time before deleting system lag so that all the members of the + # system lag will be cleared. + + sleep 15 + + # Finally, delete SYSTEM_LAG_TABLE entries and deallocate LAG IDs. NOTE(review): if lagname has no field in SYSTEM_LAG_ID_TABLE, HGET returns false to Lua and the SREM call below would raise and stop the script; confirm every SYSTEM_LAG_TABLE key has an ID entry. + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_LAG_TABLE*|' .. '(' .. host .. '|' .. dev ..'.*' .. ')' + local keylist = redis.call('KEYS', 'SYSTEM_LAG_TABLE*') + for j,key in ipairs(keylist) do + local lagname = string.match(key, ps) + if lagname ~= nil then + redis.call('DEL', key) + local lagid = redis.call('HGET', 'SYSTEM_LAG_ID_TABLE', lagname) + redis.call('SREM', 'SYSTEM_LAG_ID_SET', lagid) + redis.call('HDEL', 'SYSTEM_LAG_ID_TABLE', lagname) + end + end + return " 0 $lc $asic + +} + start_peer_and_dependent_services() { check_warm_boot @@ -177,6 +277,7 @@ start() { $SONIC_DB_CLI RESTAPI_DB FLUSHDB clean_up_tables STATE_DB "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'LAG_TABLE*', 'LAG_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*', 'VRF_TABLE*', 'FDB_TABLE*', 'FG_ROUTE_TABLE*', 'BUFFER_POOL*', 'BUFFER_PROFILE*', 'MUX_CABLE_TABLE*', 'ADVERTISE_NETWORK_TABLE*', 'VXLAN_TUNNEL_TABLE*', 'MACSEC_PORT_TABLE*', 'MACSEC_INGRESS_SA_TABLE*', 'MACSEC_EGRESS_SA_TABLE*', 'MACSEC_INGRESS_SC_TABLE*', 'MACSEC_EGRESS_SC_TABLE*', 'VNET_ROUTE*', 
'VNET_MONITOR_TABLE*', 'BFD_SESSION_TABLE*'" $SONIC_DB_CLI APPL_STATE_DB FLUSHDB + clean_up_chassis_db_tables rm -rf /tmp/cache fi