From f840692a212c89a39bb633e6b64278f83614a763 Mon Sep 17 00:00:00 2001 From: Stepan Blyschak Date: Tue, 11 Dec 2018 06:02:03 +0000 Subject: [PATCH 1/2] [mellanox|ffb] use system level warm reboot for Mellanox fastfast boot Signed-off-by: Stepan Blyschak --- files/build_templates/docker_image_ctl.j2 | 6 +- files/scripts/swss.sh | 6 +- files/scripts/syncd.sh | 29 ++++---- platform/mellanox/docker-syncd-mlnx/start.sh | 5 -- platform/mellanox/mlnx-ffb.sh | 15 ++++- platform/mellanox/mlnx-issu/scripts/ffb | 69 -------------------- platform/mellanox/mlnx-issu/setup.py | 1 - 7 files changed, 33 insertions(+), 98 deletions(-) delete mode 100755 platform/mellanox/mlnx-issu/scripts/ffb diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index 22d69702286b..a3cbf5adbae7 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -28,7 +28,7 @@ function preStartAction() { {%- if docker_container_name == "database" %} WARM_DIR=/host/warmboot - if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then + if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then # Load redis content from /host/warmboot/dump.rdb docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb else @@ -49,7 +49,7 @@ function postStartAction() until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do sleep 1; done - if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then + if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then rm -f $WARM_DIR/dump.rdb else # If there is a config db dump file, load it @@ -61,7 +61,7 @@ function postStartAction() fi {%- elif docker_container_name == "swss" %} docker exec swss rm -f /ready # remove cruft - if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then + if [[ "$BOOT_TYPE" == "fast" ]] && [[ -d 
/host/fast-reboot ]]; then test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/ test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/ test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/ diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index b5ff18770938..70b81383fa44 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -90,11 +90,7 @@ start() { # Don't flush DB during warm boot if [[ x"$WARM_BOOT" != x"true" ]]; then - # Don't flush APP_DB during MLNX fastfast boot - BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)" - if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then - /usr/bin/docker exec database redis-cli -n 0 FLUSHDB - fi + /usr/bin/docker exec database redis-cli -n 0 FLUSHDB /usr/bin/docker exec database redis-cli -n 2 FLUSHDB /usr/bin/docker exec database redis-cli -n 5 FLUSHDB clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'" diff --git a/files/scripts/syncd.sh b/files/scripts/syncd.sh index 3e011a2f07bf..fd58f2e44e36 100755 --- a/files/scripts/syncd.sh +++ b/files/scripts/syncd.sh @@ -90,14 +90,10 @@ start() { # Flush DB during non-warm start /usr/bin/docker exec database redis-cli -n 1 FLUSHDB - - # platform specific tasks - if [ x$sonic_asic_platform == x'cavium' ]; then - /etc/init.d/xpnet.sh start - fi fi # platform specific tasks + if [ x"$sonic_asic_platform" == x"mellanox" ]; then BOOT_TYPE=`getBootType` if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then @@ -109,6 +105,13 @@ start() { /sbin/modprobe i2c-dev fi + if [[ x"$WARM_BOOT" != x"true" ]]; then + if [ x$sonic_asic_platform == x'cavium' ]; then + /etc/init.d/xpnet.sh start + fi + fi + + # start service docker /usr/bin/${SERVICE}.sh start debug "Started ${SERVICE} service..." 
@@ -146,21 +149,21 @@ stop() { /usr/bin/${SERVICE}.sh stop debug "Stopped ${SERVICE} service..." - # if warm start enabled, don't stop peer service docker + # platform specific tasks + + if [ x$sonic_asic_platform == x'mellanox' ]; then + /etc/init.d/sxdkernel stop + /usr/bin/mst stop + fi + + if [[ x"$WARM_BOOT" != x"true" ]]; then - # platform specific tasks if [ x$sonic_asic_platform == x'cavium' ]; then /etc/init.d/xpnet.sh stop /etc/init.d/xpnet.sh start fi fi - # platform specific tasks - if [ x"$sonic_asic_platform" == x"mellanox" ]; then - /etc/init.d/sxdkernel stop - /usr/bin/mst stop - fi - unlock_service_state_change } diff --git a/platform/mellanox/docker-syncd-mlnx/start.sh b/platform/mellanox/docker-syncd-mlnx/start.sh index 7753b134f7ce..61ccd2db8933 100755 --- a/platform/mellanox/docker-syncd-mlnx/start.sh +++ b/platform/mellanox/docker-syncd-mlnx/start.sh @@ -8,8 +8,3 @@ supervisorctl start syncd supervisorctl start mlnx-sfpd -BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)" -if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then - rm -f /var/warmboot/issu_started - /usr/bin/ffb &>/dev/null & -fi diff --git a/platform/mellanox/mlnx-ffb.sh b/platform/mellanox/mlnx-ffb.sh index d06721081a5d..c87ea56f08f5 100755 --- a/platform/mellanox/mlnx-ffb.sh +++ b/platform/mellanox/mlnx-ffb.sh @@ -62,6 +62,19 @@ check_sdk_upgrade() return "${CHECK_RESULT}" } +check_ffb() +{ + check_issu_enabled || { + echo "ISSU is not enabled on this HWSKU" + return "${FFB_FAILURE}" + } + check_sdk_upgrade || { + echo "SDK upgrade check failed" + return "${FFB_FAILURE}" + } + return "${FFB_SUCCESS}"; +} + # Perform ISSU start issu_start() { @@ -70,8 +83,6 @@ issu_start() EXIT_CODE=$? 
- touch /host/warmboot/issu_started - return $EXIT_CODE } diff --git a/platform/mellanox/mlnx-issu/scripts/ffb b/platform/mellanox/mlnx-issu/scripts/ffb deleted file mode 100755 index e862083d2942..000000000000 --- a/platform/mellanox/mlnx-issu/scripts/ffb +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -""" -Part of Mellanox platform specific fastfast boot implementation for warm-boot. -Notifies SYNCD proccess once boot is finished after warm-reboot. -Once SYNCD received such notification it should set appropriate SAI attribute. -Then SAI will notify SDK to end ISSU mode for the FFB. -""" - - -import time -import swsssdk -from threading import Timer - - -class FFB(object): - """Provides implementation for Mellanox fastfast boot""" - DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp' - DB_STATE_ENTRY_NAME = 'state' - DB_STATE_TYPE_RECONCILED = 'reconciled' - DB_CHANNEL_NAME = 'MLNX_FFB' - DB_CHANNEL_MSG = '["SET","ISSU_END"]' # message should be in the following format: ["",""] - SUB_THREAD_TIMEOUT = 1 - STOP_TIMER_TIMEOUT = 180 - - def __init__(self): - self.state_db = swsssdk.SonicV2Connector() - self.state_db.connect(self.state_db.STATE_DB) - - self.prevState = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME) - - self.pubSub = self.state_db.redis_clients[self.state_db.STATE_DB].pubsub() - self.pubSub.psubscribe(**{'__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY): self.eventHandler}) - - self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish) - - def run(self): - # Start event thread in order to get required events - self.eventThread = self.pubSub.run_in_thread(sleep_time=self.SUB_THREAD_TIMEOUT) - # Start oneshot timer in order to exit in case required event is not received during defined timeout - self.timeoutTimer.start() - - def finish(self): - # Stop event thread and timeout timer - self.eventThread.stop() - self.timeoutTimer.cancel() - - # Publish "FFB END" event to SYNCD process - time.sleep(60) # W/A: Wait 
until configuration is applied to HW since it takes some time - self.state_db.publish(self.state_db.STATE_DB, self.DB_CHANNEL_NAME, self.DB_CHANNEL_MSG) - - def eventHandler(self, msg): - # Only "set" operations are needed so just skip all others - if msg['data'] != 'hset': - return - - state = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME) - - if (state != self.prevState) and (state == self.DB_STATE_TYPE_RECONCILED): - self.finish() - else: - self.prevState = state - - -def main(): - FFB().run() - - -if __name__ == '__main__': - main() diff --git a/platform/mellanox/mlnx-issu/setup.py b/platform/mellanox/mlnx-issu/setup.py index d1a733d6dce8..614c9a0bbe46 100755 --- a/platform/mellanox/mlnx-issu/setup.py +++ b/platform/mellanox/mlnx-issu/setup.py @@ -10,6 +10,5 @@ maintainer_email='stepanb@mellanox.com', scripts=[ 'scripts/issu', - 'scripts/ffb', ] ) From e49be759bd13414ee202354028550d5407ed54e3 Mon Sep 17 00:00:00 2001 From: Stepan Blyschak Date: Wed, 12 Dec 2018 18:02:19 +0000 Subject: [PATCH 2/2] [mellanox|ffb] add comments for mellanox start/stop drivers section Signed-off-by: Stepan Blyschak --- files/scripts/syncd.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/scripts/syncd.sh b/files/scripts/syncd.sh index fd58f2e44e36..bef8ff11558c 100755 --- a/files/scripts/syncd.sh +++ b/files/scripts/syncd.sh @@ -94,6 +94,8 @@ start() { # platform specific tasks + # start mellanox drivers regardless of + # boot type if [ x"$sonic_asic_platform" == x"mellanox" ]; then BOOT_TYPE=`getBootType` if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then @@ -151,6 +153,8 @@ stop() { # platform specific tasks + # stop mellanox driver regardless of + # shutdown type if [ x$sonic_asic_platform == x'mellanox' ]; then /etc/init.d/sxdkernel stop /usr/bin/mst stop