diff --git a/dockers/docker-config-engine-bullseye/Dockerfile.j2 b/dockers/docker-config-engine-bullseye/Dockerfile.j2 index 92786ae7f94b..0d1c22b5daed 100644 --- a/dockers/docker-config-engine-bullseye/Dockerfile.j2 +++ b/dockers/docker-config-engine-bullseye/Dockerfile.j2 @@ -41,6 +41,7 @@ RUN pip3 uninstall -y enum34 # Copy files COPY ["files/swss_vars.j2", "/usr/share/sonic/templates/"] +COPY ["files/readiness_probe.sh", "/usr/bin/"] COPY ["files/container_startup.py", "/usr/share/sonic/scripts/"] ## Clean up diff --git a/dockers/docker-config-engine-buster/Dockerfile.j2 b/dockers/docker-config-engine-buster/Dockerfile.j2 index 197cd903b468..a4136a45ef89 100644 --- a/dockers/docker-config-engine-buster/Dockerfile.j2 +++ b/dockers/docker-config-engine-buster/Dockerfile.j2 @@ -41,6 +41,7 @@ RUN pip3 uninstall -y enum34 # Copy files COPY ["files/swss_vars.j2", "/usr/share/sonic/templates/"] +COPY ["files/readiness_probe.sh", "/usr/bin/"] COPY ["files/container_startup.py", "/usr/share/sonic/scripts/"] ## Clean up diff --git a/rules/docker-config-engine-bullseye.mk b/rules/docker-config-engine-bullseye.mk index 470f2463a5fd..ad12c59d5438 100644 --- a/rules/docker-config-engine-bullseye.mk +++ b/rules/docker-config-engine-bullseye.mk @@ -17,6 +17,7 @@ $(DOCKER_CONFIG_ENGINE_BULLSEYE)_PYTHON_WHEELS += $(SONIC_CONFIG_ENGINE_PY3) $(DOCKER_CONFIG_ENGINE_BULLSEYE)_LOAD_DOCKERS += $(DOCKER_BASE_BULLSEYE) $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $(SWSS_VARS_TEMPLATE) $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT) +$(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE) $(DOCKER_CONFIG_ENGINE_BULLSEYE)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT) $(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_DEPENDS = $($(DOCKER_BASE_BULLSEYE)_DBG_DEPENDS) \ diff --git a/rules/docker-config-engine-buster.mk b/rules/docker-config-engine-buster.mk index ef43fb66f081..9be26b1da236 100644 --- a/rules/docker-config-engine-buster.mk +++ b/rules/docker-config-engine-buster.mk @@ -17,6 +17,7 @@ $(DOCKER_CONFIG_ENGINE_BUSTER)_PYTHON_WHEELS += $(SONIC_CONFIG_ENGINE_PY3) $(DOCKER_CONFIG_ENGINE_BUSTER)_LOAD_DOCKERS += $(DOCKER_BASE_BUSTER) $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $(SWSS_VARS_TEMPLATE) $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT) +$(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE) $(DOCKER_CONFIG_ENGINE_BUSTER)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT) $(DOCKER_CONFIG_ENGINE_BUSTER)_DBG_DEPENDS = $($(DOCKER_BASE_BUSTER)_DBG_DEPENDS) \ diff --git a/rules/sonic-ctrmgrd.mk b/rules/sonic-ctrmgrd.mk index 659a2cf4ace1..167d78c43c88 100644 --- a/rules/sonic-ctrmgrd.mk +++ b/rules/sonic-ctrmgrd.mk @@ -20,12 +20,16 @@ $($(SONIC_CTRMGRD)_CFG_JSON)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH) $(SONIC_CTRMGRD)_SERVICE = ctrmgrd.service $($(SONIC_CTRMGRD)_SERVICE)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH) +$(SONIC_CTRMGRD)_HEALTH_PROBE = readiness_probe.sh +$($(SONIC_CTRMGRD)_HEALTH_PROBE)_PATH = $($(SONIC_CTRMGRD)_FILES_PATH) + SONIC_PYTHON_WHEELS += $(SONIC_CTRMGRD) $(SONIC_CTRMGRD)_FILES = $($(SONIC_CTRMGRD)_CONTAINER_SCRIPT) $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_STARTUP_SCRIPT) $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_CFG_JSON) $(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_SERVICE) +$(SONIC_CTRMGRD)_FILES += $($(SONIC_CTRMGRD)_HEALTH_PROBE) SONIC_COPY_FILES += $($(SONIC_CTRMGRD)_FILES) diff --git a/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh b/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh new file mode 100644 index 000000000000..9e796ca03816 --- /dev/null +++ b/src/sonic-ctrmgrd/ctrmgr/readiness_probe.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# This script is used by k8s to check the readiness of containers +# Check if the container is readiness or not, exit code 0 means readiness, others mean not readiness + +#### exit code contract, k8s only cares zero or not none-zero, but we want to use none-zero code to indicate different error +# 0: readiness +# 1: if the hook script is python code, the default crash exit code is 1 +# 2: supervisor start service doesn't exit normally +# other exit code: returned by post_check_script, define in the post_check_script, should not include 1,2 + +# check if the start service exists +# if the start service doesn't exist, do nothing +# if the start service exists, check if it exits normally +# if the start service doesn't exit normally, exit with code 2 +pre_check_service_name="start" +no_process_string="ERROR (no such process)" +service_status=$(supervisorctl status $pre_check_service_name) +if [[ $service_status != *"$no_process_string"* ]] && [[ $(echo $service_status |awk '{print $2}') != 'EXITED' ]]; then + exit 2 +fi + +# feature owner can add their own readiness check script +# check if the post_check_script exists +# if the post_check_script exists, run it +# if the post_check_script exits with non-zero code, exit with the code +post_check_script="/usr/bin/readiness_probe_hook" +if [ -x $post_check_script ]; then + $post_check_script + post_check_result=$? + if [ $post_check_result != 0 ]; then + exit $post_check_result + fi +fi + +exit 0