From d63d16ba58d8e49c6d914c5c681ec7096977ed4d Mon Sep 17 00:00:00 2001 From: yozhao101 <56170650+yozhao101@users.noreply.github.com> Date: Fri, 17 Jun 2022 12:13:18 -0700 Subject: [PATCH] [memory_checker] Do not check memory usage of containers which are not created (#11129) Signed-off-by: Yong Zhao yozhao@microsoft.com Why I did it This PR aims to fix an issue (#10088) by enhancing the script memory_checker. Specifically, if a container is not created successfully while the device is booting/rebooting, then memory_checker does not need to check its memory usage. How I did it In the script memory_checker, a function is added to get the names of running containers. If the specified container name is not in the current list of running containers, then this script will exit without checking its memory usage. How to verify it I tested on a lab device by following these steps: Stop the telemetry container with the command sudo systemctl stop telemetry.service. Remove the telemetry container with the command docker rm telemetry. Check whether the script memory_checker, when run by Monit, generates the syslog message saying it will exit without checking the memory usage of telemetry. --- files/image_config/monit/memory_checker | 34 +++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/files/image_config/monit/memory_checker b/files/image_config/monit/memory_checker index 3cd0f2d80225..dfe270e79524 100755 --- a/files/image_config/monit/memory_checker +++ b/files/image_config/monit/memory_checker @@ -25,6 +25,8 @@ import sys import syslog import re +import docker + def get_command_result(command): """Executes the command and return the resulting output. @@ -86,7 +88,7 @@ def check_memory_usage(container_name, threshold_value): print("[{}]: Memory usage ({} Bytes) is larger than the threshold ({} Bytes)!" .format(container_name, mem_usage_bytes, threshold_value)) syslog.syslog(syslog.LOG_INFO, "[{}]: Memory usage ({} Bytes) is larger than the threshold ({} Bytes)!" 
- .format(container_name, mem_usage_bytes, threshold_value)) + .format(container_name, mem_usage_bytes, threshold_value)) sys.exit(3) else: syslog.syslog(syslog.LOG_ERR, "[memory_checker] Failed to retrieve memory value from '{}'" @@ -94,6 +96,28 @@ def check_memory_usage(container_name, threshold_value): sys.exit(4) +def get_running_container_names(): + """Retrieves names of running containers by talking to the docker daemon. + + Args: + None. + + Returns: + running_container_names: A list indicates names of running containers. + """ + try: + docker_client = docker.DockerClient(base_url='unix://var/run/docker.sock') + running_container_list = docker_client.containers.list(filters={"status": "running"}) + running_container_names = [ container.name for container in running_container_list ] + except (docker.errors.APIError, docker.errors.DockerException) as err: + syslog.syslog(syslog.LOG_ERR, + "Failed to retrieve the running container list from docker daemon! Error message is: '{}'" + .format(err)) + sys.exit(5) + + return running_container_names + + def main(): parser = argparse.ArgumentParser(description="Check memory usage of a container \ and an alerting message will be written into syslog if memory usage \ @@ -104,7 +128,13 @@ def main(): parser.add_argument("threshold_value", type=int, help="threshold value in bytes") args = parser.parse_args() - check_memory_usage(args.container_name, args.threshold_value) + running_container_names = get_running_container_names() + if args.container_name in running_container_names: + check_memory_usage(args.container_name, args.threshold_value) + else: + syslog.syslog(syslog.LOG_INFO, + "[memory_checker] Exits without checking memory usage since container '{}' is not running!" + .format(args.container_name)) if __name__ == "__main__":