Skip to content

Commit

Permalink
Adding containerd compatability to oom_logger - COMPINFRA-3947 (#3904)
Browse files Browse the repository at this point in the history
* Adding containerd compatability to oom_logger

* Addressing reviews + adding new regex for containerd

* Adding in addition to the nerdctl regex a regex for capturing containerd-cri oom

* Updating packages for the k8s_itest to pass

* use wheels + main internal pypi

* Revert "Updating packages for the k8s_itest to pass"

This reverts commit 52b62c8.

* update addict

* update argcomplete

* Revert "update argcomplete"

This reverts commit 8a9f0d1.

* Revert "update addict"

This reverts commit aa1df40.

* prefer binary

* prefer binary harder

* prefer binary this way

* maybe?

* missed a spot

* upgrade??

* do we still need deadsnakes here

* anotha one

* more fixes

* deadsnakes

* distutils is fun

* cleanup

* Revert "Updating packages for the k8s_itest to pass"

This reverts commit 52b62c8.

* Removed the nerdctl regex and the adjusted ones for docker

* Address getting from config when its none

* Add a less structured regex for containerd

* Added two tests to test the regex if its working

* Adding a unit test for testing main with containerd=true

* Addressing more reviews

---------

Co-authored-by: Luis Perez <luisp@yelp.com>
  • Loading branch information
EmanElsaban and nemacysts authored Aug 29, 2024
1 parent 74508cc commit f2cc8fc
Show file tree
Hide file tree
Showing 12 changed files with 251 additions and 103 deletions.
1 change: 0 additions & 1 deletion debian/paasta-tools.links
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ opt/venvs/paasta-tools/bin/generate_deployments_for_service.py usr/bin/generate_
opt/venvs/paasta-tools/bin/generate_services_file.py usr/bin/generate_services_file
opt/venvs/paasta-tools/bin/generate_services_yaml.py usr/bin/generate_services_yaml
opt/venvs/paasta-tools/bin/generate_authenticating_services.py usr/bin/generate_authenticating_services
opt/venvs/paasta-tools/bin/kill_orphaned_docker_containers.py usr/bin/kill_orphaned_docker_containers
opt/venvs/paasta-tools/bin/kubernetes_remove_evicted_pods.py usr/bin/kubernetes_remove_evicted_pods
opt/venvs/paasta-tools/bin/paasta-api usr/bin/paasta-api
opt/venvs/paasta-tools/bin/paasta-fsm usr/bin/paasta-fsm
Expand Down

This file was deleted.

1 change: 0 additions & 1 deletion docs/source/generated/paasta_tools.monitoring.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ Submodules
.. toctree::

paasta_tools.monitoring.check_k8s_api_performance
paasta_tools.monitoring.kill_orphaned_docker_containers

Module contents
---------------
Expand Down
72 changes: 0 additions & 72 deletions paasta_tools/monitoring/kill_orphaned_docker_containers.py

This file was deleted.

83 changes: 73 additions & 10 deletions paasta_tools/oom_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,16 @@
destination(paasta_oom_logger);
};
"""
import argparse
import json
import re
import sys
from collections import namedtuple
from typing import Dict

import grpc
from containerd.services.containers.v1 import containers_pb2
from containerd.services.containers.v1 import containers_pb2_grpc
from docker.errors import APIError

from paasta_tools.cli.utils import get_instance_config
Expand Down Expand Up @@ -76,6 +82,16 @@
)


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="paasta_oom_logger")
parser.add_argument(
"--containerd",
action="store_true",
help="Use containerd to inspect containers, otherwise use docker",
)
return parser.parse_args()


def capture_oom_events_from_stdin():
process_name_regex = re.compile(
r"^\d+\s[a-zA-Z0-9\-]+\s.*\]\s(.+)\sinvoked\soom-killer:"
Expand All @@ -93,6 +109,25 @@ def capture_oom_events_from_stdin():
""",
re.VERBOSE,
)
oom_regex_kubernetes_containerd_systemd_cgroup = re.compile(
r"""
^(\d+)\s # timestamp
([a-zA-Z0-9\-]+) # hostname
\s.*oom-kill:.*task_memcg=/.*\.slice/.* # loosely match systemd slice and containerid
cri-containerd:(\w{64}).*$ # containerid
""",
re.VERBOSE,
)

oom_regex_kubernetes_containerd_systemd_cgroup_structured = re.compile(
r"""
^(\d+)\s # timestamp
([a-zA-Z0-9\-]+) # hostname
\s.*oom-kill:.*task_memcg=/kubepods\.slice/.* # match systemd slice and containerid
cri-containerd-(\w{64}).*$ # containerid
""",
re.VERBOSE,
)
oom_regex_kubernetes_structured = re.compile(
r"""
^(\d+)\s # timestamp
Expand All @@ -115,6 +150,8 @@ def capture_oom_events_from_stdin():
oom_regex_kubernetes,
oom_regex_kubernetes_structured,
oom_regex_kubernetes_systemd_cgroup,
oom_regex_kubernetes_containerd_systemd_cgroup,
oom_regex_kubernetes_containerd_systemd_cgroup_structured,
]

process_name = ""
Expand All @@ -136,11 +173,18 @@ def capture_oom_events_from_stdin():
break


def get_container_env_as_dict(docker_inspect):
def get_container_env_as_dict(
is_cri_containerd: bool, container_inspect: dict
) -> Dict[str, str]:
env_vars = {}
config = docker_inspect.get("Config")
if is_cri_containerd:
config = container_inspect.get("process")
env_key = "env"
else:
config = container_inspect.get("Config")
env_key = "Env"
if config is not None:
env = config.get("Env", [])
env = config.get(env_key, [])
for i in env:
name, _, value = i.partition("=")
env_vars[name] = value
Expand Down Expand Up @@ -209,18 +253,26 @@ def send_sfx_event(service, instance, cluster):
counter.count()


def get_containerd_container(container_id: str) -> containers_pb2.Container:
with grpc.insecure_channel("unix:///run/containerd/containerd.sock") as channel:
containersv1 = containers_pb2_grpc.ContainersStub(channel)
return containersv1.Get(
containers_pb2.GetContainerRequest(id=container_id),
metadata=(("containerd-namespace", "k8s.io"),),
).container


def main():
if clog is None:
print("CLog logger unavailable, exiting.", file=sys.stderr)
sys.exit(1)

args = parse_args()
clog.config.configure(
scribe_host="169.254.255.254",
scribe_port=1463,
monk_disable=False,
scribe_disable=False,
)

cluster = load_system_paasta_config().get_cluster()
client = get_docker_client()
for (
Expand All @@ -229,11 +281,22 @@ def main():
container_id,
process_name,
) in capture_oom_events_from_stdin():
try:
docker_inspect = client.inspect_container(resource_id=container_id)
except (APIError):
continue
env_vars = get_container_env_as_dict(docker_inspect)
if args.containerd:
# then we're using containerd to inspect containers
try:
container_info = get_containerd_container(container_id)
except grpc.RpcError as e:
print("An error occurred while getting the container:", e)
continue
container_spec_raw = container_info.spec.value.decode("utf-8")
container_inspect = json.loads(container_spec_raw)
else:
# we're using docker to inspect containers
try:
container_inspect = client.inspect_container(resource_id=container_id)
except (APIError):
continue
env_vars = get_container_env_as_dict(args.containerd, container_inspect)
service = env_vars.get("PAASTA_SERVICE", "unknown")
instance = env_vars.get("PAASTA_INSTANCE", "unknown")
mesos_container_id = env_vars.get("MESOS_CONTAINER_NAME", "mesos-null")
Expand Down
2 changes: 2 additions & 0 deletions requirements-minimal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ botocore
bravado >= 10.2.0
certifi
choice >= 0.1
containerd
cookiecutter >= 1.4.0
croniter
docker
dulwich >= 0.17.3
ephemeral-port-reserve >= 1.0.1
graphviz
grpcio
gunicorn
humanfriendly
humanize >= 0.5.1
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ certifi==2017.11.5
chardet==3.0.4
choice==0.1
click==6.6
containerd==1.5.3
cookiecutter==1.4.0
croniter==1.3.4
decorator==4.1.2
Expand All @@ -27,6 +28,7 @@ ephemeral-port-reserve==1.1.0
future==0.16.0
google-auth==1.2.0
graphviz==0.8.2
grpcio==1.62.2
gunicorn==19.8.1
http-parser==0.9.0
humanfriendly==4.18
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def get_install_requires():
"paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py",
"paasta_tools/kubernetes/bin/paasta_secrets_sync.py",
"paasta_tools/log_task_lifecycle_events.py",
"paasta_tools/monitoring/kill_orphaned_docker_containers.py",
"paasta_tools/paasta_deploy_tron_jobs",
"paasta_tools/paasta_execute_docker_command.py",
"paasta_tools/paasta_remote_run.py",
Expand Down
Loading

0 comments on commit f2cc8fc

Please sign in to comment.