Skip to content

Commit

Permalink
cgroups: deal with crun subgroups
Browse files Browse the repository at this point in the history
crun, an OCI container runtime used by cri-o, breaks pod association for
tetragon by placing processes in a cgroup below the cgroup specified by
the OCI spec:
https://github.com/containers/crun/blob/main/crun.1.md#runocisystemdsubgroupsubgroup.

With the introduction of cgidmap, this commit can finally deal with this issue
by scanning the cgroup directory for child directories and, if it finds exactly
one, using the cgroup id of that child.

A better solution would be to allow for multiple cgroup ids for each container,
but this is left as a followup.

The commit includes a script for testing this issue using minikube. Because
minikube uses an older version of crun, we need to install a newer one.

The steps for reproducing this are:
   minikube start --driver=kvm2 --container-runtime=crio --force-systemd=true
   ./scripts/minikube-install-crun.sh

Running tetragon without cgidmap, we observe events without pod association:
  🚀 process minikube /usr/bin/ls
  💥 exit    minikube /usr/bin/ls  0

By installing the runtime hooks:
 ./scripts/minikube-install-hook.sh

And running tetragon with cgidmap (and nri) using --enable-cri --enable-cgidmap,
we observe pod association for both old and new pods:

🚀 process default/test /usr/bin/ls
💥 exit    default/test /usr/bin/ls  0

Signed-off-by: Kornilios Kourtis <kornilios@isovalent.com>
  • Loading branch information
kkourt committed Aug 21, 2024
1 parent 0227f5d commit b1e8c44
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 3 deletions.
70 changes: 70 additions & 0 deletions contrib/tetragon-rthooks/scripts/minikube-install-crun.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
# vim:set noet ci pi ts=4 sw=4
#
# Install the crun binary bundled with the cri-o release tarball into a
# minikube VM and make it the default cri-o runtime.
#
# The script runs in two stages:
#   1. Without arguments (on the host): check that the cluster uses cri-o, copy
#      this script into the VM and re-invoke it there with the "install"
#      argument.
#   2. With "install" (inside the VM, via sudo): stop kubelet and cri-o, remove
#      all containers and pods, unpack the cri-o tarball matching the installed
#      cri-o version, install its crun/conmon binaries, write a cri-o drop-in
#      config selecting them, and start the services again.

set -o pipefail
set -e

if [ "$1" != "install" ]; then
	# Host stage. detect_runtime is not defined in this script; it is expected
	# to be provided by the sourced helpers file.
	SCRIPTPATH=$(dirname "$0")
	source "${SCRIPTPATH}/helpers"

	runtime=$(detect_runtime)
	if [ "$runtime" != "crio" ]; then
		echo "crio not installed, bailing out"
		exit 1
	fi

	name=$(basename "$0")
	minikube cp "$0" "/tmp/$name"
	minikube ssh sudo chmod +x "/tmp/$name"
	minikube ssh sudo "/tmp/$name" install
	exit 0
fi

# VM stage: trace every command from here on.
set -x

echo "Running inside minikube: $(uname -a)"
crio_v=$(crio --version | sed -ne 's/^Version:[[:space:]]\+\(.\+\)/\1/p')
echo "crio version: $crio_v"
crun_v=$(crun --version | sed -ne 's/^crun version[[:space:]]\+\(.\+\)/\1/p')
echo "old crun version: $crun_v"

# cleanup everything
#
# xargs -r: do not run crictl at all when there is nothing to stop/remove.
# Without it, an empty listing (e.g., when re-running the script after a
# partial failure) invokes crictl with no arguments, which fails and aborts the
# script because of "set -e" and "pipefail".
systemctl stop kubelet
crictl ps -a -q | xargs -r crictl stop
crictl ps -a -q | xargs -r crictl rm
crictl pods -q | xargs -r crictl stopp
crictl pods -q | xargs -r crictl rmp
systemctl stop crio

cd /tmp
tarball="cri-o.amd64.v${crio_v}.tar.gz"
if [ -f "${tarball}" ]; then
	echo "tarball ${tarball} exists, skipping download"
else
	# -f: fail on HTTP errors instead of saving the error page as the tarball;
	# -S: still print the error although -s silences the progress output.
	curl -fsSOL -C - "https://storage.googleapis.com/cri-o/artifacts/${tarball}"
fi
rm -rf cri-o
tar zxf "$tarball"
cd cri-o
cp ./bin/crio-{conmon,conmonrs,crun} /usr/bin
crio_crun_v=$(crio-crun --version | sed -ne 's/^crun version[[:space:]]\+\(.\+\)/\1/p')
echo "new crun version: $crio_crun_v"

# Write the drop-in to a temporary file first, fix its mode and ownership, and
# only then copy it into the cri-o configuration directory.
fname=$(mktemp -t crio-crun-conf.XXXXX)
cat >"$fname" <<EOF
[crio.runtime]
default_runtime = "crun"
[crio.runtime.runtimes.crun]
runtime_path = "/usr/bin/crio-crun"
monitor_path = "/usr/bin/crio-conmon"
allowed_annotations = [
"io.containers.trace-syscall",
]
EOF
chmod go+r "${fname}"
chown root:root "${fname}"
cp "$fname" /etc/crio/crio.conf.d/10-crun.conf
systemctl start crio
systemctl start kubelet
2 changes: 1 addition & 1 deletion pkg/cgidmap/cri.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func criResolve(m Map, id unmappedID) error {
}

path := filepath.Join(cgRoot, cgPath)
cgID, err := cgroups.GetCgroupIdFromPath(path)
cgID, err := cgroups.GetCgroupIDFromSubCgroup(path)
if err != nil {
return err
}
Expand Down
42 changes: 42 additions & 0 deletions pkg/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -822,3 +822,45 @@ func CgroupIDFromPID(pid uint32) (uint64, error) {

return cgID, nil
}

// GetCgroupIDFromSubCgroup returns the cgroup id for the cgroup directory p, accounting for
// container runtimes that place the container processes one level below that directory.
//
// Normally, container processes run in the cgroup path given by cgroupsPath in the OCI spec.
// crun is an exception: it uses another directory (called subgroup) under the cgroupsPath, see
// https://github.com/containers/crun/blob/main/crun.1.md#runocisystemdsubgroupsubgroup.
//
// To handle this, the function lists p and, if p contains exactly one sub-directory, resolves
// the cgroup id of that sub-directory instead. Entries that are not directories are ignored. If
// p cannot be listed, or it holds zero or several sub-directories, the cgroup id is resolved
// from p itself.
func GetCgroupIDFromSubCgroup(p string) (uint64, error) {
	entries, err := os.ReadDir(p)
	if err != nil {
		// Listing is best-effort: fall back to p and let the lookup below decide the outcome.
		return GetCgroupIdFromPath(p)
	}

	var (
		subdir  string
		numDirs int
	)
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		numDirs++
		if numDirs > 1 {
			// NB: several sub-directories, so there is no way to tell which one is the
			// right one. Stop looking and stick with p.
			break
		}
		subdir = entry.Name()
	}

	if numDirs == 1 {
		p = filepath.Join(p, subdir)
	}
	return GetCgroupIdFromPath(p)
}
2 changes: 1 addition & 1 deletion pkg/policyfilter/cgroupid.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ func (s *cgfsFinder) findCgroupID(podID PodID, containerID string) (CgroupID, er
} else if err != nil {
return CgroupID(0), err
}
cgid, err := cgroups.GetCgroupIdFromPath(path)
cgid, err := cgroups.GetCgroupIDFromSubCgroup(path)
return CgroupID(cgid), err
}
2 changes: 1 addition & 1 deletion pkg/rthooks/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (arg *CreateContainerArg) CgroupID() (uint64, error) {
}

path := filepath.Join(cgRoot, cgPath)
cgID, err := cgroups.GetCgroupIdFromPath(path)
cgID, err := cgroups.GetCgroupIDFromSubCgroup(path)
if err != nil {
return 0, err
}
Expand Down

0 comments on commit b1e8c44

Please sign in to comment.