From 9389a446a352a3fb35d7f1edd00716abe2814d8a Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 1 Jul 2021 12:55:08 +1000 Subject: [PATCH] cgroupv2: ebpf: ignore inaccessible existing programs This is necessary in order for runc to be able to configure device cgroups with --systemd-cgroup on distributions that have very strict SELinux policies such as openSUSE MicroOS[1]. The core issue here is that systemd is adding its own BPF policy that has an SELinux label such that runc cannot interact with it. In order to work around this, we can just ignore the policy -- in theory this behaviour is not correct but given that the most obvious case (--systemd-cgroup) will still handle updates correctly, this logic is reasonable. [1]: https://bugzilla.suse.com/show_bug.cgi?id=1182428 Fixes: d0f2c25f521e ("cgroup2: devices: replace all existing filters when attaching") Signed-off-by: Aleksa Sarai (cherry picked from commit 57e3c54182db6119c0081e841ec98f70db7b622d) Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/ebpf/ebpf_linux.go | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/libcontainer/cgroups/ebpf/ebpf_linux.go b/libcontainer/cgroups/ebpf/ebpf_linux.go index fa1b04e2599..5fc230bfa05 100644 --- a/libcontainer/cgroups/ebpf/ebpf_linux.go +++ b/libcontainer/cgroups/ebpf/ebpf_linux.go @@ -59,13 +59,26 @@ func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { // Convert the ids to program handles. progIds = progIds[:size] - programs := make([]*ebpf.Program, len(progIds)) - for idx, progId := range progIds { + programs := make([]*ebpf.Program, 0, len(progIds)) + for _, progId := range progIds { program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) if err != nil { + // We skip over programs that give us -EACCES or -EPERM. This + // is necessary because there may be BPF programs that have + // been attached (such as with --systemd-cgroup) which have an + // LSM label that blocks us from interacting with the program. + // + // Because additional BPF_CGROUP_DEVICE programs only can add + // restrictions, there's no real issue with just ignoring these + // programs (and stops runc from breaking on distributions with + // very strict SELinux policies). + if errors.Is(err, os.ErrPermission) { + logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err) + continue + } return nil, fmt.Errorf("cannot fetch program from id: %w", err) } - programs[idx] = program + programs = append(programs, program) } runtime.KeepAlive(progIds) return programs, nil