From 79a52b4349ead396cb70910d456de7ff1f118943 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 24 Apr 2023 15:49:19 -0700 Subject: [PATCH] libct/cg/sd: use systemd version when generating dev props Commit 343951a22b58c38feb added a call to os.Stat for the device path when generating systemd device properties, to avoid systemd warning for non-existing devices. The idea was, since systemd uses stat(2) to look up device properties for a given path, it will fail anyway. In addition, this allowed to suppress a warning like this from systemd: > Couldn't stat device /dev/char/10:200 NOTE that this was done because: - systemd could not add the rule anyway; - runs puts its own set of rules on top of what systemd does. Apparently, the above change broke some setups, resulting in inability to use e.g. /dev/null inside a container. My guess is this is because in cgroup v2 we add a second eBPF program, which is not used if the first one (added by systemd) returns "access denied". Next, commit 3b9582895b8685 fixed that by adding a call to os.Stat for "/sys/"+path (meaning, if "/dev/char/10:200" does not exist, we retry with "/sys/dev/char/10:200", and if it exists, proceed with adding a device rule with the original (non-"/sys") path). How that second fix ever worked was a mystery, because the path we gave to systemd still doesn't exist. Well, I think now I know. Since systemd v240 (commit 74c48bf5a8005f20) device access rules specified as /dev/{block|char}/MM:mm are no longer looked up on the filesystem, instead, if possible, those are parsed from the string. So, we need to do different things, depending on systemd version: - for systemd >= v240, use the /dev/{char,block}/MM:mm as is, without doing stat() -- since systemd doesn't do stat() either; - for older version, check if the path exists, and skip passing it on to systemd otherwise. - the check for /sys/dev/{block,char}/MM:mm is not needed in either case. Pass the systemd version to the function that generates the rules, and fix it accordingly. Signed-off-by: Kir Kolyshkin (cherry picked from commit d7208f59105e079ba037aea05f9d0603ba7f779f) Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/systemd/common.go | 32 +++++++++++--------------- libcontainer/cgroups/systemd/v1.go | 2 +- libcontainer/cgroups/systemd/v2.go | 2 +- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index 50746ae0c56..5d561facebc 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -177,7 +177,7 @@ func allowAllDevices() []systemdDbus.Property { // generateDeviceProperties takes the configured device rules and generates a // corresponding set of systemd properties to configure the devices correctly. -func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) { +func generateDeviceProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) { if r.SkipDevices { return nil, nil } @@ -238,9 +238,10 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err // trickery to convert things: // // * Concrete rules with non-wildcard major/minor numbers have to use - // /dev/{block,char} paths. This is slightly odd because it means - // that we cannot add whitelist rules for devices that don't exist, - // but there's not too much we can do about that. + // /dev/{block,char}/MAJOR:minor paths. Before v240, systemd uses + // stat(2) on such paths to look up device properties, meaning we + // cannot add whitelist rules for devices that don't exist. Since v240, + // device properties are parsed from the path string. // // However, path globbing is not support for path-based rules so we // need to handle wildcards in some other manner. @@ -288,21 +289,14 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err case devices.CharDevice: entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor) } - // systemd will issue a warning if the path we give here doesn't exist. - // Since all of this logic is best-effort anyway (we manually set these - // rules separately to systemd) we can safely skip entries that don't - // have a corresponding path. - if _, err := os.Stat(entry.Path); err != nil { - // Also check /sys/dev so that we don't depend on /dev/{block,char} - // being populated. (/dev/{block,char} is populated by udev, which - // isn't strictly required for systemd). Ironically, this happens most - // easily when starting containerd within a runc created container - // itself. - - // We don't bother with securejoin here because we create entry.Path - // right above here, so we know it's safe. - if _, err := os.Stat("/sys" + entry.Path); err != nil { - logrus.Warnf("skipping device %s for systemd: %s", entry.Path, err) + if sdVer < 240 { + // Old systemd versions use stat(2) on path to find out device major:minor + // numbers and type. If the path doesn't exist, it will not add the rule, + // emitting a warning instead. + // Since all of this logic is best-effort anyway (we manually set these + // rules separately to systemd) we can safely skip entries that don't + // have a corresponding path. + if _, err := os.Stat(entry.Path); err != nil { continue } } diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 046c3056fba..fe036b3bda5 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -77,7 +77,7 @@ var legacySubsystems = []subsystem{ func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { var properties []systemdDbus.Property - deviceProperties, err := generateDeviceProperties(r) + deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm)) if err != nil { return nil, err } diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index 94d24ee4502..919e5632f34 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -182,7 +182,7 @@ func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst // aren't the end of the world, but it is a bit concerning. However // it's unclear if systemd removes all eBPF programs attached when // doing SetUnitProperties... - deviceProperties, err := generateDeviceProperties(r) + deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm)) if err != nil { return nil, err }