From e930bf9d8a2cd9f23b62ff762d9d8bd4bb318fc0 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 11 Sep 2024 17:57:01 -0700 Subject: [PATCH] runc create/run: warn on rootless + shared pidns + no cgroup Shared pid namespace means `runc kill` (or `runc delete -f`) have to kill all container process, not just init. To do so, it needs a cgroup to read the PIDs from. If there is no cgroup, it's impossible to kill all container processes. Therefore, such configuration is bad and should not be allowed. To keep backward compatibility, though, let's merely warn about this for now. Alas, the only way to know if cgroup access is available is by returning an error from Manager.Apply. Amend fs cgroup managers to do so (systemd doesn't need it, since v1 can't work with rootless, and cgroup v2 does not have a special rootless case). Signed-off-by: Kir Kolyshkin --- libcontainer/cgroups/cgroups.go | 5 +++++ libcontainer/cgroups/fs/fs.go | 5 +++-- libcontainer/cgroups/fs2/fs2.go | 2 +- libcontainer/process_linux.go | 13 ++++++++++++- tests/integration/create.bats | 27 +++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go index 811f2d26e00..53e194c74dc 100644 --- a/libcontainer/cgroups/cgroups.go +++ b/libcontainer/cgroups/cgroups.go @@ -11,6 +11,11 @@ var ( // is not configured to set device rules. ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules") + // ErrRootless is returned by [Manager.Apply] when there is an error + // creating cgroup directory, and cgroup.Rootless is set. In general, + // this error is to be ignored. + ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)") + // DevicesSetV1 and DevicesSetV2 are functions to set devices for // cgroup v1 and v2, respectively. Unless // [github.com/opencontainers/runc/libcontainer/cgroups/devices] diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index d2decb127ca..ba15bfc4077 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -105,7 +105,7 @@ func isIgnorableError(rootless bool, err error) bool { return false } -func (m *Manager) Apply(pid int) (err error) { +func (m *Manager) Apply(pid int) (retErr error) { m.mu.Lock() defer m.mu.Unlock() @@ -129,6 +129,7 @@ func (m *Manager) Apply(pid int) (err error) { // later by Set, which fails with a friendly error (see // if path == "" in Set). if isIgnorableError(c.Rootless, err) && c.Path == "" { + retErr = cgroups.ErrRootless delete(m.paths, name) continue } @@ -136,7 +137,7 @@ func (m *Manager) Apply(pid int) (err error) { } } - return nil + return retErr } func (m *Manager) Destroy() error { diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index b1be7df5ccc..93f81bf8dae 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -71,7 +71,7 @@ func (m *Manager) Apply(pid int) error { if m.config.Rootless { if m.config.Path == "" { if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed { - return nil + return cgroups.ErrRootless } return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err) } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 9a2473f716e..4af035283a1 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -580,7 +580,18 @@ func (p *initProcess) start() (retErr error) { // cgroup. We don't need to worry about not doing this and not being root // because we'd be using the rootless cgroup manager in that case. if err := p.manager.Apply(p.pid()); err != nil { - return fmt.Errorf("unable to apply cgroup configuration: %w", err) + if errors.Is(err, cgroups.ErrRootless) { + // ErrRootless is to be ignored except when the + // container doesn't have private pidns. + if !p.config.Config.Namespaces.IsPrivate(configs.NEWPID) { + // TODO: make this an error in runc 1.3. + logrus.Warn("Creating a rootless container with no cgroup and no private pid namespace. " + + "Such configuration is strongly discouraged (as it is impossible to properly kill all container's processes) " + + "and will result in an error in a future runc version.") + } + } else { + return fmt.Errorf("unable to apply cgroup configuration: %w", err) + } } if p.intelRdtManager != nil { if err := p.intelRdtManager.Apply(p.pid()); err != nil { diff --git a/tests/integration/create.bats b/tests/integration/create.bats index 938ac8b63dc..edb0aa5f5be 100644 --- a/tests/integration/create.bats +++ b/tests/integration/create.bats @@ -81,3 +81,30 @@ function teardown() { testcontainer test_busybox running } + +# https://github.com/opencontainers/runc/issues/4394#issuecomment-2334926257 +@test "runc create [shared pidns + rootless]" { + # Remove pidns so it's shared with the host. + update_config ' .linux.namespaces -= [{"type": "pid"}]' + if [ $EUID -ne 0 ]; then + if rootless_cgroup; then + set_cgroups_path + fi + # Can't mount real /proc when rootless + no pidns, + # so change it to a bind-mounted one from the host. + update_config ' .mounts |= map((select(.type == "proc") + | .type = "none" + | .source = "/proc" + | .options = ["rbind", "nosuid", "nodev", "noexec"] + ) // .)' + fi + + exp="Such configuration is strongly discouraged" + runc create --console-socket "$CONSOLE_SOCKET" test + [ "$status" -eq 0 ] + if [ $EUID -ne 0 ] && ! rootless_cgroup; then + [[ "$output" = *"$exp"* ]] + else + [[ "$output" != *"$exp"* ]] + fi +}