Skip to content

Commit

Permalink
runc create/run: warn on rootless + shared pidns + no cgroup
Browse files Browse the repository at this point in the history
Shared pid namespace means `runc kill` (or `runc delete -f`) have to
kill all container process, not just init. To do so, it needs a cgroup
to read the PIDs from.

If there is no cgroup, it's impossible to kill all container processes.

Therefore, such configuration is bad and should not be allowed. To keep
backward compatibility, though, let's merely warn about this for now.

Alas, the only way to know if cgroup access is available is by returning
an error from Manager.Apply. Amend fs cgroup managers to do so (systemd
doesn't need it, since v1 can't work with rootless, and cgroup v2 does
not have a special rootless case).

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Sep 13, 2024
1 parent 49bc17f commit e930bf9
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 4 deletions.
5 changes: 5 additions & 0 deletions libcontainer/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ var (
// is not configured to set device rules.
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")

// ErrRootless is returned by [Manager.Apply] when there is an error
// creating cgroup directory, and cgroup.Rootless is set. In general,
// this error is to be ignored.
ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")

// DevicesSetV1 and DevicesSetV2 are functions to set devices for
// cgroup v1 and v2, respectively. Unless
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
Expand Down
5 changes: 3 additions & 2 deletions libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func isIgnorableError(rootless bool, err error) bool {
return false
}

func (m *Manager) Apply(pid int) (err error) {
func (m *Manager) Apply(pid int) (retErr error) {
m.mu.Lock()
defer m.mu.Unlock()

Expand All @@ -129,14 +129,15 @@ func (m *Manager) Apply(pid int) (err error) {
// later by Set, which fails with a friendly error (see
// if path == "" in Set).
if isIgnorableError(c.Rootless, err) && c.Path == "" {
retErr = cgroups.ErrRootless
delete(m.paths, name)
continue
}
return err
}

}
return nil
return retErr
}

func (m *Manager) Destroy() error {
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (m *Manager) Apply(pid int) error {
if m.config.Rootless {
if m.config.Path == "" {
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
return nil
return cgroups.ErrRootless
}
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
}
Expand Down
13 changes: 12 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,18 @@ func (p *initProcess) start() (retErr error) {
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
if errors.Is(err, cgroups.ErrRootless) {
// ErrRootless is to be ignored except when the
// container doesn't have private pidns.
if !p.config.Config.Namespaces.IsPrivate(configs.NEWPID) {
// TODO: make this an error in runc 1.3.
logrus.Warn("Creating a rootless container with no cgroup and no private pid namespace. " +
"Such configuration is strongly discouraged (as it is impossible to properly kill all container's processes) " +
"and will result in an error in a future runc version.")
}
} else {
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
}
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
Expand Down
27 changes: 27 additions & 0 deletions tests/integration/create.bats
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,30 @@ function teardown() {

testcontainer test_busybox running
}

# https://github.com/opencontainers/runc/issues/4394#issuecomment-2334926257
@test "runc create [shared pidns + rootless]" {
# Remove pidns so it's shared with the host.
update_config ' .linux.namespaces -= [{"type": "pid"}]'
if [ $EUID -ne 0 ]; then
if rootless_cgroup; then
set_cgroups_path
fi
# Can't mount real /proc when rootless + no pidns,
# so change it to a bind-mounted one from the host.
update_config ' .mounts |= map((select(.type == "proc")
| .type = "none"
| .source = "/proc"
| .options = ["rbind", "nosuid", "nodev", "noexec"]
) // .)'
fi

exp="Such configuration is strongly discouraged"
runc create --console-socket "$CONSOLE_SOCKET" test
[ "$status" -eq 0 ]
if [ $EUID -ne 0 ] && ! rootless_cgroup; then
[[ "$output" = *"$exp"* ]]
else
[[ "$output" != *"$exp"* ]]
fi
}

0 comments on commit e930bf9

Please sign in to comment.