Skip to content

Commit

Permalink
Fix cgroup2 mount for rootless case
Browse files Browse the repository at this point in the history
In case of rootless, cgroup2 mount is not possible (see [1] for more
details), so since commit 9c81440 runc bind-mounts the whole
/sys/fs/cgroup into container.

Problem is, if cgroupns is enabled, /sys/fs/cgroup inside the container
is supposed to show the cgroup files for this cgroup, not the root one.

The fix is to pass through and use the cgroup path in case cgroup2
mount failed, cgroupns is enabled, and the path is non-empty.

Surely this requires the /sys/fs/cgroup mount in the spec, so modify
runc spec --rootless to keep it.

Before:

	$ ./runc run aaa
	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup
	/sys/fs/cgroup/user.slice
	/sys/fs/cgroup/user.slice/user-1000.slice
	/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service
	...
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 nobody   nogroup          0 Feb 24 02:22 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	142 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	cat: can't open '/sys/fs/cgroup/memory.current': No such file or directory

After:

	# find /sys/fs/cgroup/ -type d
	/sys/fs/cgroup/
	# ls -l /sys/fs/cgroup/cgroup.controllers
	-r--r--r--    1 root     root             0 Feb 24 02:43 /sys/fs/cgroup/cgroup.controllers
	# wc -w /sys/fs/cgroup/cgroup.procs
	2 /sys/fs/cgroup/cgroup.procs
	# cat /sys/fs/cgroup/memory.current
	577536

[1] opencontainers#2158

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Apr 20, 2021
1 parent 3826db1 commit ff692f2
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 16 deletions.
3 changes: 3 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
if cgroups.IsCgroup2UnifiedMode() {
cfg.Cgroup2Path = c.cgroupManager.Path("")
}

return cfg
}
Expand Down
1 change: 1 addition & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type initConfig struct {
RootlessEUID bool `json:"rootless_euid,omitempty"`
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
SpecState *specs.State `json:"spec_state,omitempty"`
Cgroup2Path string `json:"cgroup2_path,omitempty"`
}

type initer interface {
Expand Down
28 changes: 21 additions & 7 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ import (
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV

type mountConfig struct {
root string
label string
cgroupns bool
root string
label string
cgroup2Path string
rootlessCgroups bool
cgroupns bool
}

// needsSetupDev returns true if /dev needs to be set up.
Expand All @@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
}

mountConfig := &mountConfig{
root: config.Rootfs,
label: config.MountLabel,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
root: config.Rootfs,
label: config.MountLabel,
cgroup2Path: iConfig.Cgroup2Path,
rootlessCgroups: iConfig.RootlessCgroups,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
Expand Down Expand Up @@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
if err == unix.EPERM || err == unix.EBUSY {
src := fs2.UnifiedMountpoint
return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
if c.cgroupns && c.cgroup2Path != "" {
// Emulate cgroupns by bind-mounting
// the container cgroup path rather than
// the whole /sys/fs/cgroup.
src = c.cgroup2Path
}
err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
if err == unix.ENOENT && c.rootlessCgroups {
err = nil
}
return err
}
return err
}
Expand Down
18 changes: 9 additions & 9 deletions libcontainer/specconv/example.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package specconv

import (
"os"
"path/filepath"
"strings"

"github.com/opencontainers/runc/libcontainer/cgroups"
Expand Down Expand Up @@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
// Fix up mounts.
var mounts []specs.Mount
for _, mount := range spec.Mounts {
// Ignore all mounts that are under /sys.
if strings.HasPrefix(mount.Destination, "/sys") {
// Replace the /sys mount with an rbind.
if filepath.Clean(mount.Destination) == "/sys" {
mounts = append(mounts, specs.Mount{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
})
continue
}

Expand All @@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
mount.Options = options
mounts = append(mounts, mount)
}
// Add the sysfs mount as an rbind.
mounts = append(mounts, specs.Mount{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
})
spec.Mounts = mounts

// Remove cgroup settings.
Expand Down

0 comments on commit ff692f2

Please sign in to comment.