diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 8cc8b343a96..5e89285df0a 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -626,6 +626,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + if cgroups.IsCgroup2UnifiedMode() { + cfg.Cgroup2Path = c.cgroupManager.Path("") + } return cfg } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index d6b50ff3b60..798e7a84d35 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -70,6 +70,7 @@ type initConfig struct { RootlessEUID bool `json:"rootless_euid,omitempty"` RootlessCgroups bool `json:"rootless_cgroups,omitempty"` SpecState *specs.State `json:"spec_state,omitempty"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` } type initer interface { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 1df9c2f5504..1d8a5a03601 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -31,9 +31,11 @@ import ( const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV type mountConfig struct { - root string - label string - cgroupns bool + root string + label string + cgroup2Path string + rootlessCgroups bool + cgroupns bool } // needsSetupDev returns true if /dev needs to be set up. @@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { } mountConfig := &mountConfig{ - root: config.Rootfs, - label: config.MountLabel, - cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), + root: config.Rootfs, + label: config.MountLabel, + cgroup2Path: iConfig.Cgroup2Path, + rootlessCgroups: iConfig.RootlessCgroups, + cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), } setupDev := needsSetupDev(config) for _, m := range config.Mounts { @@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error { // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) if err == unix.EPERM || err == unix.EBUSY { src := fs2.UnifiedMountpoint - return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "") + if c.cgroupns && c.cgroup2Path != "" { + // Emulate cgroupns by bind-mounting + // the container cgroup path rather than + // the whole /sys/fs/cgroup. + src = c.cgroup2Path + } + err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "") + if err == unix.ENOENT && c.rootlessCgroups { + err = nil + } + return err } return err } diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go index 8a201bc78dd..56bab3bfbfa 100644 --- a/libcontainer/specconv/example.go +++ b/libcontainer/specconv/example.go @@ -2,6 +2,7 @@ package specconv import ( "os" + "path/filepath" "strings" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) { // Fix up mounts. var mounts []specs.Mount for _, mount := range spec.Mounts { - // Ignore all mounts that are under /sys. - if strings.HasPrefix(mount.Destination, "/sys") { + // Replace the /sys mount with an rbind. + if filepath.Clean(mount.Destination) == "/sys" { + mounts = append(mounts, specs.Mount{ + Source: "/sys", + Destination: "/sys", + Type: "none", + Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, + }) continue } @@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) { mount.Options = options mounts = append(mounts, mount) } - // Add the sysfs mount as an rbind. - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) spec.Mounts = mounts // Remove cgroup settings.