Skip to content

Commit

Permalink
Merge pull request #774 from cyphar/rootless-containers
Browse files Browse the repository at this point in the history
Rootless Containers
  • Loading branch information
Mrunal Patel authored Mar 27, 2017
2 parents ef9a4b3 + ba38383 commit 653207b
Show file tree
Hide file tree
Showing 46 changed files with 1,257 additions and 306 deletions.
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/a
RUN apt-get update && apt-get install -y \
build-essential \
curl \
sudo \
gawk \
iptables \
jq \
Expand All @@ -22,6 +23,12 @@ RUN apt-get update && apt-get install -y \
--no-install-recommends \
&& apt-get clean

# Add a dummy user for the rootless integration tests. While runC does
# not require an entry in /etc/passwd to operate, one of the tests uses
# `git clone` -- and `git clone` does not allow you to clone a
# repository if the current uid does not have an entry in /etc/passwd.
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless

# install bats
RUN cd /tmp \
&& git clone https://github.com/sstephenson/bats.git \
Expand Down
13 changes: 10 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
PREFIX := $(DESTDIR)/usr/local
BINDIR := $(PREFIX)/sbin
BINDIR := $(PREFIX)/bin
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
Expand Down Expand Up @@ -79,10 +79,10 @@ runcimage:
docker build -t $(RUNC_IMAGE) .

test:
make unittest integration
make unittest integration rootlessintegration

localtest:
make localunittest localintegration
make localunittest localintegration localrootlessintegration

unittest: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest
Expand All @@ -96,6 +96,13 @@ integration: runcimage
localintegration: all
bats -t tests/integration${TESTFLAGS}

rootlessintegration: runcimage
docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration

# FIXME: This should not be separate from rootlessintegration's method of running.
localrootlessintegration: all
sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS}

shell: all
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash

Expand Down
5 changes: 5 additions & 0 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ checkpointed.`,
if err := checkArgs(context, 1, exactArgs); err != nil {
return err
}
// XXX: Currently this is untested with rootless containers.
if isRootless() {
return fmt.Errorf("runc checkpoint requires root")
}

container, err := getContainer(context)
if err != nil {
return err
Expand Down
3 changes: 0 additions & 3 deletions exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ following will output a list of processes running in the container:
if err := checkArgs(context, 1, minArgs); err != nil {
return err
}
if os.Geteuid() != 0 {
return fmt.Errorf("runc should be run as root")
}
if err := revisePidFile(context); err != nil {
return err
}
Expand Down
24 changes: 5 additions & 19 deletions libcontainer/cgroups/fs/apply_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}

func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}

func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
Expand All @@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}

parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
Expand Down
128 changes: 128 additions & 0 deletions libcontainer/cgroups/rootless/rootless.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// +build linux

package rootless

import (
"fmt"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)

// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.

var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}

type subsystem interface {
// Name returns the name of the subsystem.
Name() string

// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}

// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.

type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}

func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}

// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}

// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()

path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}

paths[name] = path
}

m.Paths = paths
return nil
}

func (m *Manager) GetPaths() map[string]string {
return m.Paths
}

func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}

func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}

func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}

func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}

func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}

func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}
2 changes: 1 addition & 1 deletion libcontainer/cgroups/systemd/apply_systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}

initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}
Expand Down
41 changes: 37 additions & 4 deletions libcontainer/cgroups/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ type Mount struct {
Subsystems []string
}

func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
Expand Down Expand Up @@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}

// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
Expand All @@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}

func GetInitCgroupDir(subsystem string) (string, error) {
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}

return getCgroupPathHelper(subsystem, cgroup)
}

func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
Expand All @@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}

func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}

return getCgroupPathHelper(subsystem, cgroup)
}

func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}

// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}

return filepath.Join(mnt, relCgroup), nil
}

func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ type Config struct {
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`

// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
}

type Hooks struct {
Expand Down
Loading

0 comments on commit 653207b

Please sign in to comment.