From c2a4d749aefe230eaf9918a8031f08e40a7e81de Mon Sep 17 00:00:00 2001
From: Akihiro Suda
Date: Mon, 13 Jul 2020 18:24:54 +0900
Subject: [PATCH 1/3] podman: unlock rootless

Signed-off-by: Akihiro Suda
---
 pkg/cluster/internal/providers/podman/provider.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pkg/cluster/internal/providers/podman/provider.go b/pkg/cluster/internal/providers/podman/provider.go
index 99616b0a1e..bcf468fcbc 100644
--- a/pkg/cluster/internal/providers/podman/provider.go
+++ b/pkg/cluster/internal/providers/podman/provider.go
@@ -60,10 +60,8 @@ func (p *Provider) Provision(status *cli.Status, cfg *config.Cluster) (err error
 		return err
 	}
 
-	// kind doesn't work with podman rootless, surface an error
 	if os.Geteuid() != 0 {
-		p.logger.Errorf("podman provider does not work properly in rootless mode")
-		os.Exit(1)
+		p.logger.Warn("support for rootless mode is experimental, some features may not work")
 	}
 
 	// TODO: validate cfg

From 559014906877c6a3efe0fdc31e4b37c25ca7cd48 Mon Sep 17 00:00:00 2001
From: Akihiro Suda
Date: Mon, 13 Jul 2020 18:45:10 +0900
Subject: [PATCH 2/3] base: ignore EACCES from `mount -o remount,ro /sys`

`mount -o remount,ro /sys` fails with `permission denied` on rootless
Docker and on rootless Podman, but the error is negligible, so the
entrypoint now logs a warning instead of aborting.

Signed-off-by: Akihiro Suda
---
Index blob hashes were not regenerated after review fixups; apply with
`git apply` (without --3way).

 images/base/files/usr/local/bin/entrypoint | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint
index 327040fb1b..acc0ec6331 100755
--- a/images/base/files/usr/local/bin/entrypoint
+++ b/images/base/files/usr/local/bin/entrypoint
@@ -38,7 +38,11 @@ fix_mount() {
   # https://systemd.io/CONTAINER_INTERFACE/
   # however, we need other things from `docker run --privileged` ...
   # and this flag also happens to make /sys rw, amongst other things
-  mount -o remount,ro /sys
+  #
+  # EACCES on rootless is negligible, so warn instead of aborting.
+  if ! mount -o remount,ro /sys; then
+    echo 'WARN: failed to remount /sys read-only (expected on rootless)' >&2
+  fi
 
   echo 'INFO: making mounts shared' >&2
   # for mount propagation

From 554d2e076b1ea0fb55fcdff5cf8d972933bb78df Mon Sep 17 00:00:00 2001
From: Akihiro Suda
Date: Mon, 13 Jul 2020 19:30:55 +0900
Subject: [PATCH 3/3] containerd: add /etc/containerd/config-rootless.toml

`/etc/containerd/config-rootless.toml` is the config for running kind in
rootless Docker/Podman.

* `ociwrapper` script is used to remove `.linux.resources.devices` and
  `.linux.devices` from `config.json`, because they are meaningless on
  rootless and yet produce errors.
  Workaround until we get proper fixes in containerd and runc.

* restrict_oom_score_adj is set to true to ignore oom_score_adj errors

The entrypoint overrides `/etc/containerd/config.toml` with
`config-rootless.toml` when running in rootless Docker/Podman.
The rootless-ness is detected by comparing `/proc/1/uid_map` with
`0 0 4294967295`.

Note that Kubernetes needs to be patched as well (see the PR description
text).

Signed-off-by: Akihiro Suda
---
Index blob hashes were not regenerated after review fixups; apply with
`git apply` (without --3way).

 images/base/Dockerfile                     |  1 +
 images/base/files/usr/local/bin/entrypoint |  8 +++++
 images/base/files/usr/local/bin/ociwrapper | 45 ++++++++++++++++++++++
 pkg/build/nodeimage/build_impl.go          | 15 +++++++-
 pkg/build/nodeimage/containerd_config.go   | 13 +++++--
 5 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100755 images/base/files/usr/local/bin/ociwrapper

diff --git a/images/base/Dockerfile b/images/base/Dockerfile
index d580333835..f6b9ccedf7 100644
--- a/images/base/Dockerfile
+++ b/images/base/Dockerfile
@@ -70,6 +70,7 @@ RUN echo "Ensuring scripts are executable ..." \
       libseccomp2 pigz \
       bash ca-certificates curl rsync \
       nfs-common \
+      jq \
     && find /lib/systemd/system/sysinit.target.wants/ -name "systemd-tmpfiles-setup.service" -delete \
     && rm -f /lib/systemd/system/multi-user.target.wants/* \
     && rm -f /etc/systemd/system/*.wants/* \
diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint
index acc0ec6331..fbb2da1df5 100755
--- a/images/base/files/usr/local/bin/entrypoint
+++ b/images/base/files/usr/local/bin/entrypoint
@@ -236,6 +236,13 @@ enable_network_magic(){
   fi
 }
 
+select_containerd_config_toml() {
+  if ! grep -Eq "0[[:space:]]+0[[:space:]]+4294967295" /proc/1/uid_map; then
+    echo "INFO: Detected rootless provider. Overriding /etc/containerd/config.toml with /etc/containerd/config-rootless.toml" >&2
+    cp -f /etc/containerd/config-rootless.toml /etc/containerd/config.toml
+  fi
+}
+
 # run pre-init fixups
 fix_kmsg
 fix_mount
@@ -246,6 +253,7 @@ fix_product_uuid
 configure_proxy
 select_iptables
 enable_network_magic
+select_containerd_config_toml
 
 # we want the command (expected to be systemd) to be PID1, so exec to it
 exec "$@"
diff --git a/images/base/files/usr/local/bin/ociwrapper b/images/base/files/usr/local/bin/ociwrapper
new file mode 100755
index 0000000000..8a7f22fc4c
--- /dev/null
+++ b/images/base/files/usr/local/bin/ociwrapper
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# A wrapper script to remove .linux.resources.devices and .linux.devices,
+# which are meaningless in userns. Needs jq.
+#
+# Workaround until we get proper fixes in containerd and runc
+set -eu -o pipefail
+RUNTIME="runc"
+
+if grep -Eq "0[[:space:]]+0[[:space:]]+4294967295" /proc/self/uid_map; then
+  # we are not in userns, no need to patch the config
+  exec "$RUNTIME" "$@"
+fi
+
+# Locate the bundle directory: `-b DIR`, `--bundle DIR`, `-b=DIR` or
+# `--bundle=DIR`. Defaults to the current directory, like runc does.
+bundle="."
+prev=""
+for arg in "$@"; do
+  if [[ $prev == "-b" || $prev == "--bundle" ]]; then
+    bundle=$arg
+    break
+  fi
+  case $arg in
+  -b=* | --bundle=*)
+    bundle=${arg#*=}
+    break
+    ;;
+  esac
+  prev=$arg
+done
+
+if [[ -f "$bundle/config.json" ]]; then
+  q="del(.linux.resources.devices) | del(.linux.devices)"
+  # Stage the rewrite inside the bundle so the final mv is a same-filesystem
+  # rename, and make sure the staging dir is removed even if jq fails.
+  tmp=$(mktemp -d "$bundle/ociwrapper.XXXXXXXX")
+  trap 'rm -rf "$tmp"' EXIT
+  jq "$q" <"$bundle/config.json" >"$tmp/config.json"
+  mv "$tmp/config.json" "$bundle/config.json"
+  rm -rf "$tmp"
+  trap - EXIT
+fi
+
+exec "$RUNTIME" "$@"
diff --git a/pkg/build/nodeimage/build_impl.go b/pkg/build/nodeimage/build_impl.go
index 3af2671606..8ac4bd1195 100644
--- a/pkg/build/nodeimage/build_impl.go
+++ b/pkg/build/nodeimage/build_impl.go
@@ -187,7 +187,8 @@ func (c *buildContext) buildImage(dir string) error {
 		return errors.New("failed to find imported pause image")
 	}
 	containerdConfig, err := getContainerdConfig(containerdConfigTemplateData{
-		SandboxImage: pauseImage,
+		SandboxImage:       pauseImage,
+		DefaultRuntimeName: "runc",
 	})
 	if err != nil {
 		return err
@@ -196,6 +197,18 @@ func (c *buildContext) buildImage(dir string) error {
 	if err := createFile(cmder, containerdConfigPath, containerdConfig); err != nil {
 		return err
 	}
+	containerdRootlessConfig, err := getContainerdConfig(containerdConfigTemplateData{
+		SandboxImage:        pauseImage,
+		DefaultRuntimeName:  "ociwrapper",
+		RestrictOOMScoreAdj: true,
+	})
+	if err != nil {
+		return err
+	}
+	const containerdRootlessConfigPath = "/etc/containerd/config-rootless.toml"
+	if err := createFile(cmder, containerdRootlessConfigPath, containerdRootlessConfig); err != nil {
+		return err
+	}
 
 	// Save the image changes to a new image
 	cmd := exec.Command(
diff --git a/pkg/build/nodeimage/containerd_config.go b/pkg/build/nodeimage/containerd_config.go
index 910dac9e22..b2fa483b4f 100644
--- a/pkg/build/nodeimage/containerd_config.go
+++ b/pkg/build/nodeimage/containerd_config.go
@@ -24,7 +24,9 @@ import (
 )
 
 type containerdConfigTemplateData struct {
-	SandboxImage string
+	SandboxImage        string
+	DefaultRuntimeName  string
+	RestrictOOMScoreAdj bool
 }
 
 const containerdConfigTemplate = `# explicitly use v2 config format
@@ -32,10 +34,15 @@ version = 2
 
 # set default runtime handler to v2, which has a per-pod shim
 [plugins."io.containerd.grpc.v1.cri".containerd]
-  default_runtime_name = "runc"
+  default_runtime_name = "{{.DefaultRuntimeName}}"
 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
   runtime_type = "io.containerd.runc.v2"
 
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.ociwrapper]
+  runtime_type = "io.containerd.runc.v2"
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.ociwrapper.options]
+  BinaryName = "ociwrapper"
+
 # Setup a runtime with the magic name ("test-handler") used for Kubernetes
 # runtime class tests ...
 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test-handler]
@@ -49,6 +56,8 @@ version = 2
   # allow hugepages controller to be missing
   # see https://github.com/containerd/cri/pull/1501
   tolerate_missing_hugepages_controller = true
+  # restrict_oom_score_adj is required if we are running in UserNS (i.e. Rootless Docker/Podman),
+  restrict_oom_score_adj = {{.RestrictOOMScoreAdj}}
 `
 
 func getContainerdConfig(data containerdConfigTemplateData) (string, error) {