From b39781b0673f3c00ec8cf029a3f3a9755316cffc Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 3 Oct 2023 17:44:39 -0700 Subject: [PATCH 1/3] tests/int: add selinux test case This is a test case to demonstrate the selinux vs dmz issue. The issue is, runc calls selinux.SetExecLabel and then execs the runc-dmz binary, but the execve is denied by selinux: > type=PROCTITLE msg=audit(10/05/2023 22:54:07.911:10904) : proctitle=/tmp/bats-run-sGk2sn/runc.Ql243q/bundle/runc init > type=SYSCALL msg=audit(10/05/2023 22:54:07.911:10904) : arch=x86_64 syscall=execveat success=no exit=EACCES(Permission denied) a0=0x6 a1=0xc0000b90fa a2=0xc0000a26a0 a3=0xc000024660 items=0 ppid=105316 pid=105327 auid=root uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=pts0 ses=8 comm=runc:[2:INIT] exe=/tmp/bats-run-sGk2sn/runc.Ql243q/bundle/runc subj=unconfined_u:unconfined_r:container_runtime_t:s0-s0:c0.c1023 key=(null) > type=AVC msg=audit(10/05/2023 22:54:07.911:10904) : avc: denied { entrypoint } for pid=105327 comm=runc:[2:INIT] path=/memfd:runc_cloned:runc-dmz (deleted) dev="tmpfs" ino=2341 scontext=system_u:system_r:container_t:s0:c4,c5 tcontext=unconfined_u:object_r:container_runtime_tmpfs_t:s0 tclass=file permissive=0 Once that error is fixed (by adding a selinux rule that enables it), we see one more error, also related to executing a file on tmpfs. Signed-off-by: Kir Kolyshkin --- .cirrus.yml | 6 ++-- Vagrantfile.fedora | 5 +++- tests/integration/selinux.bats | 55 ++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tests/integration/selinux.bats diff --git a/.cirrus.yml b/.cirrus.yml index cfd238f154e..0b24e5c5075 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -57,7 +57,7 @@ task: mkdir -p -m 0700 /root/.ssh vagrant ssh-config >> /root/.ssh/config guest_info_script: | - ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release && go version"' + ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release && go version && sestatus"' check_config_script: | ssh default /vagrant/script/check-config.sh unit_tests_script: | @@ -79,7 +79,7 @@ task: CIRRUS_WORKING_DIR: /home/runc GO_VERSION: "1.20" BATS_VERSION: "v1.9.0" - RPMS: gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs + RPMS: gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs container-selinux # yamllint disable rule:key-duplicates matrix: DISTRO: centos-7 @@ -170,6 +170,8 @@ task: # ----- df -T # ----- + sestatus + # ----- cat /proc/cpuinfo check_config_script: | /home/runc/script/check-config.sh diff --git a/Vagrantfile.fedora b/Vagrantfile.fedora index 4e9bd87c2ad..9b4f6d72687 100644 --- a/Vagrantfile.fedora +++ b/Vagrantfile.fedora @@ -23,12 +23,15 @@ Vagrant.configure("2") do |config| cat << EOF | dnf -y --exclude=kernel,kernel-core shell && break config install_weak_deps false update -install iptables gcc golang-go make glibc-static libseccomp-devel bats jq git-core criu fuse-sshfs +install iptables gcc golang-go make glibc-static libseccomp-devel bats jq git-core criu fuse-sshfs container-selinux ts run EOF done dnf clean all + # To avoid "avc: denied { nosuid_transition }" from SELinux as we run tests on /tmp. + mount -o remount,suid /tmp + # Prevent the "fatal: unsafe repository" git complain during build. git config --global --add safe.directory /vagrant diff --git a/tests/integration/selinux.bats b/tests/integration/selinux.bats new file mode 100644 index 00000000000..6265bd461d9 --- /dev/null +++ b/tests/integration/selinux.bats @@ -0,0 +1,55 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + requires root # for chcon + if ! selinuxenabled; then + skip "requires SELinux enabled and in enforcing mode" + fi + + setup_busybox + + # Use a copy of runc binary with proper selinux label set. + cp "$RUNC" . + export RUNC="$PWD/runc" + chcon -u system_u -r object_r -t container_runtime_exec_t "$RUNC" + + # Label container fs. + chcon -u system_u -r object_r -t container_file_t -R rootfs + + # Save the start date and time for ausearch. + AU_DD="$(date +%x)" + AU_TT="$(date +%H:%M:%S)" +} + +function teardown() { + teardown_bundle + # Show any avc denials. + if [[ -v AU_DD && -v AU_TT ]] && command -v ausearch &>/dev/null; then + ausearch -ts "$AU_DD" "$AU_TT" -i -m avc || true + fi +} + +# Baseline test, to check that runc works with selinux enabled. +@test "runc run (no selinux label)" { + update_config ' .process.args = ["/bin/true"]' + runc run tst + [ "$status" -eq 0 ] +} + +# https://github.com/opencontainers/runc/issues/4057 +@test "runc run (custom selinux label)" { + update_config ' .process.selinuxLabel |= "system_u:system_r:container_t:s0:c4,c5" + | .process.args = ["/bin/true"]' + runc run tst + [ "$status" -eq 0 ] +} + +@test "runc run (custom selinux label, RUNC_DMZ=legacy)" { + export RUNC_DMZ=legacy + update_config ' .process.selinuxLabel |= "system_u:system_r:container_t:s0:c4,c5" + | .process.args = ["/bin/true"]' + runc run tst + [ "$status" -eq 0 ] +} From 393c7a81c9bb9a35e84aebd87a0ffe044d19666e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 18 Oct 2023 19:21:20 -0700 Subject: [PATCH 2/3] README: fix reference to memfd-bind Signed-off-by: Kir Kolyshkin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 827f837e06f..f36eea6b6ef 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ The following build tags were used earlier, but are now obsoleted: - **apparmor** (since runc v1.0.0-rc93 the feature is always enabled) - **selinux** (since runc v1.0.0-rc93 the feature is always enabled) - [contrib-memfd-bind]: /contrib/memfd-bind/README.md + [contrib-memfd-bind]: /contrib/cmd/memfd-bind/README.md ### Running the test suite From 87bd784614a1246bfe706dbe5d159c4f242daec6 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 18 Oct 2023 20:15:07 -0700 Subject: [PATCH 3/3] Add dmz-vs-selinux kludge and a way to disable it Add a workaround for a problem of older container-selinux not allowing runc to use dmz feature. If runc sees that SELinux is in enforced mode and the container's SELinux label is set, it disables dmz. Add a build tag, runc_dmz_selinux_nocompat, which disables the workaround. Newer distros that ship container-selinux >= 2.224.0 (currently CentOS Stream 8 and 9, RHEL 8 and 9, and Fedora 38+) may build runc with this build tag set to benefit from dmz working with SELinux. Document the build tag in the top-level and libct/dmz READMEs. Use the build tag in our CI builds for CentOS Stream 9 and Fedora 38, as they already has container-selinux 2.224.0 available in updates. Add a TODO to use the build tag for CentOS Stream 8 once it has container-selinux updated. Signed-off-by: Kir Kolyshkin --- .cirrus.yml | 13 ++++++++++++- README.md | 2 ++ Vagrantfile.fedora | 3 +++ libcontainer/container_linux.go | 4 ++++ libcontainer/dmz/README.md | 12 ++++++++++++ libcontainer/dmz/selinux.go | 10 ++++++++++ libcontainer/dmz/selinux_compat.go | 28 ++++++++++++++++++++++++++++ 7 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 libcontainer/dmz/selinux.go create mode 100644 libcontainer/dmz/selinux_compat.go diff --git a/.cirrus.yml b/.cirrus.yml index 0b24e5c5075..a3ee5b5b156 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -57,7 +57,7 @@ task: mkdir -p -m 0700 /root/.ssh vagrant ssh-config >> /root/.ssh/config guest_info_script: | - ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release && go version && sestatus"' + ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release && go version && sestatus && rpm -q container-selinux"' check_config_script: | ssh default /vagrant/script/check-config.sh unit_tests_script: | @@ -159,6 +159,17 @@ task: echo -e "Host localhost\n\tStrictHostKeyChecking no\t\nIdentityFile /root/.ssh/id_ed25519\n" >> /root/.ssh/config sed -e "s,PermitRootLogin.*,PermitRootLogin prohibit-password,g" -i /etc/ssh/sshd_config systemctl restart sshd + + # Disable the dmz-vs-selinux workaround for distros that have container-selinux >= 2.224.0. + case $DISTRO in + # TODO: remove centos-stream-8. + centos-7|centos-stream-8) + # Do nothing. + ;; + *) + echo 'export EXTRA_BUILDTAGS=runc_dmz_selinux_nocompat' >> /root/.bashrc + ;; + esac host_info_script: | uname -a # ----- diff --git a/README.md b/README.md index f36eea6b6ef..7e40776bbb7 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ make BUILDTAGS="" |---------------|---------------------------------------|--------------------|---------------------| | `seccomp` | Syscall filtering using `libseccomp`. | yes | `libseccomp` | | `!runc_nodmz` | Reduce memory usage for CVE-2019-5736 protection by using a small C binary, [see `memfd-bind` for more details][contrib-memfd-bind]. `runc_nodmz` disables this feature and causes runc to use a different protection mechanism which will further increases memory usage temporarily during container startup. This feature can also be disabled at runtime by setting the `RUNC_DMZ=legacy` environment variable. | yes || +| `runc_dmz_selinux_nocompat` | Disables a SELinux DMZ workaround (new distros should set this). See [dmz README] for details. | no || The following build tags were used earlier, but are now obsoleted: - **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored) @@ -76,6 +77,7 @@ The following build tags were used earlier, but are now obsoleted: - **selinux** (since runc v1.0.0-rc93 the feature is always enabled) [contrib-memfd-bind]: /contrib/cmd/memfd-bind/README.md + [dmz README]: /libcontainer/dmz/README.md ### Running the test suite diff --git a/Vagrantfile.fedora b/Vagrantfile.fedora index 9b4f6d72687..b1e3d628894 100644 --- a/Vagrantfile.fedora +++ b/Vagrantfile.fedora @@ -32,6 +32,9 @@ EOF # To avoid "avc: denied { nosuid_transition }" from SELinux as we run tests on /tmp. mount -o remount,suid /tmp + # Disable selinux-vs-dmz workaround as Fedora doesn't need it. + echo 'export EXTRA_BUILDTAGS=runc_dmz_selinux_nocompat' >> /root/.bashrc + # Prevent the "fatal: unsafe repository" git complain during build. git config --global --add safe.directory /vagrant diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 35f4f5df390..f5d55a1649e 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -457,6 +457,10 @@ func slicesContains[S ~[]E, E comparable](slice S, needle E) bool { } func isDmzBinarySafe(c *configs.Config) bool { + if !dmz.WorksWithSELinux(c) { + return false + } + // Because we set the dumpable flag in nsexec, the only time when it is // unsafe to use runc-dmz is when the container process would be able to // race against "runc init" and bypass the ptrace_may_access() checks. diff --git a/libcontainer/dmz/README.md b/libcontainer/dmz/README.md index 3cfa913ff68..b415f9e3988 100644 --- a/libcontainer/dmz/README.md +++ b/libcontainer/dmz/README.md @@ -15,4 +15,16 @@ It also support all the architectures we support in runc. If the GOARCH we use for compiling doesn't support nolibc, it fallbacks to using the C stdlib. +## SELinux compatibility issue and a workaround + +Older SELinux policy can prevent runc to execute the dmz binary. The issue is +fixed in [container-selinux v2.224.0]. Yet, some older distributions may not +have the fix, so runc has a runtime workaround of disabling dmz if it finds +that SELinux is in enforced mode and the container SELinux label is set. + +Distributions that have a sufficiently new container-selinux can disable the +workaround by building runc with the `runc_dmz_selinux_nocompat` build flag, +essentially allowing dmz to be used together with SELinux. + [nolibc-upstream]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/include/nolibc?h=v6.6-rc3 +[container-selinux v2.224.0]: https://github.com/containers/container-selinux/releases/tag/v2.224.0 diff --git a/libcontainer/dmz/selinux.go b/libcontainer/dmz/selinux.go new file mode 100644 index 00000000000..49a76ec1741 --- /dev/null +++ b/libcontainer/dmz/selinux.go @@ -0,0 +1,10 @@ +//go:build runc_dmz_selinux_nocompat || !linux + +package dmz + +import "github.com/opencontainers/runc/libcontainer/configs" + +// WorksWithSELinux tells whether runc-dmz can work with SELinux. +func WorksWithSELinux(*configs.Config) bool { + return true +} diff --git a/libcontainer/dmz/selinux_compat.go b/libcontainer/dmz/selinux_compat.go new file mode 100644 index 00000000000..027a5e76243 --- /dev/null +++ b/libcontainer/dmz/selinux_compat.go @@ -0,0 +1,28 @@ +//go:build linux && !runc_dmz_selinux_nocompat + +package dmz + +import ( + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/selinux/go-selinux" +) + +// WorksWithSELinux tells whether runc-dmz can work with SELinux. +// +// Older SELinux policy can prevent runc to execute the dmz binary. The issue is +// fixed in container-selinux >= 2.224.0: +// +// - https://github.com/containers/container-selinux/issues/274 +// - https://github.com/containers/container-selinux/pull/280 +// +// Alas, there is is no easy way to do a runtime check if dmz works with +// SELinux, so the below workaround is enabled by default. It results in +// disabling dmz in case container SELinux label is set and the selinux is in +// enforced mode. +// +// Newer distributions that have the sufficiently new container-selinux version +// can build runc with runc_dmz_selinux_nocompat build flag to disable this +// workaround (essentially allowing dmz to be used together with SELinux). +func WorksWithSELinux(c *configs.Config) bool { + return c.ProcessLabel == "" || selinux.EnforceMode() != selinux.Enforcing +}