Skip to content

Commit

Permalink
nixos/systemd-confinement: support ProtectSystem=/DynamicUser=
Browse files Browse the repository at this point in the history
  • Loading branch information
ju1m authored and aszlig committed May 12, 2024
1 parent 0d793f3 commit 0a5542c
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 22 deletions.
2 changes: 2 additions & 0 deletions nixos/doc/manual/release-notes/rl-2405.section.md
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
- `documentation.man.mandoc` now by default uses `MANPATH` to set the directories where mandoc will search for manual pages.
This enables mandoc to find manual pages in Nix profiles. To set the manual search paths via the `mandoc.conf` configuration file like before, use `documentation.man.mandoc.settings.manpath` instead.

- The `systemd-confinement` module extension is now compatible with `DynamicUser=true` and thus `ProtectSystem=strict` too.

- `grafana-loki` package was updated to 3.0.0 which includes [breaking changes](https://github.com/grafana/loki/releases/tag/v3.0.0).

- `programs.fish.package` now allows you to override the package used in the `fish` module.
35 changes: 21 additions & 14 deletions nixos/modules/security/systemd-confinement.nix
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,20 @@ in {
description = ''
The value `full-apivfs` (the default) sets up
private {file}`/dev`, {file}`/proc`,
{file}`/sys` and {file}`/tmp` file systems in a separate user
name space.
{file}`/sys`, {file}`/tmp` and {file}`/var/tmp` file systems
in a separate user name space.
If this is set to `chroot-only`, only the file
system name space is set up along with the call to
{manpage}`chroot(2)`.
In all cases, unless `serviceConfig.PrivateTmp=true` is set,
both {file}`/tmp` and {file}`/var/tmp` paths are added to `InaccessiblePaths=`.
This is to overcome options like `DynamicUser=true`
implying `PrivateTmp=true` without allowing it to be turned off.
Beware however that giving processes the `CAP_SYS_ADMIN` and `@mount` privileges
can let them undo the effects of `InaccessiblePaths=`.
::: {.note}
This doesn't cover network namespaces and is solely for
file system level isolation.
Expand All @@ -98,8 +105,11 @@ in {
wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs");
in lib.mkIf config.confinement.enable {
serviceConfig = {
RootDirectory = "/var/empty";
TemporaryFileSystem = "/";
RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ];
RootDirectory = lib.mkDefault "/run/confinement/${mkPathSafeName name}";
InaccessiblePaths = [
"-+/run/confinement/${mkPathSafeName name}"
];
PrivateMounts = lib.mkDefault true;

# https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
Expand Down Expand Up @@ -148,16 +158,6 @@ in {
+ " Please either define a separate service or find a way to run"
+ " commands other than ExecStart within the chroot.";
}
{ assertion = !cfg.serviceConfig.DynamicUser or false;
message = "${whatOpt "DynamicUser"}. Please create a dedicated user via"
+ " the 'users.users' option instead as this combination is"
+ " currently not supported.";
}
{ assertion = cfg.serviceConfig ? ProtectSystem -> cfg.serviceConfig.ProtectSystem == false;
message = "${whatOpt "ProtectSystem"}. ProtectSystem is not compatible"
+ " with service confinement as it fails to remount /usr within"
+ " our chroot. Please disable the option.";
}
]) config.systemd.services);

config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let
Expand All @@ -183,6 +183,13 @@ in {
echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
''}
# If DynamicUser= is enabled, PrivateTmp=true is implied (and cannot be turned off),
# so make /tmp and /var/tmp inaccessible unless PrivateTmp=true is explicitly set.
${lib.optionalString (!cfg.serviceConfig.PrivateTmp) ''
echo "InaccessiblePaths=-+/tmp" >> "$serviceFile"
echo "InaccessiblePaths=-+/var/tmp" >> "$serviceFile"
''}
while read storePath; do
if [ -L "$storePath" ]; then
# Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
Expand Down
166 changes: 158 additions & 8 deletions nixos/tests/systemd-confinement.nix
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import ./make-test-python.nix {
nodes.machine = { pkgs, lib, ... }: let
testServer = pkgs.writeScript "testserver.sh" ''
#!${pkgs.runtimeShell}
export PATH=${lib.escapeShellArg "${pkgs.coreutils}/bin"}
export PATH=${lib.makeBinPath [ pkgs.coreutils pkgs.findutils ]}
${lib.escapeShellArg pkgs.runtimeShell} 2>&1
echo "exit-status:$?"
'';
Expand Down Expand Up @@ -48,16 +48,22 @@ import ./make-test-python.nix {
{ config.confinement.mode = "chroot-only";
testScript = ''
with subtest("chroot-only confinement"):
paths = machine.succeed('chroot-exec ls -1 / | paste -sd,').strip()
assert_eq(paths, "bin,nix,run")
# chroot-exec starts a socket-activated service,
# but, upon starting, a systemd system service
# calls setup_namespace() which calls base_filesystem_create()
# which creates some usual top level directories.
# In chroot-only mode, without additional BindPaths= or the like,
# they must be empty and thus removable by rmdir.
paths = machine.succeed('chroot-exec rmdir /dev /etc /proc /root /sys /usr /var "&&" ls -Am /').strip()
assert_eq(paths, "bin, nix, run")
uid = machine.succeed('chroot-exec id -u').strip()
assert_eq(uid, "0")
machine.succeed("chroot-exec chown 65534 /bin")
'';
}
{ testScript = ''
with subtest("full confinement with APIVFS"):
machine.fail("chroot-exec ls -l /etc")
machine.succeed('chroot-exec rmdir /etc')
machine.fail("chroot-exec chown 65534 /bin")
assert_eq(machine.succeed('chroot-exec id -u').strip(), "0")
machine.succeed("chroot-exec chown 0 /bin")
Expand All @@ -80,6 +86,146 @@ import ./make-test-python.nix {
machine.fail("chroot-exec touch /bin/test")
'';
}
{ config.confinement.mode = "full-apivfs";
config.serviceConfig.DynamicUser = true;
testScript = ''
with subtest("check if DynamicUser is working in full-apivfs mode"):
machine.succeed("chroot-exec ls -l /dev")
paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? = 1')
assert_eq(
'\n'.join(sorted(paths.split('\n'))),
"""
/
/bin
/bin/sh
/dev
/etc
/nix
/proc
/root
/run
/run/host
/run/host/.os-release-stage
/run/host/.os-release-stage/os-release
/run/host/os-release
/run/systemd
/run/systemd/incoming
/sys
/tmp
/usr
/var
/var/tmp
find: '/root': Permission denied
find: '/run/systemd/incoming': Permission denied"""
)
uid = machine.succeed('chroot-exec id -u').strip()
assert uid != "0", "UID of a DynamicUser shouldn't be 0"
machine.fail("chroot-exec touch /bin/test")
# DynamicUser=true implies ProtectSystem=strict
machine.fail("chroot-exec touch /etc/test")
'';
}
{ config.confinement.mode = "full-apivfs";
config.serviceConfig.DynamicUser = true;
config.serviceConfig.PrivateTmp = false;
testScript = ''
with subtest("check if DynamicUser and PrivateTmp=false are working in full-apivfs mode"):
machine.succeed("chroot-exec ls -l /dev")
paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? = 1')
assert_eq(
'\n'.join(sorted(paths.split('\n'))),
"""
/
/bin
/bin/sh
/dev
/etc
/nix
/proc
/root
/run
/run/host
/run/host/.os-release-stage
/run/host/.os-release-stage/os-release
/run/host/os-release
/run/systemd
/run/systemd/incoming
/sys
/usr
/var
find: '/root': Permission denied
find: '/run/systemd/incoming': Permission denied"""
)
uid = machine.succeed('chroot-exec id -u').strip()
assert uid != "0", "UID of a DynamicUser shouldn't be 0"
machine.fail("chroot-exec touch /bin/test")
# DynamicUser=true implies ProtectSystem=strict
machine.fail("chroot-exec touch /etc/test")
'';
}
{ config.confinement.mode = "chroot-only";
config.serviceConfig.DynamicUser = true;
testScript = ''
with subtest("check if DynamicUser is working in chroot-only mode"):
paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? = 1')
assert_eq(
'\n'.join(sorted(paths.split('\n'))),
"""
/
/bin
/bin/sh
/dev
/etc
/nix
/proc
/root
/run
/run/systemd
/run/systemd/incoming
/sys
/usr
/var
find: '/root': Permission denied
find: '/run/systemd/incoming': Permission denied"""
)
uid = machine.succeed('chroot-exec id -u').strip()
assert uid != "0", "UID of a DynamicUser shouldn't be 0"
machine.fail("chroot-exec touch /bin/test")
'';
}
{ config.confinement.mode = "chroot-only";
config.serviceConfig.DynamicUser = true;
config.serviceConfig.PrivateTmp = true;
testScript = ''
with subtest("check if DynamicUser and PrivateTmp=true are working in chroot-only mode"):
paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? = 1')
assert_eq(
'\n'.join(sorted(paths.split('\n'))),
"""
/
/bin
/bin/sh
/dev
/etc
/nix
/proc
/root
/run
/run/systemd
/run/systemd/incoming
/sys
/tmp
/usr
/var
/var/tmp
find: '/root': Permission denied
find: '/run/systemd/incoming': Permission denied"""
)
uid = machine.succeed('chroot-exec id -u').strip()
assert uid != "0", "UID of a DynamicUser shouldn't be 0"
machine.fail("chroot-exec touch /bin/test")
'';
}
(let
symlink = pkgs.runCommand "symlink" {
target = pkgs.writeText "symlink-target" "got me\n";
Expand All @@ -88,7 +234,7 @@ import ./make-test-python.nix {
config.confinement.packages = lib.singleton symlink;
testScript = ''
with subtest("check if symlinks are properly bind-mounted"):
machine.fail("chroot-exec test -e /etc")
machine.succeed("chroot-exec rmdir /etc")
text = machine.succeed('chroot-exec cat ${symlink}').strip()
assert_eq(text, "got me")
'';
Expand Down Expand Up @@ -176,9 +322,13 @@ import ./make-test-python.nix {
};

testScript = { nodes, ... }: ''
def assert_eq(a, b):
assert a == b, f"{a} != {b}"
import difflib
def assert_eq(got, expected):
if got != expected:
diff = difflib.unified_diff(got.splitlines(keepends=True), expected.splitlines(keepends=True))
print("".join(diff))
assert got == expected, f"{got} != {expected}"
machine.wait_for_unit("multi-user.target")
'' + nodes.machine.config.__testSteps;
'' + nodes.machine.__testSteps;
}

0 comments on commit 0a5542c

Please sign in to comment.