diff --git a/Dockerfile b/Dockerfile index 86b1af5b3..35574af53 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,7 +75,10 @@ COPY conversion /opt/dangerzone/dangerzone/conversion # Add the unprivileged user. # NOTE: A tmpfs will be mounted over /home/dangerzone directory, # so nothing within it from the image will be persisted. -RUN adduser -s /bin/true -h /home/dangerzone -D dangerzone +ARG DANGERZONE_UID=65042 +ARG DANGERZONE_GID=65042 +RUN addgroup -g "$DANGERZONE_GID" dangerzone && \ + adduser -u "$DANGERZONE_UID" -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone ########################################### # gVisor wrapper image @@ -87,19 +90,12 @@ RUN apk --no-cache -U upgrade && \ su-exec RUN mkdir --mode=0755 -p /dangerzone-image/rootfs COPY --from=dangerzone-image / /dangerzone-image/rootfs -RUN ARCH="$(uname -m)"; \ - URL="https://storage.googleapis.com/gvisor/releases/release/latest/${ARCH}"; \ - wget "${URL}/runsc" "${URL}/runsc.sha512" && \ +COPY gvisor_wrapper/entrypoint.py / +RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \ + wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \ sha512sum -c runsc.sha512 && \ rm -f runsc.sha512 && \ - chmod 555 runsc && \ + chmod 555 runsc /entrypoint.py && \ mv runsc /usr/bin/ -COPY gvisor_wrapper/entrypoint.py gvisor_wrapper/sandboxed_entrypoint.sh / -RUN mv sandboxed_entrypoint.sh /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \ - chmod 555 /entrypoint.py /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \ - mkdir -p /wrapped-safezone /var/run/runsc - -# /safezone is a directory through which Pixels to PDF receives files -VOLUME /safezone ENTRYPOINT ["/entrypoint.py"] diff --git a/dangerzone/gvisor_wrapper/entrypoint.py b/dangerzone/gvisor_wrapper/entrypoint.py index d056791d5..bc32b8a90 100755 --- a/dangerzone/gvisor_wrapper/entrypoint.py +++ b/dangerzone/gvisor_wrapper/entrypoint.py @@ -15,56 +15,6 @@ # This script wraps the command-line arguments passed to it to run as an # unprivileged user in a gVisor sandbox. -# It is meant to work in both Docker and Podman, which differ in how this -# script is invokved. -# With Docker, the Docker daemon runs as root on the machine, which is -# likely a different user than the one running the Dangerzone application. -# With Podman, which we run in rootless mode, there is only one non-root user -# that is running Podman. In this case, we are UID 0 in the Podman-created -# user namespace, but this user maps to the user running the Dangerzone -# application. -# The script first tries to establish a "common denominator" setup between -# these two situations by checking the owner of the /safezone volume, which -# is mounted by Dangerzone and is owned by the user running this application. -# If this script is not running as this user (i.e. Docker in root mode), it -# re-executes itself as the user owning /safezone. This brings it to the same -# situation as Podman running in rootless mode: there is only one user mapped -# into this user namespace as UID 0, and that user is the person running the -# Dangerzone application on their machine. They do not have root in the -# initial user namespace. No other users are mapped in the user namespace -# we're in. -# However, we now have a second problem: we also want the application running -# within the sandbox to be running as a non-root user with minimal privileges. -# We cannot create a new user here, because such a user would be unmapped in -# the initial user namespace and any attempt to make it into a child user -# namespace (which starting a gVisor sandbox requires) would fail. -# Therefore, the only place where this new user can exist is within the -# gVisor sandbox. -# But now we have a new problem: This user will not have write access to the -# /safezone directory, and any file it does create would be mapped to a -# meaningless user on the host. -# So this script uses a two-volume approach. -# The /safezone directory on the host is mapped to the /host-safezone -# directory in the gVisor sandbox, while a new tmpfs volume is created -# as the sandbox's /safezone directory. -# Then, inside the sandbox right on startup, all files are moved from -# /host-safezone to /safezone and chown'd to the sandbox-only "dangerzone" -# user. Then, when the unprivileged command finishes running, all files -# in the sandbox's /safezone are chown'd back to the sandbox's root user -# (which corresponds to our root user, which in turn corresponds to the -# real user on the host running Dangerzone), and moved back to /host-safezone -# (which makes them show up in the /safezone volume of this container, which -# in turn means they are finally visible on the host). -# This approach is mostly transparent from the perspective of whoever is -# running this container, with the caveats that: -# - All documents in /safezone must fit in RAM, since they live in tmpfs. -# - The resulting documents are only visible to the host after the -# unprivileged command finishes running (as opposed to being available -# as conversion progresses). -# One alternative to this approach would be to only have the root user exist -# in the sandbox, and to use it directly. It would be possible to drop all -# capabilities from the OCI config below, but it does mean running as UID 0 -# within the sandbox. # Define flags. parser = argparse.ArgumentParser( @@ -77,14 +27,6 @@ parser.add_argument( "--pre_gvisor", action="store_true", help="Run command without gVisor wrapping" ) -parser.add_argument( - "--pre_new_userns", action="store_true", help="Run command before changing userns" -) -parser.add_argument( - "--pre_sandboxed_entrypoint", - action="store_true", - help="Run command in gVisor but without sandboxed_entrypoint.sh", -) parser.add_argument( "--gvisor_debug", action="store_true", help="Enable gVisor debug logging" ) @@ -144,174 +86,42 @@ parser_args.append("command") # To satisfy the parser's `command` argument. args = parser.parse_args(parser_args) -if args.pre_new_userns: - if args.gvisor_debug: - print( - "Executing command before userns switch:", - " ".join(shlex.quote(s) for s in wrapped_command), - file=sys.stderr, - ) - try: - os.execvp(wrapped_command[0], wrapped_command) - except Exception as e: - raise e.__class__("Process %s failed: %s" % (wrapped_command, e)) - else: - assert False, "This code should never be reachable" - -# Monkeypatch `os` module for things added in Python 3.12. -# This can go away once the python3 alpine package is updated to 3.12. -if "unshare" not in os.__dict__ or "CLONE_NEWUSER" not in os.__dict__: - import ctypes - - libc = ctypes.CDLL(None) - libc.unshare.argtypes = [ctypes.c_int] - get_errno_loc = libc.__errno_location - get_errno_loc.restype.restype = ctypes.POINTER(ctypes.c_int) # type: ignore[union-attr] - - def unshare_monkeypatch(flags: int) -> None: - rc = libc.unshare(flags) - if rc == -1: - raise Exception(os.strerror(get_errno_loc()[0])) - - os.unshare = unshare_monkeypatch # type: ignore[attr-defined] - os.CLONE_NEWUSER = 268435456 # type: ignore[attr-defined] - -# Check that we are running as the user that owns /safezone. -# If not, re-exec. -my_uid = os.getuid() -my_gid = os.getgid() -safezone_st = os.lstat("/safezone") - -if my_uid == 0 and (safezone_st.st_uid != my_uid or safezone_st.st_gid != my_gid): - # Need to switch into the user who owns the /safezone directory. - # This helps preserve the correct user permissions on Docker. - # The user and group for this UID/GID pair need to exist in the - # container too before we can use them; if they don't exist, - # create them. - # We use random group/user names in order to minimize risk of conflict - # with existing users in the container. - try: - group_name = grp.getgrgid(safezone_st.st_gid).gr_name - except KeyError: - add_group_argv = ( - "/usr/sbin/addgroup", - "-g", - str(safezone_st.st_gid), - "danger" - + "".join(random.choices(string.ascii_lowercase + string.digits, k=24)), - ) - if args.gvisor_debug: - print( - "Creating new group:", - " ".join(shlex.quote(s) for s in add_group_argv), - file=sys.stderr, - ) - subprocess.run(add_group_argv, check=True) - group_name = grp.getgrgid(safezone_st.st_gid).gr_name - try: - user_name = pwd.getpwuid(safezone_st.st_uid).pw_name - except KeyError: - add_user_argv = ( - "/usr/sbin/adduser", - "-u", - str(safezone_st.st_uid), - "-s", - "/bin/true", - "-G", - group_name, - "-D", - "-H", - "danger" - + "".join(random.choices(string.ascii_lowercase + string.digits, k=24)), - ) - if args.gvisor_debug: - print( - "Creating new user:", - " ".join(shlex.quote(s) for s in add_user_argv), - file=sys.stderr, - ) - subprocess.run(add_user_argv, check=True) - user_name = pwd.getpwuid(safezone_st.st_uid).pw_name - user_and_group = "%s:%s" % (user_name, group_name) - # Align permissions of rootfs and runsc state directory to the user we will - # run it as: - chown_argv = ( - "/bin/chown", - "-R", - user_and_group, - "/var/run/runsc", - "/wrapped-safezone", - "/dangerzone-image", - ) - if args.gvisor_debug: - print( - "Setting permissions to sandbox user:", - " ".join(shlex.quote(s) for s in add_group_argv), - file=sys.stderr, - ) - subprocess.run(chown_argv, check=True) - - # Switch to target user. - su_exec_argv = ("su-exec", user_and_group) + tuple(sys.argv) - if args.gvisor_debug: - print( - "Re-executing as", - user_and_group, - "->", - " ".join(shlex.quote(s) for s in su_exec_argv), - file=sys.stderr, - ) - try: - os.execv("/sbin/su-exec", su_exec_argv) - except Exception as e: - raise e.__class__("su-exec %s failed: %s" % (sys.argv, e)) - else: - assert False, "This code should never be reachable" - -if my_uid != 0: - # If we are not UID 0, create a user namespace where we are mapped to it. - if args.gvisor_debug: - print( - "Current UID/GID is %d:%d; creating new user namespace..." - % (my_uid, my_gid), - file=sys.stderr, - ) - os.unshare(os.CLONE_NEWUSER) # type: ignore[attr-defined] - with os.fdopen( - os.open("/proc/self/setgroups", flags=os.O_WRONLY), "wt" - ) as setgroups_fd: - setgroups_fd.write("deny") - with os.fdopen( - os.open("/proc/self/uid_map", flags=os.O_WRONLY), "wt" - ) as uid_map_fd: - uid_map_fd.write("0 %d 1" % (my_uid,)) - with os.fdopen( - os.open("/proc/self/gid_map", flags=os.O_WRONLY), "wt" - ) as gid_map_fd: - gid_map_fd.write("0 %d 1" % (my_gid,)) - # Re-exec. - if args.gvisor_debug: - print("Re-execing:", " ".join(shlex.quote(s) for s in sys.argv)) - try: - os.execvp(sys.argv[0], sys.argv) - except Exception as e: - raise e.__class__("Re-execing %s failed: %s" % (sys.argv, e)) - else: - assert False, "This code should never be reachable" +# Find the UID/GID of who we should run as within the sandbox. +sandboxed_uid = int(subprocess.check_output(( + 'chroot', + '/dangerzone-image/rootfs', + 'id', '-u', 'dangerzone', +))) +assert sandboxed_uid != 0, 'Unexpectedly read 0 as the sandboxed dangerzone UID' +sandboxed_gid = int(subprocess.check_output(( + 'chroot', + '/dangerzone-image/rootfs', + 'id', '-g', 'dangerzone', +))) +assert sandboxed_gid != 0, 'Unexpectedly read 0 as the sandboxed dangerzone GID' -# By this point, we are running as the same user that owns /safezone and -# that user is mapped to UID 0 in a dedicated user namespace. +# Wrap the command with `su-exec` to execute as the intended in-sandbox +# UID/GID, and execute `su-exec` as root. This requires the sandbox's initial +# process to have the CAP_SETUID and CAP_SETGID capabilities, but these are +# not inherited after exec. +# This can all be removed and simplified once gvisor.dev/issue/9918 is fixed. +gvisor_issue_9918_is_fixed = False +sandbox_capabilities = [] +if not gvisor_issue_9918_is_fixed and not args.pre_gvisor: + wrapped_command = ['su-exec', '%d:%d' % (sandboxed_uid, sandboxed_gid)] + wrapped_command + sandboxed_uid = 0 + sandboxed_gid = 0 + sandbox_capabilities = ["CAP_SETUID", "CAP_SETGID"] # Build and write container OCI config. -oci_command = wrapped_command -if not args.pre_sandboxed_entrypoint: - oci_command = ["/sandboxed_entrypoint.sh"] + oci_command - oci_config: dict[str, typing.Any] = { "ociVersion": "1.0.0", "process": { - "user": {"uid": 0, "gid": 0}, - "args": oci_command, + "user": { + "uid": sandboxed_uid, + "gid": sandboxed_gid, + }, + "args": wrapped_command, "env": [ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "PYTHONPATH=/opt/dangerzone", @@ -319,14 +129,10 @@ def unshare_monkeypatch(flags: int) -> None: ], "cwd": "/", "capabilities": { - # See the long comment above as to why this is needed. - # CAP_CHOWN is needed to chown the safezone files back and forth. - # CAP_SETUID and CAP_SETGID are required to switch to the - # unprivileged user. - "bounding": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "effective": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "inheritable": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], - "permitted": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"], + "bounding": [], + "effective": sandbox_capabilities, + "inheritable": [], + "permitted": sandbox_capabilities, }, "rlimits": [ {"type": "RLIMIT_NOFILE", "hard": 4096, "soft": 4096}, @@ -340,21 +146,6 @@ def unshare_monkeypatch(flags: int) -> None: "type": "proc", "source": "proc", }, - # /safezone is a tmpfs which will be owned by the unprivileged user - # which lives only in the sandbox. See comment above. - { - "destination": "/safezone", - "type": "tmpfs", - "source": "tmpfs", - "options": ["nosuid", "noexec", "nodev"], - }, - # /host-safezone is where the host's /safezone is actually mounted. - { - "destination": "/host-safezone", - "type": "none", - "source": "/safezone", - "options": ["bind", "nosuid", "noexec", "nodev", "rw"], - }, { "destination": "/dev", "type": "tmpfs", @@ -408,7 +199,7 @@ def unshare_monkeypatch(flags: int) -> None: continue oci_config["process"]["env"].append("%s=%s" % (key, val)) if args.gvisor_debug: - print("Command inside gVisor sandbox:", oci_command, file=sys.stderr) + print("Command inside gVisor sandbox:", wrapped_command, file=sys.stderr) print("OCI config:", file=sys.stderr) json.dump(oci_config, sys.stderr, indent=2, sort_keys=True) # json.dump doesn't print a trailing newline, so print one here: @@ -450,9 +241,7 @@ def unshare_monkeypatch(flags: int) -> None: if args.gvisor_debug: print( - "Running", - runsc_binary, - "with command line:", + "Running gVisor with command line:", " ".join(shlex.quote(s) for s in runsc_argv), file=sys.stderr, ) diff --git a/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh b/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh deleted file mode 100755 index ea1721e00..000000000 --- a/dangerzone/gvisor_wrapper/sandboxed_entrypoint.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -# This file runs within the gVisor sandbox. -# Read `entrypoint.py` for why this is needed. - -set -euo pipefail - -# Move files over from /host-safezone to /safezone. -if [[ "$(ls -1 /host-safezone | wc -l)" -gt 0 ]]; then - mv /host-safezone/* /safezone/ -fi -# chown them as the unprivileged user. -chown -R dangerzone:dangerzone /safezone - -# Run the unprivileged command. -set +e -su-exec dangerzone:dangerzone "$@" -retcode="$?" -set -e - -# Move files back from /safezone to /host-safezone. -if [[ -d /safezone ]] && [[ "$(ls -1 /safezone | wc -l)" -gt 0 ]]; then - # chown them back to the user that exists on the host. - chown -R root:root /safezone - mv /safezone/* /host-safezone/ -fi - -# Mirror the exit code of the unprivileged command. -exit "$retcode" diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 175686cf6..a2a6f4c72 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -56,6 +56,7 @@ def get_runtime_security_args() -> List[str]: if Container.get_runtime_name() == "podman": security_args = ["--log-driver", "none"] security_args += ["--security-opt", "no-new-privileges"] + security_args += ["--userns", "nomap"] else: security_args = ["--security-opt=no-new-privileges:true"] # Needed for running rootlesskit, which gVisor uses. cf: