Skip to content

Commit

Permalink
Remove support for the /safezone volume.
Browse files Browse the repository at this point in the history
Per discussion on #590,
the need for this volume will soon go away.

This makes gVisor integration much easier, because it removes the need
to preserve file access and ownership of the files in this volume from
within the gVisor sandbox. The `/sandboxed_entrypoint.sh` file is no
longer necessary, and the `/entrypoint.py` file is massively simplified.

This also allows the use of `--userns=nomap` in Podman.
  • Loading branch information
EtiennePerot committed Apr 15, 2024
1 parent 8f6ec3d commit 652a4b0
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 288 deletions.
20 changes: 8 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ COPY conversion /opt/dangerzone/dangerzone/conversion
# Add the unprivileged user.
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN adduser -s /bin/true -h /home/dangerzone -D dangerzone
ARG DANGERZONE_UID=65042
ARG DANGERZONE_GID=65042
RUN addgroup -g "$DANGERZONE_GID" dangerzone && \
adduser -u "$DANGERZONE_UID" -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone

###########################################
# gVisor wrapper image
Expand All @@ -87,19 +90,12 @@ RUN apk --no-cache -U upgrade && \
su-exec
RUN mkdir --mode=0755 -p /dangerzone-image/rootfs
COPY --from=dangerzone-image / /dangerzone-image/rootfs
RUN ARCH="$(uname -m)"; \
URL="https://storage.googleapis.com/gvisor/releases/release/latest/${ARCH}"; \
wget "${URL}/runsc" "${URL}/runsc.sha512" && \
COPY gvisor_wrapper/entrypoint.py /
RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
sha512sum -c runsc.sha512 && \
rm -f runsc.sha512 && \
chmod 555 runsc && \
chmod 555 runsc /entrypoint.py && \
mv runsc /usr/bin/
COPY gvisor_wrapper/entrypoint.py gvisor_wrapper/sandboxed_entrypoint.sh /
RUN mv sandboxed_entrypoint.sh /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \
chmod 555 /entrypoint.py /dangerzone-image/rootfs/sandboxed_entrypoint.sh && \
mkdir -p /wrapped-safezone /var/run/runsc

# /safezone is a directory through which Pixels to PDF receives files
VOLUME /safezone

ENTRYPOINT ["/entrypoint.py"]
283 changes: 36 additions & 247 deletions dangerzone/gvisor_wrapper/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,56 +15,6 @@

# This script wraps the command-line arguments passed to it to run as an
# unprivileged user in a gVisor sandbox.
# It is meant to work in both Docker and Podman, which differ in how this
# script is invoked.
# With Docker, the Docker daemon runs as root on the machine, which is
# likely a different user than the one running the Dangerzone application.
# With Podman, which we run in rootless mode, there is only one non-root user
# that is running Podman. In this case, we are UID 0 in the Podman-created
# user namespace, but this user maps to the user running the Dangerzone
# application.
# The script first tries to establish a "common denominator" setup between
# these two situations by checking the owner of the /safezone volume, which
# is mounted by Dangerzone and is owned by the user running this application.
# If this script is not running as this user (i.e. Docker in root mode), it
# re-executes itself as the user owning /safezone. This brings it to the same
# situation as Podman running in rootless mode: there is only one user mapped
# into this user namespace as UID 0, and that user is the person running the
# Dangerzone application on their machine. They do not have root in the
# initial user namespace. No other users are mapped in the user namespace
# we're in.
# However, we now have a second problem: we also want the application running
# within the sandbox to be running as a non-root user with minimal privileges.
# We cannot create a new user here, because such a user would be unmapped in
# the initial user namespace and any attempt to make it into a child user
# namespace (which starting a gVisor sandbox requires) would fail.
# Therefore, the only place where this new user can exist is within the
# gVisor sandbox.
# But now we have a new problem: This user will not have write access to the
# /safezone directory, and any file it does create would be mapped to a
# meaningless user on the host.
# So this script uses a two-volume approach.
# The /safezone directory on the host is mapped to the /host-safezone
# directory in the gVisor sandbox, while a new tmpfs volume is created
# as the sandbox's /safezone directory.
# Then, inside the sandbox right on startup, all files are moved from
# /host-safezone to /safezone and chown'd to the sandbox-only "dangerzone"
# user. Then, when the unprivileged command finishes running, all files
# in the sandbox's /safezone are chown'd back to the sandbox's root user
# (which corresponds to our root user, which in turn corresponds to the
# real user on the host running Dangerzone), and moved back to /host-safezone
# (which makes them show up in the /safezone volume of this container, which
# in turn means they are finally visible on the host).
# This approach is mostly transparent from the perspective of whoever is
# running this container, with the caveats that:
# - All documents in /safezone must fit in RAM, since they live in tmpfs.
# - The resulting documents are only visible to the host after the
# unprivileged command finishes running (as opposed to being available
# as conversion progresses).
# One alternative to this approach would be to only have the root user exist
# in the sandbox, and to use it directly. It would be possible to drop all
# capabilities from the OCI config below, but it does mean running as UID 0
# within the sandbox.

# Define flags.
parser = argparse.ArgumentParser(
Expand All @@ -77,14 +27,6 @@
parser.add_argument(
"--pre_gvisor", action="store_true", help="Run command without gVisor wrapping"
)
parser.add_argument(
"--pre_new_userns", action="store_true", help="Run command before changing userns"
)
parser.add_argument(
"--pre_sandboxed_entrypoint",
action="store_true",
help="Run command in gVisor but without sandboxed_entrypoint.sh",
)
parser.add_argument(
"--gvisor_debug", action="store_true", help="Enable gVisor debug logging"
)
Expand Down Expand Up @@ -144,189 +86,53 @@
parser_args.append("command") # To satisfy the parser's `command` argument.
args = parser.parse_args(parser_args)

if args.pre_new_userns:
if args.gvisor_debug:
print(
"Executing command before userns switch:",
" ".join(shlex.quote(s) for s in wrapped_command),
file=sys.stderr,
)
try:
os.execvp(wrapped_command[0], wrapped_command)
except Exception as e:
raise e.__class__("Process %s failed: %s" % (wrapped_command, e))
else:
assert False, "This code should never be reachable"

# Monkeypatch `os` module for things added in Python 3.12.
# This can go away once the python3 alpine package is updated to 3.12.
if "unshare" not in os.__dict__ or "CLONE_NEWUSER" not in os.__dict__:
    import ctypes

    # `use_errno=True` makes ctypes keep a private copy of errno for calls
    # made through this handle, readable with `ctypes.get_errno()`. This
    # replaces the previous `__errno_location` lookup, which set `restype`
    # on the wrong object and therefore could not report the error.
    libc = ctypes.CDLL(None, use_errno=True)
    libc.unshare.argtypes = [ctypes.c_int]
    libc.unshare.restype = ctypes.c_int

    def unshare_monkeypatch(flags: int) -> None:
        """Call unshare(2) with `flags`, as `os.unshare` does on Python 3.12+.

        Raises:
            OSError: if the underlying unshare(2) call returns -1; the
                exception carries the errno reported for the call.
        """
        rc = libc.unshare(flags)
        if rc == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err))

    os.unshare = unshare_monkeypatch  # type: ignore[attr-defined]
    os.CLONE_NEWUSER = 268435456  # type: ignore[attr-defined]

# Check that we are running as the user that owns /safezone.
# If not, re-exec.
my_uid = os.getuid()
my_gid = os.getgid()
safezone_st = os.lstat("/safezone")

if my_uid == 0 and (safezone_st.st_uid != my_uid or safezone_st.st_gid != my_gid):
# Need to switch into the user who owns the /safezone directory.
# This helps preserve the correct user permissions on Docker.
# The user and group for this UID/GID pair need to exist in the
# container too before we can use them; if they don't exist,
# create them.
# We use random group/user names in order to minimize risk of conflict
# with existing users in the container.
try:
group_name = grp.getgrgid(safezone_st.st_gid).gr_name
except KeyError:
add_group_argv = (
"/usr/sbin/addgroup",
"-g",
str(safezone_st.st_gid),
"danger"
+ "".join(random.choices(string.ascii_lowercase + string.digits, k=24)),
)
if args.gvisor_debug:
print(
"Creating new group:",
" ".join(shlex.quote(s) for s in add_group_argv),
file=sys.stderr,
)
subprocess.run(add_group_argv, check=True)
group_name = grp.getgrgid(safezone_st.st_gid).gr_name
try:
user_name = pwd.getpwuid(safezone_st.st_uid).pw_name
except KeyError:
add_user_argv = (
"/usr/sbin/adduser",
"-u",
str(safezone_st.st_uid),
"-s",
"/bin/true",
"-G",
group_name,
"-D",
"-H",
"danger"
+ "".join(random.choices(string.ascii_lowercase + string.digits, k=24)),
)
if args.gvisor_debug:
print(
"Creating new user:",
" ".join(shlex.quote(s) for s in add_user_argv),
file=sys.stderr,
)
subprocess.run(add_user_argv, check=True)
user_name = pwd.getpwuid(safezone_st.st_uid).pw_name
user_and_group = "%s:%s" % (user_name, group_name)
# Align permissions of rootfs and runsc state directory to the user we will
# run it as:
chown_argv = (
    "/bin/chown",
    "-R",
    user_and_group,
    "/var/run/runsc",
    "/wrapped-safezone",
    "/dangerzone-image",
)
if args.gvisor_debug:
    # Log the chown command that is about to run. This previously logged
    # `add_group_argv`, which is the wrong command and is not even defined
    # when the group already existed.
    print(
        "Setting permissions to sandbox user:",
        " ".join(shlex.quote(s) for s in chown_argv),
        file=sys.stderr,
    )
# check=True: abort if the ownership change fails.
subprocess.run(chown_argv, check=True)

# Switch to target user.
su_exec_argv = ("su-exec", user_and_group) + tuple(sys.argv)
if args.gvisor_debug:
print(
"Re-executing as",
user_and_group,
"->",
" ".join(shlex.quote(s) for s in su_exec_argv),
file=sys.stderr,
)
try:
os.execv("/sbin/su-exec", su_exec_argv)
except Exception as e:
raise e.__class__("su-exec %s failed: %s" % (sys.argv, e))
else:
assert False, "This code should never be reachable"

if my_uid != 0:
    # If we are not UID 0, create a user namespace where we are mapped to it.
    if args.gvisor_debug:
        print(
            "Current UID/GID is %d:%d; creating new user namespace..."
            % (my_uid, my_gid),
            file=sys.stderr,
        )
    os.unshare(os.CLONE_NEWUSER)  # type: ignore[attr-defined]
    # Write "deny" to setgroups before writing the GID map below.
    with os.fdopen(
        os.open("/proc/self/setgroups", flags=os.O_WRONLY), "wt"
    ) as setgroups_fd:
        setgroups_fd.write("deny")
    # Map UID 0 inside the new namespace to our current UID (one entry).
    with os.fdopen(
        os.open("/proc/self/uid_map", flags=os.O_WRONLY), "wt"
    ) as uid_map_fd:
        uid_map_fd.write("0 %d 1" % (my_uid,))
    # Same single-entry mapping for the GID.
    with os.fdopen(
        os.open("/proc/self/gid_map", flags=os.O_WRONLY), "wt"
    ) as gid_map_fd:
        gid_map_fd.write("0 %d 1" % (my_gid,))
    # Re-exec.
    if args.gvisor_debug:
        # Debug output goes to stderr, like every other debug message in
        # this script.
        print(
            "Re-execing:",
            " ".join(shlex.quote(s) for s in sys.argv),
            file=sys.stderr,
        )
    try:
        os.execvp(sys.argv[0], sys.argv)
    except Exception as e:
        # Re-raise the same exception type with the failing argv attached.
        raise e.__class__("Re-execing %s failed: %s" % (sys.argv, e))
    else:
        assert False, "This code should never be reachable"
# Find the UID/GID of who we should run as within the sandbox.
# The IDs are looked up by running `id` for the `dangerzone` user inside a
# chroot of /dangerzone-image/rootfs; the output is parsed as an integer.
sandboxed_uid = int(subprocess.check_output((
'chroot',
'/dangerzone-image/rootfs',
'id', '-u', 'dangerzone',
)))
# A UID of 0 is rejected here.
assert sandboxed_uid != 0, 'Unexpectedly read 0 as the sandboxed dangerzone UID'
sandboxed_gid = int(subprocess.check_output((
'chroot',
'/dangerzone-image/rootfs',
'id', '-g', 'dangerzone',
)))
# Same check for the GID.
assert sandboxed_gid != 0, 'Unexpectedly read 0 as the sandboxed dangerzone GID'

# By this point, we are running as the same user that owns /safezone and
# that user is mapped to UID 0 in a dedicated user namespace.
# Wrap the command with `su-exec` to execute as the intended in-sandbox
# UID/GID, and execute `su-exec` as root. This requires the sandbox's initial
# process to have the CAP_SETUID and CAP_SETGID capabilities, but these are
# not inherited after exec.
# This can all be removed and simplified once gvisor.dev/issue/9918 is fixed.
gvisor_issue_9918_is_fixed = False
sandbox_capabilities = []
if not gvisor_issue_9918_is_fixed and not args.pre_gvisor:
wrapped_command = ['su-exec', '%d:%d' % (sandboxed_uid, sandboxed_gid)] + wrapped_command
sandboxed_uid = 0
sandboxed_gid = 0
sandbox_capabilities = ["CAP_SETUID", "CAP_SETGID"]

# Build and write container OCI config.
oci_command = wrapped_command
if not args.pre_sandboxed_entrypoint:
oci_command = ["/sandboxed_entrypoint.sh"] + oci_command

oci_config: dict[str, typing.Any] = {
"ociVersion": "1.0.0",
"process": {
"user": {"uid": 0, "gid": 0},
"args": oci_command,
"user": {
"uid": sandboxed_uid,
"gid": sandboxed_gid,
},
"args": wrapped_command,
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"PYTHONPATH=/opt/dangerzone",
"TERM=xterm",
],
"cwd": "/",
"capabilities": {
# See the long comment above as to why this is needed.
# CAP_CHOWN is needed to chown the safezone files back and forth.
# CAP_SETUID and CAP_SETGID are required to switch to the
# unprivileged user.
"bounding": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"],
"effective": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"],
"inheritable": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"],
"permitted": ["CAP_CHOWN", "CAP_SETUID", "CAP_SETGID"],
"bounding": [],
"effective": sandbox_capabilities,
"inheritable": [],
"permitted": sandbox_capabilities,
},
"rlimits": [
{"type": "RLIMIT_NOFILE", "hard": 4096, "soft": 4096},
Expand All @@ -340,21 +146,6 @@ def unshare_monkeypatch(flags: int) -> None:
"type": "proc",
"source": "proc",
},
# /safezone is a tmpfs which will be owned by the unprivileged user
# which lives only in the sandbox. See comment above.
{
"destination": "/safezone",
"type": "tmpfs",
"source": "tmpfs",
"options": ["nosuid", "noexec", "nodev"],
},
# /host-safezone is where the host's /safezone is actually mounted.
{
"destination": "/host-safezone",
"type": "none",
"source": "/safezone",
"options": ["bind", "nosuid", "noexec", "nodev", "rw"],
},
{
"destination": "/dev",
"type": "tmpfs",
Expand Down Expand Up @@ -408,7 +199,7 @@ def unshare_monkeypatch(flags: int) -> None:
continue
oci_config["process"]["env"].append("%s=%s" % (key, val))
if args.gvisor_debug:
print("Command inside gVisor sandbox:", oci_command, file=sys.stderr)
print("Command inside gVisor sandbox:", wrapped_command, file=sys.stderr)
print("OCI config:", file=sys.stderr)
json.dump(oci_config, sys.stderr, indent=2, sort_keys=True)
# json.dump doesn't print a trailing newline, so print one here:
Expand Down Expand Up @@ -450,9 +241,7 @@ def unshare_monkeypatch(flags: int) -> None:

if args.gvisor_debug:
print(
"Running",
runsc_binary,
"with command line:",
"Running gVisor with command line:",
" ".join(shlex.quote(s) for s in runsc_argv),
file=sys.stderr,
)
Expand Down
Loading

0 comments on commit 652a4b0

Please sign in to comment.