From 9df2f6ffcc5b7a3d6561bac6632bc6497a81dbe5 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Thu, 3 Feb 2022 14:37:57 -0800 Subject: [PATCH] Fix clearing set-uid and set-gid bits on a file when replying a write POSIX requires that set-uid and set-gid bits to be removed when an unprivileged user writes to a file and ZFS does that during normal operation. The problem arrises when the write is stored in the ZIL and replayed. During replay we have no access to original credentials of the process doing the write, so zfs_write() will be performed with the root credentials. When root is doing the write set-uid and set-gid bits are not removed from the file. To correct that, log a separate TX_SETATTR entry that removed those bits on first write to such file. Idea from: Christian Schwarz Add test for ZIL replay of setuid/setgid clearing. Improve various edge cases when clearing setid bits: - The setid bits can be readded during a single write, so make sure to check for them on every chunk write. - Log TX_SETATTR record at most once per transaction group (if the setid bits are keep coming back). - Move zfs_log_setattr() outside of zp->z_acl_lock. Reviewed-by: Dan McDonald Reviewed-by: Brian Behlendorf Co-authored-by: Christian Schwarz Signed-off-by: Pawel Jakub Dawidek Closes #13027 --- module/zfs/zfs_vnops.c | 111 ++++++++++--- tests/runfiles/common.run | 2 +- .../tests/functional/suid/Makefile.am | 1 + .../functional/suid/suid_write_to_file.c | 150 +++++++++--------- .../functional/suid/suid_write_to_none.ksh | 2 +- .../functional/suid/suid_write_to_sgid.ksh | 2 +- .../functional/suid/suid_write_to_suid.ksh | 2 +- .../suid/suid_write_to_suid_sgid.ksh | 2 +- .../functional/suid/suid_write_zil_replay.ksh | 99 ++++++++++++ 9 files changed, 266 insertions(+), 105 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 9bd75c011ef9..97eacdd6ef7d 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -301,6 +301,62 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (error); } +static void +zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr, + uint64_t *clear_setid_bits_txgp, dmu_tx_t *tx) +{ + zilog_t *zilog = zfsvfs->z_log; + const uint64_t uid = KUID_TO_SUID(ZTOUID(zp)); + + ASSERT(clear_setid_bits_txgp != NULL); + ASSERT(tx != NULL); + + /* + * Clear Set-UID/Set-GID bits on successful write if not + * privileged and at least one of the execute bits is set. + * + * It would be nice to do this after all writes have + * been done, but that would still expose the ISUID/ISGID + * to another app after the partial write is committed. + * + * Note: we don't call zfs_fuid_map_id() here because + * user 0 is not an ephemeral uid. + */ + mutex_enter(&zp->z_acl_lock); + if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 && + (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && + secpolicy_vnode_setid_retain(zp, cr, + ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { + uint64_t newmode; + + zp->z_mode &= ~(S_ISUID | S_ISGID); + newmode = zp->z_mode; + (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + (void *)&newmode, sizeof (uint64_t), tx); + + mutex_exit(&zp->z_acl_lock); + + /* + * Make sure SUID/SGID bits will be removed when we replay the + * log. If the setid bits are keep coming back, don't log more + * than one TX_SETATTR per transaction group. + */ + if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) { + vattr_t va; + + bzero(&va, sizeof (va)); + va.va_mask = AT_MODE; + va.va_nodeid = zp->z_id; + va.va_mode = newmode; + zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va, AT_MODE, + NULL); + *clear_setid_bits_txgp = dmu_tx_get_txg(tx); + } + } else { + mutex_exit(&zp->z_acl_lock); + } +} + /* * Write the bytes to a file. * @@ -326,6 +382,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) { int error = 0; ssize_t start_resid = zfs_uio_resid(uio); + uint64_t clear_setid_bits_txg = 0; /* * Fasttrack empty write @@ -504,6 +561,11 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) break; } + /* + * NB: We must call zfs_clear_setid_bits_if_necessary before + * committing the transaction! + */ + /* * If rangelock_enter() over-locked we grow the blocksize * and then reduce the lock range. This will only happen @@ -545,6 +607,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uio_fault_disable(uio, B_FALSE); #ifdef __linux__ if (error == EFAULT) { + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_tx_commit(tx); /* * Account for partial writes before @@ -562,7 +626,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) continue; } #endif - if (error != 0) { + /* + * On FreeBSD, EFAULT should be propagated back to the + * VFS, which will handle faulting and will retry. + */ + if (error != 0 && error != EFAULT) { + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_tx_commit(tx); break; } @@ -587,6 +657,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = dmu_assign_arcbuf_by_dbuf( sa_get_db(zp->z_sa_hdl), woff, abuf, tx); if (error != 0) { + /* + * XXX This might not be necessary if + * dmu_assign_arcbuf_by_dbuf is guaranteed + * to be atomic. + */ + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_return_arcbuf(abuf); dmu_tx_commit(tx); break; @@ -612,30 +689,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) break; } - /* - * Clear Set-UID/Set-GID bits on successful write if not - * privileged and at least one of the execute bits is set. - * - * It would be nice to do this after all writes have - * been done, but that would still expose the ISUID/ISGID - * to another app after the partial write is committed. - * - * Note: we don't call zfs_fuid_map_id() here because - * user 0 is not an ephemeral uid. - */ - mutex_enter(&zp->z_acl_lock); - if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | - (S_IXUSR >> 6))) != 0 && - (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(zp, cr, - ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { - uint64_t newmode; - zp->z_mode &= ~(S_ISUID | S_ISGID); - newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), - (void *)&newmode, sizeof (uint64_t), tx); - } - mutex_exit(&zp->z_acl_lock); + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, cr, + &clear_setid_bits_txg, tx); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); @@ -658,8 +713,14 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + /* + * NB: During replay, the TX_SETATTR record logged by + * zfs_clear_setid_bits_if_necessary must precede any of + * the TX_WRITE records logged here. + */ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, NULL, NULL); + dmu_tx_commit(tx); if (error != 0) diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 2d75d6d1ee89..7af877ed96d0 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -872,7 +872,7 @@ tags = ['functional', 'stat'] [tests/functional/suid] tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', - 'suid_write_to_none'] + 'suid_write_to_none', 'suid_write_zil_replay'] tags = ['functional', 'suid'] [tests/functional/threadsappend] diff --git a/tests/zfs-tests/tests/functional/suid/Makefile.am b/tests/zfs-tests/tests/functional/suid/Makefile.am index 594d2b77ca8e..0145c1205fb3 100644 --- a/tests/zfs-tests/tests/functional/suid/Makefile.am +++ b/tests/zfs-tests/tests/functional/suid/Makefile.am @@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \ suid_write_to_sgid.ksh \ suid_write_to_suid_sgid.ksh \ suid_write_to_none.ksh \ + suid_write_zil_replay.ksh \ cleanup.ksh \ setup.ksh diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c index 571dc553bec2..f3febb903b59 100644 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c @@ -29,86 +29,16 @@ #include #include #include - -static void -test_stat_mode(mode_t extra) -{ - struct stat st; - int i, fd; - char fpath[1024]; - char *penv[] = {"TESTDIR", "TESTFILE0"}; - char buf[] = "test"; - mode_t res; - mode_t mode = 0777 | extra; - - /* - * Get the environment variable values. - */ - for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { - if ((penv[i] = getenv(penv[i])) == NULL) { - fprintf(stderr, "getenv(penv[%d])\n", i); - exit(1); - } - } - - umask(0); - if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) { - perror("mkdir"); - exit(2); - } - - snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]); - unlink(fpath); - if (stat(fpath, &st) == 0) { - fprintf(stderr, "%s exists\n", fpath); - exit(3); - } - - fd = creat(fpath, mode); - if (fd == -1) { - perror("creat"); - exit(4); - } - close(fd); - - if (setuid(65534) == -1) { - perror("setuid"); - exit(5); - } - - fd = open(fpath, O_RDWR); - if (fd == -1) { - perror("open"); - exit(6); - } - - if (write(fd, buf, sizeof (buf)) == -1) { - perror("write"); - exit(7); - } - close(fd); - - if (stat(fpath, &st) == -1) { - perror("stat"); - exit(8); - } - unlink(fpath); - - /* Verify SUID/SGID are dropped */ - res = st.st_mode & (0777 | S_ISUID | S_ISGID); - if (res != (mode & 0777)) { - fprintf(stderr, "stat(2) %o\n", res); - exit(9); - } -} +#include int main(int argc, char *argv[]) { - const char *name; + const char *name, *phase; mode_t extra; + struct stat st; - if (argc < 2) { + if (argc < 3) { fprintf(stderr, "Invalid argc\n"); exit(1); } @@ -127,7 +57,77 @@ main(int argc, char *argv[]) exit(1); } - test_stat_mode(extra); + const char *testdir = getenv("TESTDIR"); + if (!testdir) { + fprintf(stderr, "getenv(TESTDIR)\n"); + exit(1); + } + + umask(0); + if (stat(testdir, &st) == -1 && mkdir(testdir, 0777) == -1) { + perror("mkdir"); + exit(2); + } + + char fpath[1024]; + snprintf(fpath, sizeof (fpath), "%s/%s", testdir, name); + + + phase = argv[2]; + if (strcmp(phase, "PRECRASH") == 0) { + + /* clean up last run */ + unlink(fpath); + if (stat(fpath, &st) == 0) { + fprintf(stderr, "%s exists\n", fpath); + exit(3); + } + + int fd; + + fd = creat(fpath, 0777 | extra); + if (fd == -1) { + perror("creat"); + exit(4); + } + close(fd); + + if (setuid(65534) == -1) { + perror("setuid"); + exit(5); + } + + fd = open(fpath, O_RDWR); + if (fd == -1) { + perror("open"); + exit(6); + } + + const char buf[] = "test"; + if (write(fd, buf, sizeof (buf)) == -1) { + perror("write"); + exit(7); + } + close(fd); + + } else if (strcmp(phase, "REPLAY") == 0) { + /* created in PRECRASH run */ + } else { + fprintf(stderr, "Invalid phase %s\n", phase); + exit(1); + } + + if (stat(fpath, &st) == -1) { + perror("stat"); + exit(8); + } + + /* Verify SUID/SGID are dropped */ + mode_t res = st.st_mode & (0777 | S_ISUID | S_ISGID); + if (res != 0777) { + fprintf(stderr, "stat(2) %o\n", res); + exit(9); + } return (0); } diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh index dd01978619f9..470350f960cf 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to regular file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH" log_pass "Verify write(2) to regular file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh index 49ae2bd1b31e..3c95a402658e 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SGID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH" log_pass "Verify write(2) to SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh index 3983aad2e51d..4183cbeefc20 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SUID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH" log_pass "Verify write(2) to SUID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh index a058c7e7d4bc..f7a08a55fc4b 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SUID/SGID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH" log_pass "Verify write(2) to SUID/SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh new file mode 100755 index 000000000000..81f431f6b68b --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +verify_runnable "global" + +function cleanup_fs +{ + cleanup +} + +log_assert "Verify ZIL replay results in correct SUID/SGID bits for unprivileged write to SUID/SGID files" +log_onexit cleanup_fs +log_must setup + +# +# 1. Create a file system (TESTFS) +# +log_must zpool destroy "$TESTPOOL" +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create -o mountpoint="$TESTDIR" $TESTPOOL/$TESTFS + +# Make all the writes from suid_write_to_file.c sync +log_must zfs set sync=always "$TESTPOOL/$TESTFS" + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=$TESTDIR/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Unprivileged write to a setuid file +# +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH" + +# +# 4. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount $TESTPOOL/$TESTFS + +log_note "List transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 5. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "REPLAY" + +log_pass