Skip to content

Commit

Permalink
Fix clearing set-uid and set-gid bits on a file when replying a write
Browse files Browse the repository at this point in the history
POSIX requires that set-uid and set-gid bits to be removed when an
unprivileged user writes to a file and ZFS does that during normal
operation.

The problem arrises when the write is stored in the ZIL and replayed.
During replay we have no access to original credentials of the process
doing the write, so zfs_write() will be performed with the root
credentials. When root is doing the write set-uid and set-gid bits
are not removed from the file.

To correct that, log a separate TX_SETATTR entry that removed those bits
on first write to such file.

Idea from:	Christian Schwarz

Add test for ZIL replay of setuid/setgid clearing.

Improve various edge cases when clearing setid bits:
- The setid bits can be readded during a single write, so make sure to check
  for them on every chunk write.
- Log TX_SETATTR record at most once per transaction group (if the setid bits
  are keep coming back).
- Move zfs_log_setattr() outside of zp->z_acl_lock.

Reviewed-by: Dan McDonald <danmcd@joyent.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Christian Schwarz <me@cschwarz.com>
Signed-off-by: Pawel Jakub Dawidek <pawel@dawidek.net>
Closes openzfs#13027
  • Loading branch information
pjd authored and snajpa committed Oct 22, 2022
1 parent ff1e6bb commit 9df2f6f
Show file tree
Hide file tree
Showing 9 changed files with 266 additions and 105 deletions.
111 changes: 86 additions & 25 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,62 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
return (error);
}

static void
zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr,
uint64_t *clear_setid_bits_txgp, dmu_tx_t *tx)
{
zilog_t *zilog = zfsvfs->z_log;
const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));

ASSERT(clear_setid_bits_txgp != NULL);
ASSERT(tx != NULL);

/*
* Clear Set-UID/Set-GID bits on successful write if not
* privileged and at least one of the execute bits is set.
*
* It would be nice to do this after all writes have
* been done, but that would still expose the ISUID/ISGID
* to another app after the partial write is committed.
*
* Note: we don't call zfs_fuid_map_id() here because
* user 0 is not an ephemeral uid.
*/
mutex_enter(&zp->z_acl_lock);
if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 &&
(zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
secpolicy_vnode_setid_retain(zp, cr,
((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
uint64_t newmode;

zp->z_mode &= ~(S_ISUID | S_ISGID);
newmode = zp->z_mode;
(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
(void *)&newmode, sizeof (uint64_t), tx);

mutex_exit(&zp->z_acl_lock);

/*
* Make sure SUID/SGID bits will be removed when we replay the
* log. If the setid bits are keep coming back, don't log more
* than one TX_SETATTR per transaction group.
*/
if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) {
vattr_t va;

bzero(&va, sizeof (va));
va.va_mask = AT_MODE;
va.va_nodeid = zp->z_id;
va.va_mode = newmode;
zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va, AT_MODE,
NULL);
*clear_setid_bits_txgp = dmu_tx_get_txg(tx);
}
} else {
mutex_exit(&zp->z_acl_lock);
}
}

/*
* Write the bytes to a file.
*
Expand All @@ -326,6 +382,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
{
int error = 0;
ssize_t start_resid = zfs_uio_resid(uio);
uint64_t clear_setid_bits_txg = 0;

/*
* Fasttrack empty write
Expand Down Expand Up @@ -504,6 +561,11 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
break;
}

/*
* NB: We must call zfs_clear_setid_bits_if_necessary before
* committing the transaction!
*/

/*
* If rangelock_enter() over-locked we grow the blocksize
* and then reduce the lock range. This will only happen
Expand Down Expand Up @@ -545,6 +607,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
zfs_uio_fault_disable(uio, B_FALSE);
#ifdef __linux__
if (error == EFAULT) {
zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
cr, &clear_setid_bits_txg, tx);
dmu_tx_commit(tx);
/*
* Account for partial writes before
Expand All @@ -562,7 +626,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
continue;
}
#endif
if (error != 0) {
/*
* On FreeBSD, EFAULT should be propagated back to the
* VFS, which will handle faulting and will retry.
*/
if (error != 0 && error != EFAULT) {
zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
cr, &clear_setid_bits_txg, tx);
dmu_tx_commit(tx);
break;
}
Expand All @@ -587,6 +657,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
error = dmu_assign_arcbuf_by_dbuf(
sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
if (error != 0) {
/*
* XXX This might not be necessary if
* dmu_assign_arcbuf_by_dbuf is guaranteed
* to be atomic.
*/
zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
cr, &clear_setid_bits_txg, tx);
dmu_return_arcbuf(abuf);
dmu_tx_commit(tx);
break;
Expand All @@ -612,30 +689,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
break;
}

/*
* Clear Set-UID/Set-GID bits on successful write if not
* privileged and at least one of the execute bits is set.
*
* It would be nice to do this after all writes have
* been done, but that would still expose the ISUID/ISGID
* to another app after the partial write is committed.
*
* Note: we don't call zfs_fuid_map_id() here because
* user 0 is not an ephemeral uid.
*/
mutex_enter(&zp->z_acl_lock);
if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
(S_IXUSR >> 6))) != 0 &&
(zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
secpolicy_vnode_setid_retain(zp, cr,
((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
uint64_t newmode;
zp->z_mode &= ~(S_ISUID | S_ISGID);
newmode = zp->z_mode;
(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
(void *)&newmode, sizeof (uint64_t), tx);
}
mutex_exit(&zp->z_acl_lock);
zfs_clear_setid_bits_if_necessary(zfsvfs, zp, cr,
&clear_setid_bits_txg, tx);

zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);

Expand All @@ -658,8 +713,14 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)

error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

/*
* NB: During replay, the TX_SETATTR record logged by
* zfs_clear_setid_bits_if_necessary must precede any of
* the TX_WRITE records logged here.
*/
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
NULL, NULL);

dmu_tx_commit(tx);

if (error != 0)
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ tags = ['functional', 'stat']

[tests/functional/suid]
tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid',
'suid_write_to_none']
'suid_write_to_none', 'suid_write_zil_replay']
tags = ['functional', 'suid']

[tests/functional/threadsappend]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/suid/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \
suid_write_to_sgid.ksh \
suid_write_to_suid_sgid.ksh \
suid_write_to_none.ksh \
suid_write_zil_replay.ksh \
cleanup.ksh \
setup.ksh

Expand Down
150 changes: 75 additions & 75 deletions tests/zfs-tests/tests/functional/suid/suid_write_to_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,86 +29,16 @@
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

static void
test_stat_mode(mode_t extra)
{
struct stat st;
int i, fd;
char fpath[1024];
char *penv[] = {"TESTDIR", "TESTFILE0"};
char buf[] = "test";
mode_t res;
mode_t mode = 0777 | extra;

/*
* Get the environment variable values.
*/
for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
if ((penv[i] = getenv(penv[i])) == NULL) {
fprintf(stderr, "getenv(penv[%d])\n", i);
exit(1);
}
}

umask(0);
if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) {
perror("mkdir");
exit(2);
}

snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]);
unlink(fpath);
if (stat(fpath, &st) == 0) {
fprintf(stderr, "%s exists\n", fpath);
exit(3);
}

fd = creat(fpath, mode);
if (fd == -1) {
perror("creat");
exit(4);
}
close(fd);

if (setuid(65534) == -1) {
perror("setuid");
exit(5);
}

fd = open(fpath, O_RDWR);
if (fd == -1) {
perror("open");
exit(6);
}

if (write(fd, buf, sizeof (buf)) == -1) {
perror("write");
exit(7);
}
close(fd);

if (stat(fpath, &st) == -1) {
perror("stat");
exit(8);
}
unlink(fpath);

/* Verify SUID/SGID are dropped */
res = st.st_mode & (0777 | S_ISUID | S_ISGID);
if (res != (mode & 0777)) {
fprintf(stderr, "stat(2) %o\n", res);
exit(9);
}
}
#include <stdbool.h>

int
main(int argc, char *argv[])
{
const char *name;
const char *name, *phase;
mode_t extra;
struct stat st;

if (argc < 2) {
if (argc < 3) {
fprintf(stderr, "Invalid argc\n");
exit(1);
}
Expand All @@ -127,7 +57,77 @@ main(int argc, char *argv[])
exit(1);
}

test_stat_mode(extra);
const char *testdir = getenv("TESTDIR");
if (!testdir) {
fprintf(stderr, "getenv(TESTDIR)\n");
exit(1);
}

umask(0);
if (stat(testdir, &st) == -1 && mkdir(testdir, 0777) == -1) {
perror("mkdir");
exit(2);
}

char fpath[1024];
snprintf(fpath, sizeof (fpath), "%s/%s", testdir, name);


phase = argv[2];
if (strcmp(phase, "PRECRASH") == 0) {

/* clean up last run */
unlink(fpath);
if (stat(fpath, &st) == 0) {
fprintf(stderr, "%s exists\n", fpath);
exit(3);
}

int fd;

fd = creat(fpath, 0777 | extra);
if (fd == -1) {
perror("creat");
exit(4);
}
close(fd);

if (setuid(65534) == -1) {
perror("setuid");
exit(5);
}

fd = open(fpath, O_RDWR);
if (fd == -1) {
perror("open");
exit(6);
}

const char buf[] = "test";
if (write(fd, buf, sizeof (buf)) == -1) {
perror("write");
exit(7);
}
close(fd);

} else if (strcmp(phase, "REPLAY") == 0) {
/* created in PRECRASH run */
} else {
fprintf(stderr, "Invalid phase %s\n", phase);
exit(1);
}

if (stat(fpath, &st) == -1) {
perror("stat");
exit(8);
}

/* Verify SUID/SGID are dropped */
mode_t res = st.st_mode & (0777 | S_ISUID | S_ISGID);
if (res != 0777) {
fprintf(stderr, "stat(2) %o\n", res);
exit(9);
}

return (0);
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ function cleanup
log_onexit cleanup
log_note "Verify write(2) to regular file by non-owner"

log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE"
log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH"

log_pass "Verify write(2) to regular file by non-owner passed"
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ function cleanup
log_onexit cleanup
log_note "Verify write(2) to SGID file by non-owner"

log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID"
log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH"

log_pass "Verify write(2) to SGID file by non-owner passed"
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ function cleanup
log_onexit cleanup
log_note "Verify write(2) to SUID file by non-owner"

log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID"
log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH"

log_pass "Verify write(2) to SUID file by non-owner passed"
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ function cleanup
log_onexit cleanup
log_note "Verify write(2) to SUID/SGID file by non-owner"

log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID"
log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH"

log_pass "Verify write(2) to SUID/SGID file by non-owner passed"
Loading

0 comments on commit 9df2f6f

Please sign in to comment.