Skip to content

Commit

Permalink
zfs_rename: support RENAME_* flags
Browse files Browse the repository at this point in the history
Implement support for Linux's RENAME_* flags (for renameat2). Aside from
being quite useful for userspace (providing race-free ways to exchange
paths and implement mv --no-clobber), they are used by overlayfs and are
thus required in order to use overlayfs-on-ZFS.

Unfortunately, more work is required in order use overlayfs-on-ZFS
(namely we have to remove our .d_revalidate hook, since overlayfs
refuses to use a filesystem with d_revalidate as an upperdir).

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
  • Loading branch information
cyphar committed Apr 27, 2019
1 parent 8d6383c commit e53ab06
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 24 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ CONTRIBUTORS:
Albert Lee <trisk@nexenta.com>
Alec Salazar <alec.j.salazar@gmail.com>
Alejandro R. Sedeño <asedeno@mit.edu>
Aleksa Sarai <cyphar@cyphar.com>
Alek Pinchuk <alek@nexenta.com>
Alex Braunegg <alex.braunegg@gmail.com>
Alex McWhirter <alexmcwhirter@triadic.us>
Expand Down
13 changes: 13 additions & 0 deletions include/linux/vfs_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
#define zpl_follow_up(path) follow_up(path)
#endif

/*
* 3.15 API change
*/
#ifndef RENAME_NOREPLACE
#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
#endif
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#endif
#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
#endif

/*
* 4.9 API change
*/
Expand Down
3 changes: 2 additions & 1 deletion include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, char *link);
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp,
int vfsflags);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag,
zil_callback_t callback, void *callback_data);
Expand Down
1 change: 1 addition & 0 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ typedef struct {

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_vfsflags; /* flags passed to renameat(2) */
uint64_t lr_sdoid; /* obj id of source directory */
uint64_t lr_tdoid; /* obj id of target directory */
/* 2 strings: names of source and destination follow this */
Expand Down
4 changes: 3 additions & 1 deletion module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp,
int vfsflags)
{
itx_t *itx;
lr_rename_t *lr;
Expand All @@ -478,6 +479,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr = (lr_rename_t *)&itx->itx_lr;
lr->lr_sdoid = sdzp->z_id;
lr->lr_tdoid = tdzp->z_id;
lr->lr_vfsflags = vfsflags;
bcopy(sname, (char *)(lr + 1), snamesize);
bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
itx->itx_oid = szp->z_id;
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
char *tname = sname + strlen(sname) + 1;
znode_t *sdzp, *tdzp;
int error;
int vflg = 0;
int vflg = lr->lr_vfsflags;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
Expand Down
151 changes: 133 additions & 18 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -3658,6 +3658,13 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
int error = 0;
int zflg = 0;
boolean_t waited = B_FALSE;
boolean_t sreplace = !!(flags & RENAME_EXCHANGE);
/* Needed for whiteout inode creation. */
vattr_t wo_vap;
uint64_t wo_projid;
boolean_t fuid_dirtied;
zfs_acl_ids_t acl_ids;
boolean_t have_acl = B_FALSE;

if (snm == NULL || tnm == NULL)
return (SET_ERROR(EINVAL));
Expand Down Expand Up @@ -3829,14 +3836,14 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
error = SET_ERROR(EXDEV);
goto out;
}
wo_projid = szp->z_projid;

/*
* Must have write access at the source to remove the old entry
* and write access at the target to create the new entry.
* Note that if target and source are the same, this can be
* done in a single check.
*/

if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
goto out;

Expand All @@ -3853,15 +3860,21 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* Does target exist?
*/
if (tzp) {
if (flags & RENAME_NOREPLACE) {
error = SET_ERROR(EEXIST);
goto out;
}
/*
* Source and target must be the same type.
* Source and target must be the same type (unless exchanging).
*/
boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
if (!(flags & RENAME_EXCHANGE)) {
boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;

if (s_is_dir != t_is_dir) {
error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
goto out;
if (s_is_dir != t_is_dir) {
error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
goto out;
}
}
/*
* POSIX dictates that when the source and target
Expand All @@ -3873,11 +3886,38 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
goto out;
}
}
/* Target must exist for RENAME_EXCHANGE. */
if (!tzp && flags & RENAME_EXCHANGE) {
error = SET_ERROR(ENOENT);
goto out;
}

/* Set up inode creation for RENAME_WHITEOUT. */
if (flags & RENAME_WHITEOUT) {
error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr);
if (error)
goto out;

zpl_vap_init(&wo_vap, ZTOI(sdzp), S_IFCHR, cr);
/* Can't use of makedevice() here, so hard-code it. */
wo_vap.va_rdev = 0;

error = zfs_acl_ids_create(sdzp, 0, &wo_vap, cr, NULL,
&acl_ids);
if (error)
goto out;
have_acl = B_TRUE;

if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
error = SET_ERROR(EDQUOT);
goto out;
}
}

tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
dmu_tx_hold_zap(tx, sdzp->z_id, sreplace, snm);
dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
if (sdzp != tdzp) {
dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
Expand All @@ -3887,7 +3927,21 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, tzp);
}
if (flags & RENAME_WHITEOUT) {
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);

dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
if (!zfsvfs->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
}
fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
Expand Down Expand Up @@ -3936,13 +3990,30 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* Unlink the target.
*/
if (tzp) {
error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
int tzflg = zflg;

if (flags & RENAME_EXCHANGE) {
/* This inode will be re-linked soon. */
tzflg |= ZRENAMING;

tzp->z_pflags |= ZFS_AV_MODIFIED;
if (sdzp->z_pflags & ZFS_PROJINHERIT)
tzp->z_pflags |= ZFS_PROJINHERIT;

error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
(void *)&tzp->z_pflags, sizeof (uint64_t), tx);
ASSERT0(error);
}
error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
if (error)
goto commit_link_szp;
}

/*
* Create a new link at the target.
* Create the new target links:
* * We always link the target.
* * RENAME_WHITEOUT: Create a whiteout inode in-place of the source.
* * RENAME_EXCHANGE: Link the old target to the source.
*/
error = zfs_link_create(tdl, szp, tx, ZRENAMING);
if (error) {
Expand All @@ -3955,13 +4026,49 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
goto commit_link_tzp;
}

if (flags & RENAME_EXCHANGE) {
error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
/*
* The same argument as zfs_link_create() failing for szp
* applies here, since the source directory must have had an
* entry we are replacing.
*/
ASSERT3U(error, ==, 0);
if (error)
goto commit_unlink_td_szp;
}
if (flags & RENAME_WHITEOUT) {
znode_t *wzp;
uint64_t txtype;

zfs_mknode(sdzp, &wo_vap, tx, cr, 0, &wzp, &acl_ids);
error = zfs_link_create(sdl, wzp, tx, ZNEW);
if (error) {
zfs_znode_delete(wzp, tx);
remove_inode_hash(ZTOI(wzp));
goto commit_unlink_td_szp;
}

txtype = zfs_log_create_txtype(Z_FILE, NULL, &wo_vap);
if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_create(zilog, tx, txtype, sdzp, wzp, snm,
NULL, acl_ids.z_fuidp, &wo_vap);
}

if (fuid_dirtied)
zfs_fuid_sync(zfsvfs, tx);

zfs_log_rename(zilog, tx, TX_RENAME |
(flags & FIGNORECASE ? TX_CI : 0), sdzp,
sdl->dl_name, tdzp, tdl->dl_name, szp);
sdl->dl_name, tdzp, tdl->dl_name, szp, flags);

commit:
dmu_tx_commit(tx);
out:
if (have_acl)
zfs_acl_ids_free(&acl_ids);

if (zl != NULL)
zfs_rename_unlock(&zl);

Expand Down Expand Up @@ -3992,15 +4099,23 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* Clean-up path for broken link state.
*
* At this point we are in a (very) bad state, so we need to do our
* best to correct the state. In particular, the nlink of szp is wrong
* because we were destroying and creating links with ZRENAMING.
* best to correct the state. In particular, all of the nlinks are
* wrong because we were destroying and creating links with ZRENAMING.
*
* In some form, all of thee operations have to resolve the state:
*
* * link_destroy() *must* succeed. Fortunately, this is very likely
* since we only just created it.
*
* link_create()s are allowed to fail (though they shouldn't because we
* only just unlinked them and are putting the entries back during
* clean-up). But if they fail, we can just forcefully drop the nlink
* value to (at the very least) avoid broken nlink values -- though in
* the case of non-empty directories we will have to panic.
* * link_create()s are allowed to fail (though they shouldn't because
* we only just unlinked them and are putting the entries back
* during clean-up). But if they fail, we can just forcefully drop
* the nlink value to (at the very least) avoid broken nlink values
* -- though in the case of non-empty directories we will have to
* panic (otherwise we'd have a leaked directory with a broken ..).
*/
commit_unlink_td_szp:
VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0);
commit_link_tzp:
if (tzp) {
if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
Expand Down
7 changes: 4 additions & 3 deletions module/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,13 +410,14 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
int error;
fstrans_cookie_t cookie;

/* We don't have renameat2(2) support */
if (flags)
/* We only support flags which are needed for overlayfs-on-ZFS. */
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return (-EINVAL);

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr,
flags);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
Expand Down

0 comments on commit e53ab06

Please sign in to comment.