Skip to content

Commit

Permalink
Remove zpl_revalidate: fix snapshot rollback
Browse files Browse the repository at this point in the history
Open files, which aren't present in the snapshot, which is being
roll-backed to, need to disappear from the visible VFS image of
the dataset.

Kernel provides d_drop function to drop invalid entry from
the dcache, but inode can be referenced by dentry multiple dentries.

The introduced zpl_d_drop_aliases function walks and invalidates
all aliases of an inode.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Pavel Snajdr <snajpa@snajpa.net>
Closes openzfs#9600
Closes openzfs#14070
  • Loading branch information
snajpa authored and andrewc12 committed Oct 29, 2022
1 parent 7082d60 commit 0c6ec62
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 56 deletions.
30 changes: 30 additions & 0 deletions config/kernel-dentry-alias.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
dnl #
dnl # 3.18 API change
dnl # Dentry aliases are in d_u struct dentry member
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [
ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/list.h>
], [
struct inode *inode __attribute__ ((unused)) = NULL;
struct dentry *dentry __attribute__ ((unused)) = NULL;
hlist_for_each_entry(dentry, &inode->i_dentry,
d_u.d_alias) {
d_drop(dentry);
}
])
])

AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [
AC_MSG_CHECKING([whether dentry aliases are in d_u member])
ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1,
[dentry aliases are in d_u member])
],[
AC_MSG_RESULT(no)
])
])

2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_SRC_DENTRY
ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SRC_SECURITY_INODE
ZFS_AC_KERNEL_SRC_FST_MOUNT
Expand Down Expand Up @@ -217,6 +218,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_SETATTR_PREPARE
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_DENTRY
ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SECURITY_INODE
ZFS_AC_KERNEL_FST_MOUNT
Expand Down
21 changes: 21 additions & 0 deletions include/os/linux/kernel/linux/dcache_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry)
DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
}

/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
static inline void
zpl_d_drop_aliases(struct inode *inode)
{
struct dentry *dentry;
spin_lock(&inode->i_lock);
#ifdef HAVE_DENTRY_D_U_ALIASES
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
#else
hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
#endif
if (!IS_ROOT(dentry) && !d_mountpoint(dentry) &&
(dentry->d_inode == inode)) {
d_drop(dentry);
}
}
spin_unlock(&inode->i_lock);
}
#endif /* _ZFS_DCACHE_H */
9 changes: 3 additions & 6 deletions include/os/linux/zfs/sys/trace_acl.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(boolean_t, z_is_sa)
__field(boolean_t, z_is_mapped)
__field(boolean_t, z_is_ctldir)
__field(boolean_t, z_is_stale)

__field(uint32_t, i_uid)
__field(uint32_t, i_gid)
Expand Down Expand Up @@ -99,7 +98,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_is_sa = zn->z_is_sa;
__entry->z_is_mapped = zn->z_is_mapped;
__entry->z_is_ctldir = zn->z_is_ctldir;
__entry->z_is_stale = zn->z_is_stale;

__entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
__entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid);
Expand All @@ -121,9 +119,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
"zn_prefetch %u blksz %u seq %u "
"mapcnt %llu size %llu pflags %llu "
"sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
"mode 0x%x is_sa %d is_mapped %d "
"is_ctldir %d is_stale %d inode { "
"uid %u gid %u ino %lu nlink %u size %lli "
"mode 0x%x is_sa %d is_mapped %d is_ctldir %d "
"inode { uid %u gid %u ino %lu nlink %u size %lli "
"blkbits %u bytes %u mode 0x%x generation %x } } "
"ace { type %u flags %u access_mask %u } mask_matched %u",
__entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
Expand All @@ -132,7 +129,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_pflags, __entry->z_sync_cnt,
__entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
__entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped,
__entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid,
__entry->z_is_ctldir, __entry->i_uid,
__entry->i_gid, __entry->i_ino, __entry->i_nlink,
__entry->i_size, __entry->i_blkbits,
__entry->i_bytes, __entry->i_mode, __entry->i_generation,
Expand Down
3 changes: 2 additions & 1 deletion include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ extern const struct inode_operations zpl_inode_operations;
extern const struct inode_operations zpl_dir_inode_operations;
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
extern dentry_operations_t zpl_dentry_operations;

/* zpl_file.c */
extern const struct address_space_operations zpl_address_space_operations;
extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
Expand Down
1 change: 0 additions & 1 deletion include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ typedef struct znode {
boolean_t z_is_sa; /* are we native sa? */
boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
boolean_t z_is_stale; /* are we stale due to rollback? */
boolean_t z_suspended; /* extra ref from a suspend? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
Expand Down
1 change: 0 additions & 1 deletion module/os/linux/zfs/zfs_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
zp->z_is_sa = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_TRUE;
zp->z_is_stale = B_FALSE;
zp->z_sa_hdl = NULL;
zp->z_blksz = 0;
zp->z_seq = 0;
Expand Down
3 changes: 1 addition & 2 deletions module/os/linux/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1522,7 +1522,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_op = &zpl_super_operations;
sb->s_xattr = zpl_xattr_handlers;
sb->s_export_op = &zpl_export_operations;
sb->s_d_op = &zpl_dentry_operations;

/* Set features for file system. */
zfs_set_fuid_feature(zfsvfs);
Expand Down Expand Up @@ -1881,8 +1880,8 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
err2 = zfs_rezget(zp);
if (err2) {
zpl_d_drop_aliases(ZTOI(zp));
remove_inode_hash(ZTOI(zp));
zp->z_is_stale = B_TRUE;
}

/* see comment in zfs_suspend_fs() */
Expand Down
1 change: 0 additions & 1 deletion module/os/linux/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_atime_dirty = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
zp->z_is_stale = B_FALSE;
zp->z_suspended = B_FALSE;
zp->z_sa_hdl = NULL;
zp->z_mapcnt = 0;
Expand Down
44 changes: 0 additions & 44 deletions module/os/linux/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,46 +728,6 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
return (error);
}

static int
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
{
unsigned int flags = (nd ? nd->flags : 0);
#else
zpl_revalidate(struct dentry *dentry, unsigned int flags)
{
#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
/* CSTYLED */
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
int error;

if (flags & LOOKUP_RCU)
return (-ECHILD);

/*
* After a rollback negative dentries created before the rollback
* time must be invalidated. Otherwise they can obscure files which
* are only present in the rolled back dataset.
*/
if (dentry->d_inode == NULL) {
spin_lock(&dentry->d_lock);
error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
spin_unlock(&dentry->d_lock);

if (error)
return (0);
}

/*
* The dentry may reference a stale inode if a mounted file system
* was rolled back to a point in time where the object didn't exist.
*/
if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
return (0);

return (1);
}

const struct inode_operations zpl_inode_operations = {
.setattr = zpl_setattr,
.getattr = zpl_getattr,
Expand Down Expand Up @@ -856,7 +816,3 @@ const struct inode_operations zpl_special_inode_operations = {
.get_acl = zpl_get_acl,
#endif /* CONFIG_FS_POSIX_ACL */
};

dentry_operations_t zpl_dentry_operations = {
.d_revalidate = zpl_revalidate,
};

0 comments on commit 0c6ec62

Please sign in to comment.