Skip to content

Commit 2669b00

Browse files
Define sops->free_inode() to prevent use-after-free during lookup
On Linux, when doing path lookup with LOOKUP_RCU, dentry and inode can be dereferenced without refcounts and locks. For this reason, dentry and inode must only be freed after RCU grace period. However, zfs currently frees inode in zfs_inode_destroy synchronously and we can't use GPL-only call_rcu() in zfs directly. Fortunately, on Linux 5.2 and after, if we define sops->free_inode(), the kernel will do call_rcu() for us. This issue may be triggered more easily with init_on_free=1 boot parameter: BUG: kernel NULL pointer dereference, address: 0000000000000020 RIP: 0010:selinux_inode_permission+0x10e/0x1c0 Call Trace: ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? security_inode_permission+0x37/0x60 ? __die_body.cold+0x8/0xd ? no_context+0x113/0x220 ? exc_page_fault+0x6d/0x130 ? asm_exc_page_fault+0x1e/0x30 ? selinux_inode_permission+0x10e/0x1c0 security_inode_permission+0x37/0x60 link_path_walk.part.0.constprop.0+0xb5/0x360 ? path_init+0x27d/0x3c0 path_lookupat+0x3e/0x1a0 filename_lookup+0xc0/0x1d0 ? __check_object_size.part.0+0x123/0x150 ? strncpy_from_user+0x4e/0x130 ? getname_flags.part.0+0x4b/0x1c0 vfs_statx+0x72/0x120 ? ioctl_has_perm.constprop.0.isra.0+0xbd/0x120 __do_sys_newlstat+0x39/0x70 ? __x64_sys_ioctl+0x8d/0xd0 do_syscall_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x62/0xc7 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Rob Norris <rob.norris@klarasystems.com> Signed-off-by: Chunwei Chen <david.chen@nutanix.com> Co-authored-by: Chunwei Chen <david.chen@nutanix.com> Closes openzfs#17546
1 parent d7ab07d commit 2669b00

File tree

5 files changed

+54
-2
lines changed

5 files changed

+54
-2
lines changed

config/kernel-free-inode.m4

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
dnl #
2+
dnl # Linux 5.2 API change
3+
dnl #
4+
AC_DEFUN([ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE], [
5+
ZFS_LINUX_TEST_SRC([super_operations_free_inode], [
6+
#include <linux/fs.h>
7+
8+
static void free_inode(struct inode *) { }
9+
10+
static struct super_operations sops __attribute__ ((unused)) = {
11+
.free_inode = free_inode,
12+
};
13+
],[])
14+
])
15+
16+
AC_DEFUN([ZFS_AC_KERNEL_SOPS_FREE_INODE], [
17+
AC_MSG_CHECKING([whether sops->free_inode() exists])
18+
ZFS_LINUX_TEST_RESULT([super_operations_free_inode], [
19+
AC_MSG_RESULT(yes)
20+
AC_DEFINE(HAVE_SOPS_FREE_INODE, 1, [sops->free_inode() exists])
21+
],[
22+
AC_MSG_RESULT(no)
23+
])
24+
])

config/kernel.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
134134
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
135135
ZFS_AC_KERNEL_SRC_TIMER
136136
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
137+
ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE
137138
case "$host_cpu" in
138139
powerpc*)
139140
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -252,6 +253,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
252253
ZFS_AC_KERNEL_PIN_USER_PAGES
253254
ZFS_AC_KERNEL_TIMER
254255
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
256+
ZFS_AC_KERNEL_SOPS_FREE_INODE
255257
case "$host_cpu" in
256258
powerpc*)
257259
ZFS_AC_KERNEL_CPU_HAS_FEATURE

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ struct znode;
157157

158158
extern int zfs_sync(struct super_block *, int, cred_t *);
159159
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
160+
extern void zfs_inode_free(struct inode *);
160161
extern void zfs_inode_destroy(struct inode *);
161162
extern void zfs_mark_inode_dirty(struct inode *);
162163
extern boolean_t zfs_relatime_need_update(const struct inode *);

module/os/linux/zfs/zfs_znode_os.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,12 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
371371
return (0);
372372
}
373373

374+
void
375+
zfs_inode_free(struct inode *ip)
376+
{
377+
kmem_cache_free(znode_cache, ITOZ(ip));
378+
}
379+
374380
/*
375381
* Called in multiple places when an inode should be destroyed.
376382
*/
@@ -395,8 +401,15 @@ zfs_inode_destroy(struct inode *ip)
395401
nvlist_free(zp->z_xattr_cached);
396402
zp->z_xattr_cached = NULL;
397403
}
398-
399-
kmem_cache_free(znode_cache, zp);
404+
#ifndef HAVE_SOPS_FREE_INODE
405+
/*
406+
* inode needs to be freed in RCU callback. If we have
407+
* super_operations->free_inode, Linux kernel will do call_rcu
408+
* for us. But if we don't have it, since call_rcu is GPL-only
409+
* symbol, we can only free synchronously and accept the risk.
410+
*/
411+
zfs_inode_free(ip);
412+
#endif
400413
}
401414

402415
static void

module/os/linux/zfs/zpl_super.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ zpl_inode_alloc(struct super_block *sb)
4545
return (ip);
4646
}
4747

48+
#ifdef HAVE_SOPS_FREE_INODE
49+
static void
50+
zpl_inode_free(struct inode *ip)
51+
{
52+
ASSERT(atomic_read(&ip->i_count) == 0);
53+
zfs_inode_free(ip);
54+
}
55+
#endif
56+
4857
static void
4958
zpl_inode_destroy(struct inode *ip)
5059
{
@@ -455,6 +464,9 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
455464

456465
const struct super_operations zpl_super_operations = {
457466
.alloc_inode = zpl_inode_alloc,
467+
#ifdef HAVE_SOPS_FREE_INODE
468+
.free_inode = zpl_inode_free,
469+
#endif
458470
.destroy_inode = zpl_inode_destroy,
459471
.dirty_inode = zpl_dirty_inode,
460472
.write_inode = NULL,

0 commit comments

Comments
 (0)