Skip to content

Commit ca5f21b

Browse files
jgunthorpeawilliam
authored andcommitted
vfio: Follow a strict lifetime for struct iommu_group
The iommu_group comes from the struct device that a driver has been bound to and then created a struct vfio_device against. To keep the iommu layer sane we want to have a simple rule that only an attached driver should be using the iommu API. Particularly only an attached driver should hold ownership. In VFIO's case since it uses the group APIs and it shares between different drivers it is a bit more complicated, but the principle still holds. Solve this by waiting for all users of the vfio_group to stop before allowing vfio_unregister_group_dev() to complete. This is done with a new completion to know when the users go away and an additional refcount to keep track of how many device drivers are sharing the vfio group. The last driver to be unregistered will clean up the group. This solves crashes in the S390 iommu driver that come because VFIO ends up racing releasing ownership (which attaches the default iommu_domain to the device) with the removal of that same device from the iommu driver. This is a side case that iommu drivers should not have to cope with. iommu driver failed to attach the default/blocking domain WARNING: CPU: 0 PID: 5082 at drivers/iommu/iommu.c:1961 iommu_detach_group+0x6c/0x80 Modules linked in: macvtap macvlan tap vfio_pci vfio_pci_core irqbypass vfio_virqfd kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink mlx5_ib sunrpc ib_uverbs ism smc uvdevice ib_core s390_trng eadm_sch tape_3590 tape tape_class vfio_ccw mdev vfio_iommu_type1 vfio zcrypt_cex4 sch_fq_codel configfs ghash_s390 prng chacha_s390 libchacha aes_s390 mlx5_core des_s390 libdes sha3_512_s390 nvme sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common nvme_core zfcp scsi_transport_fc pkey zcrypt rng_core autofs4 CPU: 0 PID: 5082 Comm: qemu-system-s39 Tainted: G W 6.0.0-rc3 ctrliq#5 Hardware name: IBM 3931 A01 782 (LPAR) Krnl PSW : 0704c00180000000 000000095bb10d28 (iommu_detach_group+0x70/0x80) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 0000000900000027 0000000000000039 000000095c97ffe0 00000000fffeffff 00000009fc290000 00000000af1fda50 00000000af590b58 00000000af1fdaf0 0000000135c7a320 0000000135e52258 0000000135e52200 00000000a29e8000 00000000af590b40 000000095bb10d24 0000038004b13c98 Krnl Code: 000000095bb10d18: c020003d56fc larl %r2,000000095c2bbb10 000000095bb10d1e: c0e50019d901 brasl %r14,000000095be4bf20 #000000095bb10d24: af000000 mc 0,0 >000000095bb10d28: b904002a lgr %r2,%r10 000000095bb10d2c: ebaff0a00004 lmg %r10,%r15,160(%r15) 000000095bb10d32: c0f4001aa867 brcl 15,000000095be65e00 000000095bb10d38: c004002168e0 brcl 0,000000095bf3def8 000000095bb10d3e: eb6ff0480024 stmg %r6,%r15,72(%r15) Call Trace: [<000000095bb10d28>] iommu_detach_group+0x70/0x80 ([<000000095bb10d24>] iommu_detach_group+0x6c/0x80) [<000003ff80243b0e>] vfio_iommu_type1_detach_group+0x136/0x6c8 [vfio_iommu_type1] [<000003ff80137780>] __vfio_group_unset_container+0x58/0x158 [vfio] [<000003ff80138a16>] vfio_group_fops_unl_ioctl+0x1b6/0x210 [vfio] pci 0004:00:00.0: Removing from iommu group 4 [<000000095b5b62e8>] __s390x_sys_ioctl+0xc0/0x100 [<000000095be5d3b4>] __do_syscall+0x1d4/0x200 [<000000095be6c072>] system_call+0x82/0xb0 Last Breaking-Event-Address: [<000000095be4bf80>] __warn_printk+0x60/0x68 It indicates that domain->ops->attach_dev() failed because the driver has already passed the point of destructing the device. Fixes: 9ac8545 ("iommu: Fix use-after-free in iommu_release_device") Reported-by: Matthew Rosato <mjrosato@linux.ibm.com> Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> Reviewed-by: Yi Liu <yi.l.liu@intel.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/0-v2-a3c5f4429e2a+55-iommu_group_lifetime_jgg@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent cdc71fe commit ca5f21b

File tree

2 files changed

+53
-23
lines changed

2 files changed

+53
-23
lines changed

drivers/vfio/vfio.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,15 @@ enum vfio_group_type {
4141
struct vfio_group {
4242
struct device dev;
4343
struct cdev cdev;
44+
/*
45+
* When drivers is non-zero a driver is attached to the struct device
46+
* that provided the iommu_group and thus the iommu_group is a valid
47+
* pointer. When drivers is 0 the driver is being detached. Once users
48+
* reaches 0 then the iommu_group is invalid.
49+
*/
50+
refcount_t drivers;
4451
refcount_t users;
52+
struct completion users_comp;
4553
unsigned int container_users;
4654
struct iommu_group *iommu_group;
4755
struct vfio_container *container;

drivers/vfio/vfio_main.c

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,6 @@ static void vfio_release_device_set(struct vfio_device *device)
125125
xa_unlock(&vfio_device_set_xa);
126126
}
127127

128-
static void vfio_group_get(struct vfio_group *group);
129-
130128
/*
131129
* Group objects - create, release, get, put, search
132130
*/
@@ -137,7 +135,7 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
137135

138136
list_for_each_entry(group, &vfio.group_list, vfio_next) {
139137
if (group->iommu_group == iommu_group) {
140-
vfio_group_get(group);
138+
refcount_inc(&group->drivers);
141139
return group;
142140
}
143141
}
@@ -189,6 +187,8 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
189187
group->cdev.owner = THIS_MODULE;
190188

191189
refcount_set(&group->users, 1);
190+
refcount_set(&group->drivers, 1);
191+
init_completion(&group->users_comp);
192192
init_rwsem(&group->group_rwsem);
193193
INIT_LIST_HEAD(&group->device_list);
194194
mutex_init(&group->device_lock);
@@ -247,8 +247,41 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
247247

248248
static void vfio_group_put(struct vfio_group *group)
249249
{
250-
if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
250+
if (refcount_dec_and_test(&group->users))
251+
complete(&group->users_comp);
252+
}
253+
254+
static void vfio_device_remove_group(struct vfio_device *device)
255+
{
256+
struct vfio_group *group = device->group;
257+
258+
if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
259+
iommu_group_remove_device(device->dev);
260+
261+
/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
262+
if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
251263
return;
264+
list_del(&group->vfio_next);
265+
266+
/*
267+
* We could concurrently probe another driver in the group that might
268+
* race vfio_device_remove_group() with vfio_get_group(), so we have to
269+
* ensure that the sysfs is all cleaned up under lock otherwise the
270+
* cdev_device_add() will fail due to the name aready existing.
271+
*/
272+
cdev_device_del(&group->cdev, &group->dev);
273+
mutex_unlock(&vfio.group_lock);
274+
275+
/* Matches the get from vfio_group_alloc() */
276+
vfio_group_put(group);
277+
278+
/*
279+
* Before we allow the last driver in the group to be unplugged the
280+
* group must be sanitized so nothing else is or can reference it. This
281+
* is because the group->iommu_group pointer should only be used so long
282+
* as a device driver is attached to a device in the group.
283+
*/
284+
wait_for_completion(&group->users_comp);
252285

253286
/*
254287
* These data structures all have paired operations that can only be
@@ -259,19 +292,11 @@ static void vfio_group_put(struct vfio_group *group)
259292
WARN_ON(!list_empty(&group->device_list));
260293
WARN_ON(group->container || group->container_users);
261294
WARN_ON(group->notifier.head);
262-
263-
list_del(&group->vfio_next);
264-
cdev_device_del(&group->cdev, &group->dev);
265-
mutex_unlock(&vfio.group_lock);
295+
group->iommu_group = NULL;
266296

267297
put_device(&group->dev);
268298
}
269299

270-
static void vfio_group_get(struct vfio_group *group)
271-
{
272-
refcount_inc(&group->users);
273-
}
274-
275300
/*
276301
* Device objects - create, release, get, put, search
277302
*/
@@ -494,6 +519,10 @@ static int __vfio_register_dev(struct vfio_device *device,
494519
struct vfio_device *existing_device;
495520
int ret;
496521

522+
/*
523+
* In all cases group is the output of one of the group allocation
524+
* functions and we have group->drivers incremented for us.
525+
*/
497526
if (IS_ERR(group))
498527
return PTR_ERR(group);
499528

@@ -533,10 +562,7 @@ static int __vfio_register_dev(struct vfio_device *device,
533562

534563
return 0;
535564
err_out:
536-
if (group->type == VFIO_NO_IOMMU ||
537-
group->type == VFIO_EMULATED_IOMMU)
538-
iommu_group_remove_device(device->dev);
539-
vfio_group_put(group);
565+
vfio_device_remove_group(device);
540566
return ret;
541567
}
542568

@@ -627,11 +653,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
627653
/* Balances device_add in register path */
628654
device_del(&device->device);
629655

630-
if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
631-
iommu_group_remove_device(device->dev);
632-
633-
/* Matches the get in vfio_register_group_dev() */
634-
vfio_group_put(group);
656+
vfio_device_remove_group(device);
635657
}
636658
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
637659

@@ -884,7 +906,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
884906

885907
down_write(&group->group_rwsem);
886908

887-
/* users can be zero if this races with vfio_group_put() */
909+
/* users can be zero if this races with vfio_device_remove_group() */
888910
if (!refcount_inc_not_zero(&group->users)) {
889911
ret = -ENODEV;
890912
goto err_unlock;

0 commit comments

Comments
 (0)