Skip to content

Commit

Permalink
RDMA/mlx5: Fix implicit ODP use after free
Browse files Browse the repository at this point in the history
Prevent double queueing of implicit ODP mr destroy work by using
__xa_cmpxchg() to make sure this is the only time we are destroying this
specific mr.

Without this change, we could try to invalidate this mr twice, which in
turn could result in queuing a MR work destroy twice, and eventually the
second work could execute after the MR was freed due to the first work,
causing a user after free and trace below.

   refcount_t: underflow; use-after-free.
   WARNING: CPU: 2 PID: 12178 at lib/refcount.c:28 refcount_warn_saturate+0x12b/0x130
   Modules linked in: bonding ib_ipoib vfio_pci ip_gre geneve nf_tables ip6_gre gre ip6_tunnel tunnel6 ipip tunnel4 ib_umad rdma_ucm mlx5_vfio_pci vfio_pci_core vfio_iommu_type1 mlx5_ib vfio ib_uverbs mlx5_core iptable_raw openvswitch nsh rpcrdma ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: ib_uverbs]
   CPU: 2 PID: 12178 Comm: kworker/u20:5 Not tainted 6.5.0-rc1_net_next_mlx5_58c644e #1
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
   Workqueue: events_unbound free_implicit_child_mr_work [mlx5_ib]
   RIP: 0010:refcount_warn_saturate+0x12b/0x130
   Code: 48 c7 c7 38 95 2a 82 c6 05 bc c6 fe 00 01 e8 0c 66 aa ff 0f 0b 5b c3 48 c7 c7 e0 94 2a 82 c6 05 a7 c6 fe 00 01 e8 f5 65 aa ff <0f> 0b 5b c3 90 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13 8d 50 ff
   RSP: 0018:ffff8881008e3e40 EFLAGS: 00010286
   RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000027
   RDX: ffff88852c91b5c8 RSI: 0000000000000001 RDI: ffff88852c91b5c0
   RBP: ffff8881dacd4e00 R08: 00000000ffffffff R09: 0000000000000019
   R10: 000000000000072e R11: 0000000063666572 R12: ffff88812bfd9e00
   R13: ffff8881c792d200 R14: ffff88810011c005 R15: ffff8881002099c0
   FS:  0000000000000000(0000) GS:ffff88852c900000(0000) knlGS:0000000000000000
   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   CR2: 00007f5694b5e000 CR3: 00000001153f6003 CR4: 0000000000370ea0
   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
   Call Trace:
    <TASK>
    ? refcount_warn_saturate+0x12b/0x130
    free_implicit_child_mr_work+0x180/0x1b0 [mlx5_ib]
    process_one_work+0x1cc/0x3c0
    worker_thread+0x218/0x3c0
    kthread+0xc6/0xf0
    ret_from_fork+0x1f/0x30
    </TASK>

Fixes: 5256edc ("RDMA/mlx5: Rework implicit ODP destroy")
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/r/c96b8645a81085abff739e6b06e286a350d1283d.1737274283.git.leon@kernel.org
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
  • Loading branch information
PatrisiousHaddad authored and jgunthorpe committed Jan 21, 2025
1 parent abb604a commit d3d9304
Showing 1 changed file with 22 additions and 8 deletions.
30 changes: 22 additions & 8 deletions drivers/infiniband/hw/mlx5/odp.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,27 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *imr = mr->parent;

/*
* If userspace is racing freeing the parent implicit ODP MR then we can
* loose the race with parent destruction. In this case
* mlx5_ib_free_odp_mr() will free everything in the implicit_children
* xarray so NOP is fine. This child MR cannot be destroyed here because
* we are under its umem_mutex.
*/
if (!refcount_inc_not_zero(&imr->mmkey.usecount))
return;

xa_erase(&imr->implicit_children, idx);
xa_lock(&imr->implicit_children);
if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
mr) {
xa_unlock(&imr->implicit_children);
return;
}

if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
xa_erase(&mr_to_mdev(mr)->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key));
__xa_erase(&mr_to_mdev(mr)->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);

/* Freeing a MR is a sleeping operation, so bounce to a work queue */
INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
Expand Down Expand Up @@ -502,18 +516,18 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
refcount_inc(&ret->mmkey.usecount);
goto out_lock;
}
xa_unlock(&imr->implicit_children);

if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL);
ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
xa_erase(&imr->implicit_children, idx);
goto out_mr;
__xa_erase(&imr->implicit_children, idx);
goto out_lock;
}
mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD;
}
xa_unlock(&imr->implicit_children);
mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;

Expand Down

0 comments on commit d3d9304

Please sign in to comment.