Skip to content

Commit 18a1e0e

Browse files
ohartoovsmb49
authored andcommitted
RDMA/mlx5: Fix unsafe xarray access in implicit ODP handling
BugLink: https://bugs.launchpad.net/bugs/2122072 [ Upstream commit 2c6b640ea08bff1a192bf87fa45246ff1e40767c ] __xa_store() and __xa_erase() were used without holding the proper lock, which led to a lockdep warning due to unsafe RCU usage. This patch replaces them with xa_store() and xa_erase(), which perform the necessary locking internally. ============================= WARNING: suspicious RCPU usage 6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1 Not tainted ----------------------------- ./include/linux/xarray.h:1211 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by kworker/u136:0/219: at: process_one_work+0xbe4/0x15f0 process_one_work+0x75c/0x15f0 pagefault_mr+0x9a5/0x1390 [mlx5_ib] stack backtrace: CPU: 14 UID: 0 PID: 219 Comm: kworker/u136:0 Not tainted 6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_ib_page_fault mlx5_ib_eqe_pf_action [mlx5_ib] Call Trace: dump_stack_lvl+0xa8/0xc0 lockdep_rcu_suspicious+0x1e6/0x260 xas_create+0xb8a/0xee0 xas_store+0x73/0x14c0 __xa_store+0x13c/0x220 ? xa_store_range+0x390/0x390 ? spin_bug+0x1d0/0x1d0 pagefault_mr+0xcb5/0x1390 [mlx5_ib] ? _raw_spin_unlock+0x1f/0x30 mlx5_ib_eqe_pf_action+0x3be/0x2620 [mlx5_ib] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5_ib_invalidate_range+0xcb0/0xcb0 [mlx5_ib] process_one_work+0x7db/0x15f0 ? pwq_dec_nr_in_flight+0xda0/0xda0 ? assign_work+0x168/0x240 worker_thread+0x57d/0xcd0 ? rescuer_thread+0xc40/0xc40 kthread+0x3b3/0x800 ? kthread_is_per_cpu+0xb0/0xb0 ? lock_downgrade+0x680/0x680 ? do_raw_spin_lock+0x12d/0x270 ? spin_bug+0x1d0/0x1d0 ? finish_task_switch.isra.0+0x284/0x9e0 ? lockdep_hardirqs_on_prepare+0x284/0x400 ? kthread_is_per_cpu+0xb0/0xb0 ret_from_fork+0x2d/0x70 ? kthread_is_per_cpu+0xb0/0xb0 ret_from_fork_asm+0x11/0x20 Fixes: d3d9304 ("RDMA/mlx5: Fix implicit ODP use after free") Link: https://patch.msgid.link/r/a85ddd16f45c8cb2bc0a188c2b0fcedfce975eb8.1750061791.git.leon@kernel.org Signed-off-by: Or Har-Toov <ohartoov@nvidia.com> Reviewed-by: Patrisious Haddad <phaddad@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Noah Wager <noah.wager@canonical.com> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
1 parent 22a487d commit 18a1e0e

File tree

1 file changed

+4
-4
lines changed
  • drivers/infiniband/hw/mlx5

1 file changed

+4
-4
lines changed

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
247247
}
248248

249249
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
250-
__xa_erase(&mr_to_mdev(mr)->odp_mkeys,
251-
mlx5_base_mkey(mr->mmkey.key));
250+
xa_erase(&mr_to_mdev(mr)->odp_mkeys,
251+
mlx5_base_mkey(mr->mmkey.key));
252252
xa_unlock(&imr->implicit_children);
253253

254254
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -521,8 +521,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
521521
}
522522

523523
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
524-
ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
525-
&mr->mmkey, GFP_KERNEL);
524+
ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
525+
&mr->mmkey, GFP_KERNEL);
526526
if (xa_is_err(ret)) {
527527
ret = ERR_PTR(xa_err(ret));
528528
__xa_erase(&imr->implicit_children, idx);

0 commit comments

Comments
 (0)