Skip to content

Commit 5909ce5

Browse files
committed
IB/uverbs: Lock SRQ / CQ / PD objects in a consistent order
Since XRC support was added, the uverbs code has locked SRQ, CQ and PD objects needed during QP and SRQ creation in different orders depending on the the code path. This leads to the (at least theoretical) possibility of deadlock, and triggers the lockdep splat below. Fix this by making sure we always lock the SRQ first, then CQs and finally the PD. ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.0-rc5+ #34 Not tainted ------------------------------------------------------- ibv_srq_pingpon/2484 is trying to acquire lock: (SRQ-uobj){+++++.}, at: [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] but task is already holding lock: (CQ-uobj){+++++.}, at: [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (CQ-uobj){+++++.}: [<ffffffff81070fd0>] lock_acquire+0xbf/0xfe [<ffffffff81384f28>] down_read+0x34/0x43 [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffffa00af542>] idr_read_obj+0x9/0x19 [ib_uverbs] [<ffffffffa00b16c3>] ib_uverbs_create_qp+0x180/0x684 [ib_uverbs] [<ffffffffa00ae3dd>] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [<ffffffff810fe47f>] vfs_write+0xa7/0xee [<ffffffff810fe65f>] sys_write+0x45/0x69 [<ffffffff8138cdf9>] system_call_fastpath+0x16/0x1b -> #1 (PD-uobj){++++++}: [<ffffffff81070fd0>] lock_acquire+0xbf/0xfe [<ffffffff81384f28>] down_read+0x34/0x43 [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffffa00af542>] idr_read_obj+0x9/0x19 [ib_uverbs] [<ffffffffa00af8ad>] __uverbs_create_xsrq+0x96/0x386 [ib_uverbs] [<ffffffffa00b31b9>] ib_uverbs_detach_mcast+0x1cd/0x1e6 [ib_uverbs] [<ffffffffa00ae3dd>] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [<ffffffff810fe47f>] vfs_write+0xa7/0xee [<ffffffff810fe65f>] sys_write+0x45/0x69 [<ffffffff8138cdf9>] system_call_fastpath+0x16/0x1b -> #0 (SRQ-uobj){+++++.}: [<ffffffff81070898>] __lock_acquire+0xa29/0xd06 [<ffffffff81070fd0>] lock_acquire+0xbf/0xfe [<ffffffff81384f28>] down_read+0x34/0x43 [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffffa00af542>] idr_read_obj+0x9/0x19 [ib_uverbs] [<ffffffffa00b1728>] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [<ffffffffa00ae3dd>] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [<ffffffff810fe47f>] vfs_write+0xa7/0xee [<ffffffff810fe65f>] sys_write+0x45/0x69 [<ffffffff8138cdf9>] system_call_fastpath+0x16/0x1b other info that might help us debug this: Chain exists of: SRQ-uobj --> PD-uobj --> CQ-uobj Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(CQ-uobj); lock(PD-uobj); lock(CQ-uobj); lock(SRQ-uobj); *** DEADLOCK *** 3 locks held by ibv_srq_pingpon/2484: #0: (QP-uobj){+.+...}, at: [<ffffffffa00b162c>] ib_uverbs_create_qp+0xe9/0x684 [ib_uverbs] #1: (PD-uobj){++++++}, at: [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] #2: (CQ-uobj){+++++.}, at: [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] stack backtrace: Pid: 2484, comm: ibv_srq_pingpon Not tainted 3.4.0-rc5+ #34 Call Trace: [<ffffffff8137eff0>] print_circular_bug+0x1f8/0x209 [<ffffffff81070898>] __lock_acquire+0xa29/0xd06 [<ffffffffa00af37c>] ? __idr_get_uobj+0x20/0x5e [ib_uverbs] [<ffffffffa00af51b>] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffff81070fd0>] lock_acquire+0xbf/0xfe [<ffffffffa00af51b>] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffff81070eee>] ? lock_release+0x166/0x189 [<ffffffff81384f28>] down_read+0x34/0x43 [<ffffffffa00af51b>] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffffa00af51b>] idr_read_uobj+0x2f/0x4d [ib_uverbs] [<ffffffffa00af542>] idr_read_obj+0x9/0x19 [ib_uverbs] [<ffffffffa00b1728>] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [<ffffffff81070fec>] ? lock_acquire+0xdb/0xfe [<ffffffff81070c09>] ? lock_release_non_nested+0x94/0x213 [<ffffffff810d470f>] ? might_fault+0x40/0x90 [<ffffffff810d470f>] ? might_fault+0x40/0x90 [<ffffffffa00ae3dd>] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [<ffffffff810fe47f>] vfs_write+0xa7/0xee [<ffffffff810ff736>] ? fget_light+0x3b/0x99 [<ffffffff810fe65f>] sys_write+0x45/0x69 [<ffffffff8138cdf9>] system_call_fastpath+0x16/0x1b Reported-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
1 parent 3bea57a commit 5909ce5

File tree

1 file changed

+35
-31
lines changed

1 file changed

+35
-31
lines changed

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,13 +1423,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
14231423
}
14241424
device = xrcd->device;
14251425
} else {
1426-
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1427-
scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
1428-
if (!pd || !scq) {
1429-
ret = -EINVAL;
1430-
goto err_put;
1431-
}
1432-
14331426
if (cmd.qp_type == IB_QPT_XRC_INI) {
14341427
cmd.max_recv_wr = cmd.max_recv_sge = 0;
14351428
} else {
@@ -1440,13 +1433,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
14401433
goto err_put;
14411434
}
14421435
}
1443-
rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ?
1444-
scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
1445-
if (!rcq) {
1446-
ret = -EINVAL;
1447-
goto err_put;
1436+
1437+
if (cmd.recv_cq_handle != cmd.send_cq_handle) {
1438+
rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
1439+
if (!rcq) {
1440+
ret = -EINVAL;
1441+
goto err_put;
1442+
}
14481443
}
14491444
}
1445+
1446+
scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
1447+
rcq = rcq ?: scq;
1448+
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1449+
if (!pd || !scq) {
1450+
ret = -EINVAL;
1451+
goto err_put;
1452+
}
1453+
14501454
device = pd->device;
14511455
}
14521456

@@ -2484,27 +2488,27 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
24842488
init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
24852489
down_write(&obj->uevent.uobject.mutex);
24862490

2487-
pd = idr_read_pd(cmd->pd_handle, file->ucontext);
2488-
if (!pd) {
2489-
ret = -EINVAL;
2490-
goto err;
2491-
}
2492-
24932491
if (cmd->srq_type == IB_SRQT_XRC) {
2494-
attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
2495-
if (!attr.ext.xrc.cq) {
2496-
ret = -EINVAL;
2497-
goto err_put_pd;
2498-
}
2499-
25002492
attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
25012493
if (!attr.ext.xrc.xrcd) {
25022494
ret = -EINVAL;
2503-
goto err_put_cq;
2495+
goto err;
25042496
}
25052497

25062498
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
25072499
atomic_inc(&obj->uxrcd->refcnt);
2500+
2501+
attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
2502+
if (!attr.ext.xrc.cq) {
2503+
ret = -EINVAL;
2504+
goto err_put_xrcd;
2505+
}
2506+
}
2507+
2508+
pd = idr_read_pd(cmd->pd_handle, file->ucontext);
2509+
if (!pd) {
2510+
ret = -EINVAL;
2511+
goto err_put_cq;
25082512
}
25092513

25102514
attr.event_handler = ib_uverbs_srq_event_handler;
@@ -2581,17 +2585,17 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
25812585
ib_destroy_srq(srq);
25822586

25832587
err_put:
2584-
if (cmd->srq_type == IB_SRQT_XRC) {
2585-
atomic_dec(&obj->uxrcd->refcnt);
2586-
put_uobj_read(xrcd_uobj);
2587-
}
2588+
put_pd_read(pd);
25882589

25892590
err_put_cq:
25902591
if (cmd->srq_type == IB_SRQT_XRC)
25912592
put_cq_read(attr.ext.xrc.cq);
25922593

2593-
err_put_pd:
2594-
put_pd_read(pd);
2594+
err_put_xrcd:
2595+
if (cmd->srq_type == IB_SRQT_XRC) {
2596+
atomic_dec(&obj->uxrcd->refcnt);
2597+
put_uobj_read(xrcd_uobj);
2598+
}
25952599

25962600
err:
25972601
put_uobj_write(&obj->uevent.uobject);

0 commit comments

Comments
 (0)