Skip to content

Commit

Permalink
svcrdma: Revert "svcrdma: Reduce Receive doorbell rate"
Browse files Browse the repository at this point in the history
I tested commit 43042b9 ("svcrdma: Reduce Receive doorbell
rate") with mlx4 (IB) and software iWARP and didn't find any
issues. However, I recently got my hardware iWARP setup back on
line (FastLinQ) and it's crashing hard on this commit (confirmed
via bisect).

The failure mode is complex.
 - After a connection is established, the first Receive completes
   normally.
 - But the second and third Receives have garbage in their Receive
   buffers. The server responds with ERR_VERS as a result.
 - When the client tears down the connection to retry, a couple
   of posted Receives flush twice, and that corrupts the recv_ctxt
   free list.
 - __svc_rdma_free then faults or loops infinitely while destroying
   the xprt's recv_ctxts.

Since 43042b9 ("svcrdma: Reduce Receive doorbell rate") does
not fix a bug but is a scalability enhancement, it's safe and
appropriate to revert it while working on a replacement.

Fixes: 43042b9 ("svcrdma: Reduce Receive doorbell rate")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
  • Loading branch information
chucklever committed Mar 11, 2021
1 parent b4250dd commit bade4be
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 44 deletions.
1 change: 0 additions & 1 deletion include/linux/sunrpc/svc_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ struct svcxprt_rdma {

wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
unsigned long sc_flags;
u32 sc_pending_recvs;
struct list_head sc_read_complete_q;
struct work_struct sc_work;

Expand Down
82 changes: 39 additions & 43 deletions net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,46 +266,33 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}

static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
unsigned int wanted, bool temp)
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt;
struct ib_recv_wr *recv_chain;
int ret;

recv_chain = NULL;
while (wanted--) {
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
break;

trace_svcrdma_post_recv(ctxt);
ctxt->rc_temp = temp;
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
}
if (!recv_chain)
return false;

ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
trace_svcrdma_post_recv(ctxt);
ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
if (ret)
goto err_post;
return true;
return 0;

err_post:
while (bad_wr) {
ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
rc_recv_wr);
bad_wr = bad_wr->next;
svc_rdma_recv_ctxt_put(rdma, ctxt);
}

trace_svcrdma_rq_post_err(rdma, ret);
/* Since we're destroying the xprt, no need to reset
* sc_pending_recvs. */
return false;
svc_rdma_recv_ctxt_put(rdma, ctxt);
return ret;
}

static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;

if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
return 0;
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return -ENOMEM;
return __svc_rdma_post_recv(rdma, ctxt);
}

/**
Expand All @@ -316,30 +303,46 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
struct svc_rdma_recv_ctxt *ctxt;
unsigned int i;
int ret;

for (i = 0; i < rdma->sc_max_requests; i++) {
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return false;
ctxt->rc_temp = true;
ret = __svc_rdma_post_recv(rdma, ctxt);
if (ret)
return false;
}
return true;
}

/**
* svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
* @cq: Completion Queue context
* @wc: Work Completion object
*
* NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
* the Receive completion handler could be running.
*/
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;

rdma->sc_pending_recvs--;

/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);

trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);
if (wc->status != IB_WC_SUCCESS)
goto flushed;

if (svc_rdma_post_recv(rdma))
goto post_err;

/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;

Expand All @@ -350,18 +353,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);

if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
rdma->sc_pending_recvs < rdma->sc_max_requests)
if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
false))
goto post_err;

return;

flushed:
svc_rdma_recv_ctxt_put(rdma, ctxt);
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
Expand Down

0 comments on commit bade4be

Please sign in to comment.