Skip to content

Commit

Permalink
prov/efa: Register user recv buffer for zero-copy receive mode
Browse files Browse the repository at this point in the history
When the user doesn't register the recv buffer, libfabric should
try to register it so that it can be posted to the device for receive.
If the registration fails, -FI_EAGAIN is returned.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Jul 31, 2024
1 parent 9387629 commit 881ec72
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
25 changes: 21 additions & 4 deletions prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ struct efa_rdm_ope *efa_rdm_ep_alloc_rxe(struct efa_rdm_ep *ep, fi_addr_t addr,
*/
int efa_rdm_ep_post_user_recv_buf(struct efa_rdm_ep *ep, struct efa_rdm_ope *rxe, size_t flags)
{
struct efa_rdm_pke *pkt_entry;
struct efa_rdm_pke *pkt_entry = NULL;
size_t rx_iov_offset = 0;
int err, rx_iov_index = 0;

Expand All @@ -225,8 +225,21 @@ int efa_rdm_ep_post_user_recv_buf(struct efa_rdm_ep *ep, struct efa_rdm_ope *rxe
err = ofi_iov_locate(rxe->iov, rxe->iov_count, ep->msg_prefix_size, &rx_iov_index, &rx_iov_offset);
if (OFI_UNLIKELY(err)) {
EFA_WARN(FI_LOG_CQ, "ofi_iov_locate failure: %s (%d)\n", fi_strerror(-err), -err);
return err;
goto err_free;
}

efa_rdm_ope_try_fill_desc(rxe, rx_iov_index, FI_RECV);
if (!rxe->desc[rx_iov_index]) {
/* efa_rdm_ope_try_fill_desc() did not fill the desc,
* which means memory registration failed.
* Returning -FI_EAGAIN here will cause the user to run the progress
* engine, which will cause some memory registrations
* in the MR cache to be released.
*/
err = -FI_EAGAIN;
goto err_free;
}

assert(rx_iov_index < rxe->iov_count);
assert(rx_iov_offset < rxe->iov[rx_iov_index].iov_len);

Expand All @@ -236,18 +249,22 @@ int efa_rdm_ep_post_user_recv_buf(struct efa_rdm_ep *ep, struct efa_rdm_ope *rxe

err = efa_rdm_pke_recvv(&pkt_entry, 1);
if (OFI_UNLIKELY(err)) {
efa_rdm_pke_release_rx(pkt_entry);
EFA_WARN(FI_LOG_EP_CTRL,
"failed to post user supplied buffer %d (%s)\n", -err,
fi_strerror(-err));
return err;
goto err_free;
}

#if ENABLE_DEBUG
dlist_insert_tail(&pkt_entry->dbg_entry, &ep->rx_posted_buf_list);
#endif
ep->user_rx_pkts_posted++;
return 0;

err_free:
if (pkt_entry)
efa_rdm_pke_release_rx(pkt_entry);
return err;
}


Expand Down
14 changes: 3 additions & 11 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct fi_context cancel_context = {0};
struct efa_unit_test_buff recv_buff;
char recv_buff[16];

resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM);
assert_non_null(resource->hints);
Expand All @@ -1035,17 +1035,9 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state)
/* enable zero-copy recv mode in ep */
test_efa_rdm_ep_use_zcpy_rx_impl(resource, true);

/* Construct a recv buffer with mr */
efa_unit_test_buff_construct(&recv_buff, resource, 16);

assert_int_equal(fi_recv(resource->ep, recv_buff.buff, recv_buff.size, fi_mr_desc(recv_buff.mr), FI_ADDR_UNSPEC, &cancel_context), 0);
/* fi_recv should work with a recv buffer with NULL desc */
assert_int_equal(fi_recv(resource->ep, recv_buff, 16, NULL, FI_ADDR_UNSPEC, &cancel_context), 0);

assert_int_equal(fi_cancel((struct fid *)resource->ep, &cancel_context), -FI_EOPNOTSUPP);

/**
* the buf is still posted to rdma-core, so unregistering mr can
* return non-zero. Currently ignore this failure.
*/
(void) fi_close(&recv_buff.mr->fid);
free(recv_buff.buff);
}

0 comments on commit 881ec72

Please sign in to comment.