Skip to content

Commit

Permalink
[v1.19.x] prov/efa: allocate pke_vec, recv_wr_vec, sge_vec from heap
Browse files Browse the repository at this point in the history
Currently, efa_rdm_pke_recvv and efa_rdm_ep_bulk_post_internal_rx_pkts
allocate pke_vec, recv_wr_vec, and sge_vec on the stack inside each call.
This patch moves these allocations to the heap, so they are performed
only once, during the endpoint's creation.

Signed-off-by: Shi Jin <sjina@amazon.com>
(cherry picked from commit d81247e)
  • Loading branch information
shijin-aws committed Oct 23, 2023
1 parent 6759a1a commit 1ed26e0
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 30 deletions.
13 changes: 0 additions & 13 deletions prov/efa/src/dgram/efa_dgram_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,6 @@ struct efa_send_wr {
struct ibv_sge sge[2];
};

struct efa_recv_wr {
/** @brief Work request struct passed to rdma-core's ibv_post_recv() */
struct ibv_recv_wr wr;

/** @brief Scatter gather element array backing wr.sg_list
*
* @details
* The EFA device supports a maximum of 2 iov/SGE per work request.
* For receive, only 1 SGE is ever used, hence the array size of 1.
*/
struct ibv_sge sge[1];
};


int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *info,
struct fid_ep **ep_fid, void *context);
Expand Down
8 changes: 8 additions & 0 deletions prov/efa/src/efa_base_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ int efa_base_ep_destruct(struct efa_base_ep *base_ep)
base_ep->util_ep_initialized = false;
}

if (base_ep->efa_recv_wr_vec)
free(base_ep->efa_recv_wr_vec);

return err;
}

Expand Down Expand Up @@ -299,6 +302,11 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep,

base_ep->xmit_more_wr_tail = &base_ep->xmit_more_wr_head;
base_ep->recv_more_wr_tail = &base_ep->recv_more_wr_head;
base_ep->efa_recv_wr_vec = calloc(sizeof(struct efa_recv_wr), EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV);
if (!base_ep->efa_recv_wr_vec) {
EFA_WARN(FI_LOG_EP_CTRL, "cannot alloc memory for base_ep->efa_recv_wr_vec!\n");
return -FI_ENOMEM;
}
base_ep->efa_qp_enabled = false;
return 0;
}
Expand Down
14 changes: 14 additions & 0 deletions prov/efa/src/efa_base_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ struct efa_qp {

struct efa_av;

struct efa_recv_wr {
/** @brief Work request struct passed to rdma-core's ibv_post_recv() */
struct ibv_recv_wr wr;

/** @brief Scatter gather element array backing wr.sg_list
*
* @details
* The EFA device supports a maximum of 2 iov/SGE per work request.
* For receive, only 1 SGE is ever used, hence the array size of 1.
*/
struct ibv_sge sge[1];
};

struct efa_base_ep {
struct util_ep util_ep;
struct efa_domain *domain;
Expand All @@ -68,6 +81,7 @@ struct efa_base_ep {
struct ibv_send_wr *xmit_more_wr_tail;
struct ibv_recv_wr recv_more_wr_head;
struct ibv_recv_wr *recv_more_wr_tail;
struct efa_recv_wr *efa_recv_wr_vec;
};

int efa_base_ep_bind_av(struct efa_base_ep *base_ep, struct efa_av *av);
Expand Down
1 change: 1 addition & 0 deletions prov/efa/src/rdm/efa_rdm_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ struct efa_rdm_ep {
bool sendrecv_in_order_aligned_128_bytes; /**< whether to support in order send/recv of each aligned 128 bytes memory region */
bool write_in_order_aligned_128_bytes; /**< whether to support in order write of each aligned 128 bytes memory region */
char err_msg[EFA_RDM_ERROR_MSG_BUFFER_LENGTH]; /* A large enough buffer to store CQ/EQ error data used by e.g. fi_cq_readerr */
struct efa_rdm_pke **pke_vec;
};

int efa_rdm_ep_flush_queued_blocking_copy_to_hmem(struct efa_rdm_ep *ep);
Expand Down
10 changes: 10 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,13 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info,
if (ret)
goto err_close_core_cq;

efa_rdm_ep->pke_vec = calloc(sizeof(struct efa_rdm_pke *), EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV);
if (!efa_rdm_ep->pke_vec) {
EFA_WARN(FI_LOG_EP_CTRL, "cannot alloc memory for efa_rdm_ep->pke_vec!\n");
ret = -FI_ENOMEM;
goto err_close_core_cq;
}

*ep = &efa_rdm_ep->base_ep.util_ep.ep_fid;
(*ep)->msg = &efa_rdm_msg_ops;
(*ep)->rma = &efa_rdm_rma_ops;
Expand Down Expand Up @@ -845,6 +852,9 @@ static int efa_rdm_ep_close(struct fid *fid)
efa_rdm_ep->peer_srx_ep = NULL;
}
efa_rdm_ep_destroy_buffer_pools(efa_rdm_ep);

if (efa_rdm_ep->pke_vec)
free(efa_rdm_ep->pke_vec);
free(efa_rdm_ep);
return retv;
}
Expand Down
11 changes: 5 additions & 6 deletions prov/efa/src/rdm/efa_rdm_ep_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,22 @@
*/
int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)
{
struct efa_rdm_pke *pke_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV];
int i, err;

if (ep->efa_rx_pkts_to_post == 0)
return 0;

assert(ep->efa_rx_pkts_to_post + ep->efa_rx_pkts_posted <= ep->efa_max_outstanding_rx_ops);
for (i = 0; i < ep->efa_rx_pkts_to_post; ++i) {
pke_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_rx_pkt_pool,
ep->pke_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_rx_pkt_pool,
EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert(pke_vec[i]);
assert(ep->pke_vec[i]);
}

err = efa_rdm_pke_recvv(pke_vec, ep->efa_rx_pkts_to_post);
err = efa_rdm_pke_recvv(ep->pke_vec, ep->efa_rx_pkts_to_post);
if (OFI_UNLIKELY(err)) {
for (i = 0; i < ep->efa_rx_pkts_to_post; ++i)
efa_rdm_pke_release_rx(pke_vec[i]);
efa_rdm_pke_release_rx(ep->pke_vec[i]);

EFA_WARN(FI_LOG_EP_CTRL,
"failed to post buf %d (%s)\n", -err,
Expand All @@ -80,7 +79,7 @@ int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)

#if ENABLE_DEBUG
for (i = 0; i < ep->efa_rx_pkts_to_post; ++i) {
dlist_insert_tail(&pke_vec[i]->dbg_entry,
dlist_insert_tail(&ep->pke_vec[i]->dbg_entry,
&ep->rx_posted_buf_list);
}
#endif
Expand Down
21 changes: 10 additions & 11 deletions prov/efa/src/rdm/efa_rdm_pke.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,8 +633,7 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec,
int pke_cnt)
{
struct efa_rdm_ep *ep;
struct ibv_recv_wr recv_wr_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV], *bad_wr;
struct ibv_sge sge_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV];
struct ibv_recv_wr *bad_wr;
int i, err;

assert(pke_cnt);
Expand All @@ -643,22 +642,22 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec,
assert(ep);

for (i = 0; i < pke_cnt; ++i) {
recv_wr_vec[i].wr_id = (uintptr_t)pke_vec[i];
recv_wr_vec[i].num_sge = 1; /* Always post one iov/SGE */
recv_wr_vec[i].sg_list = &sge_vec[i];
ep->base_ep.efa_recv_wr_vec[i].wr.wr_id = (uintptr_t)pke_vec[i];
ep->base_ep.efa_recv_wr_vec[i].wr.num_sge = 1; /* Always post one iov/SGE */
ep->base_ep.efa_recv_wr_vec[i].wr.sg_list = ep->base_ep.efa_recv_wr_vec[i].sge;
assert(pke_vec[i]->pkt_size > 0);
recv_wr_vec[i].sg_list[0].length = pke_vec[i]->pkt_size;
recv_wr_vec[i].sg_list[0].lkey = ((struct efa_mr *) pke_vec[i]->mr)->ibv_mr->lkey;
recv_wr_vec[i].sg_list[0].addr = (uintptr_t)pke_vec[i]->wiredata;
recv_wr_vec[i].next = NULL;
ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].length = pke_vec[i]->pkt_size;
ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].lkey = ((struct efa_mr *) pke_vec[i]->mr)->ibv_mr->lkey;
ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].addr = (uintptr_t)pke_vec[i]->wiredata;
ep->base_ep.efa_recv_wr_vec[i].wr.next = NULL;
if (i > 0)
recv_wr_vec[i-1].next = &recv_wr_vec[i];
ep->base_ep.efa_recv_wr_vec[i-1].wr.next = &ep->base_ep.efa_recv_wr_vec[i].wr;
#if HAVE_LTTNG
efa_tracepoint_wr_id_post_recv(pke_vec[i]);
#endif
}

err = ibv_post_recv(ep->base_ep.qp->ibv_qp, &recv_wr_vec[0], &bad_wr);
err = ibv_post_recv(ep->base_ep.qp->ibv_qp, &ep->base_ep.efa_recv_wr_vec[0].wr, &bad_wr);
if (OFI_UNLIKELY(err)) {
err = (err == ENOMEM) ? -FI_EAGAIN : -err;
}
Expand Down

0 comments on commit 1ed26e0

Please sign in to comment.