diff --git a/prov/efa/src/dgram/efa_dgram_ep.h b/prov/efa/src/dgram/efa_dgram_ep.h
index 5e30fd7a05b..3b0b803c7cc 100644
--- a/prov/efa/src/dgram/efa_dgram_ep.h
+++ b/prov/efa/src/dgram/efa_dgram_ep.h
@@ -58,19 +58,6 @@ struct efa_send_wr {
 	struct ibv_sge sge[2];
 };
 
-struct efa_recv_wr {
-	/** @brief Work request struct used by rdma-core */
-	struct ibv_recv_wr wr;
-
-	/** @brief Scatter gather element array
-	 *
-	 * @details
-	 * EFA device supports a maximum of 2 iov/SGE
-	 * For receive, we only use 1 SGE
-	 */
-	struct ibv_sge sge[1];
-};
-
 int efa_dgram_ep_open(struct fid_domain *domain_fid, struct fi_info *info,
 		      struct fid_ep **ep_fid, void *context);
 
diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c
index 6c395d71feb..f81534df93b 100644
--- a/prov/efa/src/efa_base_ep.c
+++ b/prov/efa/src/efa_base_ep.c
@@ -100,6 +100,9 @@ int efa_base_ep_destruct(struct efa_base_ep *base_ep)
 		base_ep->util_ep_initialized = false;
 	}
 
+	if (base_ep->efa_recv_wr_vec)
+		free(base_ep->efa_recv_wr_vec);
+
 	return err;
 }
 
@@ -299,6 +302,11 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep,
 	base_ep->xmit_more_wr_tail = &base_ep->xmit_more_wr_head;
 	base_ep->recv_more_wr_tail = &base_ep->recv_more_wr_head;
 
+	base_ep->efa_recv_wr_vec = calloc(sizeof(struct efa_recv_wr), EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV);
+	if (!base_ep->efa_recv_wr_vec) {
+		EFA_WARN(FI_LOG_EP_CTRL, "cannot alloc memory for base_ep->efa_recv_wr_vec!\n");
+		return -FI_ENOMEM;
+	}
 	base_ep->efa_qp_enabled = false;
 	return 0;
 }
diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h
index e1d69bc00f4..f5bf1e45616 100644
--- a/prov/efa/src/efa_base_ep.h
+++ b/prov/efa/src/efa_base_ep.h
@@ -51,6 +51,19 @@ struct efa_qp {
 
 struct efa_av;
 
+struct efa_recv_wr {
+	/** @brief Work request struct used by rdma-core */
+	struct ibv_recv_wr wr;
+
+	/** @brief Scatter gather element array
+	 *
+	 * @details
+	 * EFA device supports a maximum of 2 iov/SGE
+	 * For receive, we only use 1 SGE
+	 */
+	struct ibv_sge sge[1];
+};
+
 struct efa_base_ep {
 	struct util_ep util_ep;
 	struct efa_domain *domain;
@@ -68,6 +81,7 @@ struct efa_base_ep {
 	struct ibv_send_wr *xmit_more_wr_tail;
 	struct ibv_recv_wr recv_more_wr_head;
 	struct ibv_recv_wr *recv_more_wr_tail;
+	struct efa_recv_wr *efa_recv_wr_vec;
 };
 
 int efa_base_ep_bind_av(struct efa_base_ep *base_ep, struct efa_av *av);
diff --git a/prov/efa/src/rdm/efa_rdm_ep.h b/prov/efa/src/rdm/efa_rdm_ep.h
index 6aa9328010f..2ecd9f51c06 100644
--- a/prov/efa/src/rdm/efa_rdm_ep.h
+++ b/prov/efa/src/rdm/efa_rdm_ep.h
@@ -232,6 +232,7 @@ struct efa_rdm_ep {
 	bool sendrecv_in_order_aligned_128_bytes; /**< whether to support in order send/recv of each aligned 128 bytes memory region */
 	bool write_in_order_aligned_128_bytes; /**< whether to support in order write of each aligned 128 bytes memory region */
 	char err_msg[EFA_RDM_ERROR_MSG_BUFFER_LENGTH]; /* A large enough buffer to store CQ/EQ error data used by e.g. fi_cq_readerr */
+	struct efa_rdm_pke **pke_vec;
 };
 
 int efa_rdm_ep_flush_queued_blocking_copy_to_hmem(struct efa_rdm_ep *ep);
diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c
index 2d6f35f26e9..20adbc50d1b 100644
--- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c
+++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c
@@ -543,6 +543,13 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info,
 	if (ret)
 		goto err_close_core_cq;
 
+	efa_rdm_ep->pke_vec = calloc(sizeof(struct efa_rdm_pke *), EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV);
+	if (!efa_rdm_ep->pke_vec) {
+		EFA_WARN(FI_LOG_EP_CTRL, "cannot alloc memory for efa_rdm_ep->pke_vec!\n");
+		ret = -FI_ENOMEM;
+		goto err_close_core_cq;
+	}
+
 	*ep = &efa_rdm_ep->base_ep.util_ep.ep_fid;
 	(*ep)->msg = &efa_rdm_msg_ops;
 	(*ep)->rma = &efa_rdm_rma_ops;
@@ -845,6 +852,9 @@ static int efa_rdm_ep_close(struct fid *fid)
 		efa_rdm_ep->peer_srx_ep = NULL;
 	}
 	efa_rdm_ep_destroy_buffer_pools(efa_rdm_ep);
+
+	if (efa_rdm_ep->pke_vec)
+		free(efa_rdm_ep->pke_vec);
 	free(efa_rdm_ep);
 	return retv;
 }
diff --git a/prov/efa/src/rdm/efa_rdm_ep_progress.c b/prov/efa/src/rdm/efa_rdm_ep_progress.c
index 06b981d9e4e..a592d3af222 100644
--- a/prov/efa/src/rdm/efa_rdm_ep_progress.c
+++ b/prov/efa/src/rdm/efa_rdm_ep_progress.c
@@ -54,7 +54,6 @@
  */
 int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)
 {
-	struct efa_rdm_pke *pke_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV];
 	int i, err;
 
 	if (ep->efa_rx_pkts_to_post == 0)
@@ -62,15 +61,15 @@ int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)
 	assert(ep->efa_rx_pkts_to_post + ep->efa_rx_pkts_posted <= ep->efa_max_outstanding_rx_ops);
 
 	for (i = 0; i < ep->efa_rx_pkts_to_post; ++i) {
-		pke_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_rx_pkt_pool,
+		ep->pke_vec[i] = efa_rdm_pke_alloc(ep, ep->efa_rx_pkt_pool,
 					       EFA_RDM_PKE_FROM_EFA_RX_POOL);
-		assert(pke_vec[i]);
+		assert(ep->pke_vec[i]);
 	}
 
-	err = efa_rdm_pke_recvv(pke_vec, ep->efa_rx_pkts_to_post);
+	err = efa_rdm_pke_recvv(ep->pke_vec, ep->efa_rx_pkts_to_post);
 	if (OFI_UNLIKELY(err)) {
 		for (i = 0; i < ep->efa_rx_pkts_to_post; ++i)
-			efa_rdm_pke_release_rx(pke_vec[i]);
+			efa_rdm_pke_release_rx(ep->pke_vec[i]);
 		EFA_WARN(FI_LOG_EP_CTRL,
 			 "failed to post buf %d (%s)\n",
 			 -err,
@@ -80,7 +79,7 @@ int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)
 
 #if ENABLE_DEBUG
 	for (i = 0; i < ep->efa_rx_pkts_to_post; ++i) {
-		dlist_insert_tail(&pke_vec[i]->dbg_entry,
+		dlist_insert_tail(&ep->pke_vec[i]->dbg_entry,
 				  &ep->rx_posted_buf_list);
 	}
 #endif
diff --git a/prov/efa/src/rdm/efa_rdm_pke.c b/prov/efa/src/rdm/efa_rdm_pke.c
index 96ad3a41f2b..dddc7e337b2 100644
--- a/prov/efa/src/rdm/efa_rdm_pke.c
+++ b/prov/efa/src/rdm/efa_rdm_pke.c
@@ -633,8 +633,7 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec,
 			  int pke_cnt)
 {
 	struct efa_rdm_ep *ep;
-	struct ibv_recv_wr recv_wr_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV], *bad_wr;
-	struct ibv_sge sge_vec[EFA_RDM_EP_MAX_WR_PER_IBV_POST_RECV];
+	struct ibv_recv_wr *bad_wr;
 	int i, err;
 
 	assert(pke_cnt);
@@ -643,22 +642,22 @@ ssize_t efa_rdm_pke_recvv(struct efa_rdm_pke **pke_vec,
 	assert(ep);
 
 	for (i = 0; i < pke_cnt; ++i) {
-		recv_wr_vec[i].wr_id = (uintptr_t)pke_vec[i];
-		recv_wr_vec[i].num_sge = 1; /* Always post one iov/SGE */
-		recv_wr_vec[i].sg_list = &sge_vec[i];
+		ep->base_ep.efa_recv_wr_vec[i].wr.wr_id = (uintptr_t)pke_vec[i];
+		ep->base_ep.efa_recv_wr_vec[i].wr.num_sge = 1; /* Always post one iov/SGE */
+		ep->base_ep.efa_recv_wr_vec[i].wr.sg_list = ep->base_ep.efa_recv_wr_vec[i].sge;
 		assert(pke_vec[i]->pkt_size > 0);
-		recv_wr_vec[i].sg_list[0].length = pke_vec[i]->pkt_size;
-		recv_wr_vec[i].sg_list[0].lkey = ((struct efa_mr *) pke_vec[i]->mr)->ibv_mr->lkey;
-		recv_wr_vec[i].sg_list[0].addr = (uintptr_t)pke_vec[i]->wiredata;
-		recv_wr_vec[i].next = NULL;
+		ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].length = pke_vec[i]->pkt_size;
+		ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].lkey = ((struct efa_mr *) pke_vec[i]->mr)->ibv_mr->lkey;
+		ep->base_ep.efa_recv_wr_vec[i].wr.sg_list[0].addr = (uintptr_t)pke_vec[i]->wiredata;
+		ep->base_ep.efa_recv_wr_vec[i].wr.next = NULL;
 		if (i > 0)
-			recv_wr_vec[i-1].next = &recv_wr_vec[i];
+			ep->base_ep.efa_recv_wr_vec[i-1].wr.next = &ep->base_ep.efa_recv_wr_vec[i].wr;
 #if HAVE_LTTNG
 		efa_tracepoint_wr_id_post_recv(pke_vec[i]);
 #endif
 	}
 
-	err = ibv_post_recv(ep->base_ep.qp->ibv_qp, &recv_wr_vec[0], &bad_wr);
+	err = ibv_post_recv(ep->base_ep.qp->ibv_qp, &ep->base_ep.efa_recv_wr_vec[0].wr, &bad_wr);
 	if (OFI_UNLIKELY(err)) {
 		err = (err == ENOMEM) ? -FI_EAGAIN : -err;
 	}