From b3523c1138c94cc1b362eb519044e2c74a5672ad Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Sat, 21 Oct 2023 18:03:09 +0000 Subject: [PATCH] prov/efa: close shm resource when it is disabled in ep When shm is disabled during ep enable, we should shut down shm resources as they will never be used. This will reduce unnecessary cost when poking shm ep/cq/av in the later operations. Signed-off-by: Shi Jin --- prov/efa/src/efa_domain.c | 18 ++++- prov/efa/src/rdm/efa_rdm_atomic.c | 8 +- prov/efa/src/rdm/efa_rdm_cq.c | 13 ++- prov/efa/src/rdm/efa_rdm_ep.h | 1 - prov/efa/src/rdm/efa_rdm_ep_fiops.c | 121 ++++++++++++++++++++-------- prov/efa/src/rdm/efa_rdm_msg.c | 24 +++--- prov/efa/src/rdm/efa_rdm_rma.c | 14 ++-- prov/efa/test/efa_unit_test_cq.c | 2 +- prov/efa/test/efa_unit_test_ep.c | 4 +- prov/efa/test/efa_unit_test_rnr.c | 2 +- 10 files changed, 141 insertions(+), 66 deletions(-) diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 704b6f45e30..328fd932d5c 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -139,9 +139,25 @@ static int efa_domain_init_qp_table(struct efa_domain *efa_domain) static int efa_domain_init_rdm(struct efa_domain *efa_domain, struct fi_info *info) { int err; + bool enable_shm = efa_env.enable_shm_transfer; + + /* App provided hints supercede environmental variables. + * + * Using the shm provider comes with some overheads, so avoid + * initializing the provider if the app provides a hint that it does not + * require node-local communication. We can still loopback over the EFA + * device in cases where the app violates the hint and continues + * communicating with node-local peers. + * + */ + if ((info->caps & FI_REMOTE_COMM) + /* but not local communication */ + && !(info->caps & FI_LOCAL_COMM)) { + enable_shm = false; + } efa_domain->shm_info = NULL; - if (efa_env.enable_shm_transfer) + if (enable_shm) efa_shm_info_create(info, &efa_domain->shm_info); else EFA_INFO(FI_LOG_CORE, "EFA will not use SHM for intranode communication because FI_EFA_ENABLE_SHM_TRANSFER=0\n"); diff --git a/prov/efa/src/rdm/efa_rdm_atomic.c b/prov/efa/src/rdm/efa_rdm_atomic.c index f0fe471e6b1..f1028cd9d35 100644 --- a/prov/efa/src/rdm/efa_rdm_atomic.c +++ b/prov/efa/src/rdm/efa_rdm_atomic.c @@ -236,7 +236,7 @@ efa_rdm_atomic_inject(struct fid_ep *ep, return err; peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (!(efa_rdm_ep_domain(efa_rdm_ep)->shm_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR)) remote_addr = 0; @@ -293,7 +293,7 @@ efa_rdm_atomic_writemsg(struct fid_ep *ep, peer = efa_rdm_ep_get_peer(efa_rdm_ep, msg->addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { efa_rdm_atomic_init_shm_msg(efa_rdm_ep, &shm_msg, msg, rma_iov, shm_desc); shm_msg.addr = peer->shm_fiaddr; return fi_atomicmsg(efa_rdm_ep->shm_ep, &shm_msg, flags); @@ -386,7 +386,7 @@ efa_rdm_atomic_readwritemsg(struct fid_ep *ep, peer = efa_rdm_ep_get_peer(efa_rdm_ep, msg->addr); assert(peer); - if (peer->is_local & efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { efa_rdm_atomic_init_shm_msg(efa_rdm_ep, &shm_msg, msg, shm_rma_iov, shm_desc); shm_msg.addr = peer->shm_fiaddr; efa_rdm_get_desc_for_shm(result_count, result_desc, shm_res_desc); @@ -498,7 +498,7 @@ efa_rdm_atomic_compwritemsg(struct fid_ep *ep, peer = efa_rdm_ep_get_peer(efa_rdm_ep, msg->addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { efa_rdm_atomic_init_shm_msg(efa_rdm_ep, &shm_msg, msg, shm_rma_iov, shm_desc); shm_msg.addr = peer->shm_fiaddr; efa_rdm_get_desc_for_shm(result_count, result_desc, shm_res_desc); diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index 0dc70b2fe31..27a38024be3 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -94,13 +94,18 @@ static ssize_t efa_rdm_cq_readfrom(struct fid_cq *cq_fid, void *buf, size_t coun ofi_genlock_lock(srx_ctx->lock); - if (cq->shm_cq) + if (cq->shm_cq) { fi_cq_read(cq->shm_cq, NULL, 0); - ret = ofi_cq_read_entries(&cq->util_cq, buf, count, src_addr); + /* + * This call aims to check whether there are completions written + * by shm. So we only want to call this when shm cq is available. + */ + ret = ofi_cq_read_entries(&cq->util_cq, buf, count, src_addr); - if (ret > 0) - goto out; + if (ret > 0) + goto out; + } ret = ofi_cq_readfrom(&cq->util_cq.cq_fid, buf, count, src_addr); diff --git a/prov/efa/src/rdm/efa_rdm_ep.h b/prov/efa/src/rdm/efa_rdm_ep.h index 4705426cc63..6aa9328010f 100644 --- a/prov/efa/src/rdm/efa_rdm_ep.h +++ b/prov/efa/src/rdm/efa_rdm_ep.h @@ -85,7 +85,6 @@ struct efa_rdm_ep { enum ibv_cq_ex_type ibv_cq_ex_type; /* shm provider fid */ - bool use_shm_for_tx; struct fid_ep *shm_ep; /* diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 43f08ac548a..2d6f35f26e9 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -888,41 +888,97 @@ void efa_rdm_ep_set_extra_info(struct efa_rdm_ep *ep) } /** - * @brief set the "use_shm_for_tx" field of efa_rdm_ep - * The field is set based on various factors, including - * environment variables, user hints, user's fi_setopt() - * calls. - * This function should be called during call to fi_enable(), - * after user called fi_setopt(). - * - * @param[in,out] ep endpoint to set the field + * @brief Close all shm resources bound to the efa ep and domain. + * This function will do this cleanup as best effort. When there is failure + * to clean up shm resource, it will still move forward by setting the resource + * pointer to NULL so it won't be used later. + * + * @param efa_rdm_ep pointer to efa_rdm_ep. */ -static -void efa_rdm_ep_set_use_shm_for_tx(struct efa_rdm_ep *ep) +static void efa_rdm_ep_close_shm_resources(struct efa_rdm_ep *efa_rdm_ep) { - if (!efa_rdm_ep_domain(ep)->shm_domain) { - ep->use_shm_for_tx = false; - return; + int ret; + struct efa_domain *efa_domain; + struct efa_av *efa_av; + struct efa_rdm_cq *efa_rdm_cq; + + if (efa_rdm_ep->shm_ep) { + ret = fi_close(&efa_rdm_ep->shm_ep->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm ep\n"); + efa_rdm_ep->shm_ep = NULL; } - assert(ep->user_info); + efa_av = efa_rdm_ep->base_ep.av; + if (efa_av->shm_rdm_av) { + ret = fi_close(&efa_av->shm_rdm_av->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm av\n"); + efa_av->shm_rdm_av = NULL; + } - /* App provided hints supercede environmental variables. - * - * Using the shm provider comes with some overheads, so avoid - * initializing the provider if the app provides a hint that it does not - * require node-local communication. We can still loopback over the EFA - * device in cases where the app violates the hint and continues - * communicating with node-local peers. - * - * aws-ofi-nccl relies on this feature. + efa_rdm_cq = container_of(efa_rdm_ep->base_ep.util_ep.tx_cq, struct efa_rdm_cq, util_cq); + if (efa_rdm_cq->shm_cq) { + ret = fi_close(&efa_rdm_cq->shm_cq->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm cq\n"); + efa_rdm_cq->shm_cq = NULL; + } + + efa_rdm_cq = container_of(efa_rdm_ep->base_ep.util_ep.rx_cq, struct efa_rdm_cq, util_cq); + if (efa_rdm_cq->shm_cq) { + ret = fi_close(&efa_rdm_cq->shm_cq->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm cq\n"); + efa_rdm_cq->shm_cq = NULL; + } + + efa_domain = efa_rdm_ep_domain(efa_rdm_ep); + + if (efa_domain->shm_domain) { + ret = fi_close(&efa_domain->shm_domain->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm domain\n"); + efa_domain->shm_domain = NULL; + } + + if (efa_domain->fabric->shm_fabric) { + ret = fi_close(&efa_domain->fabric->shm_fabric->fid); + if (ret) + EFA_WARN(FI_LOG_EP_CTRL, "Unable to close shm fabric\n"); + efa_domain->fabric->shm_fabric = NULL; + } + + if (efa_domain->shm_info) { + fi_freeinfo(efa_domain->shm_info); + efa_domain->shm_info = NULL; + } +} + +/** + * @brief update the shm resources based on the + * the current ep status. When cuda_api_permitted + * is set as false via fi_setopt, shm should be + * shut down. This function must be called inside + * fi_enable which is called after fi_setopt. + * + * @param[in,out] ep efa_rdm_ep + */ +static +void efa_rdm_ep_update_shm(struct efa_rdm_ep *ep) +{ + bool use_shm; + + /* + * when efa_env.enable_shm_transfer is false + * , shm resources won't be created. */ - if ((ep->user_info->caps & FI_REMOTE_COMM) - /* but not local communication */ - && !(ep->user_info->caps & FI_LOCAL_COMM)) { - ep->use_shm_for_tx = false; + if (!efa_rdm_ep_domain(ep)->shm_domain) return; - } + + use_shm = true; + + assert(ep->user_info); /* * shm provider must make cuda calls to transfer cuda memory. @@ -935,12 +991,11 @@ void efa_rdm_ep_set_use_shm_for_tx(struct efa_rdm_ep *ep) if ((ep->user_info->caps & FI_HMEM) && hmem_ops[FI_HMEM_CUDA].initialized && !ep->cuda_api_permitted) { - ep->use_shm_for_tx = false; - return; + use_shm = false; } - ep->use_shm_for_tx = efa_env.enable_shm_transfer; - return; + if (!use_shm) + efa_rdm_ep_close_shm_resources(ep); } @@ -987,7 +1042,7 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) EFA_WARN(FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! address: %s\n", fi_tostr("1", FI_TYPE_VERSION), ep_addr_str); - efa_rdm_ep_set_use_shm_for_tx(ep); + efa_rdm_ep_update_shm(ep); /* Enable shm provider endpoint & post recv buff. * Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set. diff --git a/prov/efa/src/rdm/efa_rdm_msg.c b/prov/efa/src/rdm/efa_rdm_msg.c index e89c0ffacde..292becbbda2 100644 --- a/prov/efa/src/rdm/efa_rdm_msg.c +++ b/prov/efa/src/rdm/efa_rdm_msg.c @@ -253,7 +253,7 @@ ssize_t efa_rdm_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, peer = efa_rdm_ep_get_peer(efa_rdm_ep, msg->addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { shm_msg = (struct fi_msg *)msg; if (msg->desc) { efa_desc = msg->desc; @@ -292,7 +292,7 @@ ssize_t efa_rdm_msg_sendv(struct fid_ep *ep, const struct iovec *iov, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(count, desc, shm_desc); return fi_sendv(efa_rdm_ep->shm_ep, iov, shm_desc, count, peer->shm_fiaddr, context); @@ -320,7 +320,7 @@ ssize_t efa_rdm_msg_send(struct fid_ep *ep, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_send(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, peer->shm_fiaddr, context); @@ -351,7 +351,7 @@ ssize_t efa_rdm_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_senddata(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, data, peer->shm_fiaddr, context); @@ -382,7 +382,7 @@ ssize_t efa_rdm_msg_inject(struct fid_ep *ep, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { return fi_inject(efa_rdm_ep->shm_ep, buf, len, peer->shm_fiaddr); } @@ -413,7 +413,7 @@ ssize_t efa_rdm_msg_injectdata(struct fid_ep *ep, const void *buf, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { return fi_injectdata(efa_rdm_ep->shm_ep, buf, len, data, peer->shm_fiaddr); } @@ -451,7 +451,7 @@ ssize_t efa_rdm_msg_tsendmsg(struct fid_ep *ep_fid, const struct fi_msg_tagged * peer = efa_rdm_ep_get_peer(efa_rdm_ep, tmsg->addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { shm_tmsg = (struct fi_msg_tagged *)tmsg; if (tmsg->desc) { efa_desc = tmsg->desc; @@ -491,7 +491,7 @@ ssize_t efa_rdm_msg_tsendv(struct fid_ep *ep_fid, const struct iovec *iov, return ret; assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(count, desc, shm_desc); return fi_tsendv(efa_rdm_ep->shm_ep, iov, desc? shm_desc : NULL, count, peer->shm_fiaddr, tag, context); @@ -526,7 +526,7 @@ ssize_t efa_rdm_msg_tsend(struct fid_ep *ep_fid, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_tsend(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, peer->shm_fiaddr, tag, context); @@ -558,7 +558,7 @@ ssize_t efa_rdm_msg_tsenddata(struct fid_ep *ep_fid, const void *buf, size_t len peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_tsenddata(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, data, peer->shm_fiaddr, tag, context); @@ -589,7 +589,7 @@ ssize_t efa_rdm_msg_tinject(struct fid_ep *ep_fid, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { return fi_tinject(efa_rdm_ep->shm_ep, buf, len, peer->shm_fiaddr, tag); } @@ -619,7 +619,7 @@ ssize_t efa_rdm_msg_tinjectdata(struct fid_ep *ep_fid, const void *buf, size_t l peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { return fi_tinjectdata(efa_rdm_ep->shm_ep, buf, len, data, peer->shm_fiaddr, tag); } diff --git a/prov/efa/src/rdm/efa_rdm_rma.c b/prov/efa/src/rdm/efa_rdm_rma.c index bae945ba150..cbe8e7f0b2c 100644 --- a/prov/efa/src/rdm/efa_rdm_rma.c +++ b/prov/efa/src/rdm/efa_rdm_rma.c @@ -147,7 +147,7 @@ ssize_t efa_rdm_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uin goto out; } - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { tmp_addr = msg->addr; tmp_desc = msg->desc; msg_clone = (struct fi_msg_rma *)msg; @@ -246,7 +246,7 @@ ssize_t efa_rdm_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **des peer = efa_rdm_ep_get_peer(efa_rdm_ep, src_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(iov_count, desc, shm_desc); @@ -290,7 +290,7 @@ ssize_t efa_rdm_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc, peer = efa_rdm_ep_get_peer(efa_rdm_ep, src_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); @@ -468,7 +468,7 @@ ssize_t efa_rdm_rma_writemsg(struct fid_ep *ep, goto out; } - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { tmp_addr = msg->addr; tmp_desc = msg->desc; msg_clone = (struct fi_msg_rma *)msg; @@ -526,7 +526,7 @@ ssize_t efa_rdm_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **de peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(iov_count, desc, shm_desc); return fi_writev(efa_rdm_ep->shm_ep, iov, desc? shm_desc : NULL, iov_count, peer->shm_fiaddr, addr, key, context); @@ -569,7 +569,7 @@ ssize_t efa_rdm_rma_write(struct fid_ep *ep, const void *buf, size_t len, void * peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_write(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, peer->shm_fiaddr, addr, key, context); @@ -599,7 +599,7 @@ ssize_t efa_rdm_rma_writedata(struct fid_ep *ep, const void *buf, size_t len, peer = efa_rdm_ep_get_peer(efa_rdm_ep, dest_addr); assert(peer); - if (peer->is_local && efa_rdm_ep->use_shm_for_tx) { + if (peer->is_local && efa_rdm_ep->shm_ep) { if (desc) efa_rdm_get_desc_for_shm(1, &desc, shm_desc); return fi_writedata(efa_rdm_ep->shm_ep, buf, len, desc? shm_desc[0] : NULL, data, peer->shm_fiaddr, addr, key, context); diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index 7eadb008aee..d98a387c136 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -108,7 +108,7 @@ static void test_rdm_cq_read_bad_send_status(struct efa_resource *resource, /* set use_shm_for_tx to false to force efa_rdm_ep to use efa device to send, * which means use EFA device to send. */ - efa_rdm_ep->use_shm_for_tx = false; + efa_rdm_ep->shm_ep = false; ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); assert_int_equal(ret, 0); diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 988c49b28aa..4d961ca5017 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -114,7 +114,7 @@ void test_efa_rdm_ep_handshake_exchange_host_id(struct efa_resource **state, uin efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->host_id = g_efa_unit_test_mocks.local_host_id; - efa_rdm_ep->use_shm_for_tx = false; + efa_rdm_ep->shm_ep = NULL; /* Create and register a fake peer */ assert_int_equal(fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len), 0); @@ -368,7 +368,7 @@ void test_efa_rdm_ep_dc_atomic_error_handling(struct efa_resource **state) msg.op = FI_SUM; efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - efa_rdm_ep->use_shm_for_tx = false; + efa_rdm_ep->shm_ep = NULL; /* set peer->flag to EFA_RDM_PEER_REQ_SENT will make efa_rdm_atomic() think * a REQ packet has been sent to the peer (so no need to send again) * handshake has not been received, so we do not know whether the peer support DC diff --git a/prov/efa/test/efa_unit_test_rnr.c b/prov/efa/test/efa_unit_test_rnr.c index 5060a7c07ee..5b2da7364e5 100644 --- a/prov/efa/test/efa_unit_test_rnr.c +++ b/prov/efa/test/efa_unit_test_rnr.c @@ -37,7 +37,7 @@ void test_efa_rnr_queue_and_resend(struct efa_resource **state) efa_rdm_ep->base_ep.qp->ibv_qp_ex->wr_complete = &efa_mock_ibv_wr_complete_no_op; assert_true(dlist_empty(&efa_rdm_ep->txe_list)); - efa_rdm_ep->use_shm_for_tx = false; + efa_rdm_ep->shm_ep = NULL; ret = fi_send(resource->ep, send_buff.buff, send_buff.size, fi_mr_desc(send_buff.mr), peer_addr, NULL /* context */); assert_int_equal(ret, 0); assert_false(dlist_empty(&efa_rdm_ep->txe_list));