From 59ed7207a881cbe946e482623c70296ca6beed92 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 13 Nov 2023 14:11:54 +0000 Subject: [PATCH] prov/verbs: Async route resolution and non-blocking EP creation The patch implements the last changes to make the EP creation non-blocking. The address is now resolved asynchronously when connection is established, which does no longer block the caller. The connection initiator creates the QP when the route is resolved. As for the connection target, it creates the QP when the EP is enabled (same as it was before). Note that the call to rdma_resolve_addr() in vrb_msg_ep_connect() becomes asynchronous because the EP CM id is associated with an event channel when the endpoint is bound to an event queue, which happend before the EP is enabled. Signed-off-by: Sylvain Didelot --- prov/verbs/src/verbs_cm.c | 12 ++++++++---- prov/verbs/src/verbs_ep.c | 18 ++++++++++-------- prov/verbs/src/verbs_init.c | 20 -------------------- 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/prov/verbs/src/verbs_cm.c b/prov/verbs/src/verbs_cm.c index fc6f62c3f72..037d2ee576f 100644 --- a/prov/verbs/src/verbs_cm.c +++ b/prov/verbs/src/verbs_cm.c @@ -190,11 +190,15 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, ofi_genlock_lock(&vrb_ep2_progress(ep)->ep_lock); assert(ep->state == VRB_IDLE); - ep->state = VRB_RESOLVE_ROUTE; - ret = rdma_resolve_route(ep->id, VERBS_RESOLVE_TIMEOUT); - if (ret) { + ep->state = VRB_RESOLVE_ADDR; + if (rdma_resolve_addr(ep->id, ep->info_attr.src_addr, + ep->info_attr.dest_addr, VERBS_RESOLVE_TIMEOUT)) { ret = -errno; - VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_route"); + VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_addr"); + ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, + "src addr", ep->info_attr.src_addr); + ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, + "dst addr", ep->info_attr.dest_addr); free(ep->cm_priv_data); ep->cm_priv_data = NULL; ep->state = VRB_IDLE; diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c index f710f0cf320..2a7fdf86a3c 100644 --- a/prov/verbs/src/verbs_ep.c +++ b/prov/verbs/src/verbs_ep.c @@ -1033,15 +1033,17 @@ static int vrb_ep_enable(struct fid_ep *ep_fid) return -FI_EINVAL; } - ret = rdma_create_qp(ep->id, domain->pd, &attr); - if (ret) { - VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_create_qp"); - return -errno; - } + if (ep->state == VRB_REQ_RCVD) { + ret = rdma_create_qp(ep->id, domain->pd, &attr); + if (ret) { + VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_create_qp"); + return -errno; + } - /* Allow shared XRC INI QP not controlled by RDMA CM - * to share same post functions as RC QP. */ - ep->ibv_qp = ep->id->qp; + /* Allow shared XRC INI QP not controlled by RDMA CM + * to share same post functions as RC QP. */ + ep->ibv_qp = ep->id->qp; + } break; case FI_EP_DGRAM: assert(domain); diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c index 5b1c6b765fd..a9fb6a629df 100644 --- a/prov/verbs/src/verbs_init.c +++ b/prov/verbs/src/verbs_init.c @@ -338,29 +338,9 @@ int vrb_create_ep(struct vrb_ep *ep, enum rdma_port_space ps, goto err1; } - /* TODO convert this call to non-blocking (use event channel) as well: - * This may likely be needed for better scaling when running large - * MPI jobs. - * Making this non-blocking would mean we can't create QP at EP enable - * time. We need to wait for RDMA_CM_EVENT_ADDR_RESOLVED event before - * creating the QP using rdma_create_qp. It would also require a SW - * receive queue to store recvs posted by app after enabling the EP. - */ - if (rdma_resolve_addr(*id, rai->ai_src_addr, rai->ai_dst_addr, - VERBS_RESOLVE_TIMEOUT)) { - ret = -errno; - VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_addr"); - ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "src addr", rai->ai_src_addr); - ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "dst addr", rai->ai_dst_addr); - goto err2; - } rdma_freeaddrinfo(rai); return 0; -err2: - rdma_destroy_id(*id); err1: rdma_freeaddrinfo(rai); return ret;