From 39b5740118e09f57a7d217c2a00e016117c4356d Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 25 Mar 2024 10:34:43 +0100 Subject: [PATCH] prov/verbs: Allow for large TX queues with limited (or no) inline data Using large TX queues with the verbs provider would cause fi_getinfo() to return an empty list of verbs adapters because the call to ibv_create_qp() executed as part of fi_getinfo() would fail with EINVAL. The failure happens because the code allocates the QP with the maximum amount of inline data supported by the adapter, which is empirically determined by vrb_find_max_inline(). The problem is that using inline data limits the TX queue size that can be allocated. The patch removes vrb_get_qp_cap(), whose sole purpose is to set the maximum inline data size returned by vrb_find_max_inline(). This operation can be done in vrb_get_device_attrs() directly. Signed-off-by: Sylvain Didelot --- prov/verbs/src/verbs_info.c | 79 +++---------------------------------- prov/verbs/src/verbs_init.c | 11 +++++- prov/verbs/src/verbs_ofi.h | 3 +- 3 files changed, 16 insertions(+), 77 deletions(-) diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index e4def4c6d5d..a2b06a8a0ce 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -426,76 +426,6 @@ static int vrb_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi) return FI_SUCCESS; } -static inline int vrb_get_qp_cap(struct ibv_context *ctx, - struct fi_info *info, uint32_t protocol) -{ - struct ibv_pd *pd; - struct ibv_cq *cq; - struct ibv_qp *qp; - struct ibv_qp_init_attr init_attr; - enum ibv_qp_type qp_type; - int ret = 0; - - pd = ibv_alloc_pd(ctx); - if (!pd) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd"); - return -errno; - } - - cq = ibv_create_cq(ctx, 1, NULL, NULL, 0); - if (!cq) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_cq"); - ret = -errno; - goto err1; - } - - if (protocol == FI_PROTO_RDMA_CM_IB_XRC) - qp_type = IBV_QPT_XRC_SEND; - else - qp_type 
= (info->ep_attr->type != FI_EP_DGRAM) ? - IBV_QPT_RC : IBV_QPT_UD; - - memset(&init_attr, 0, sizeof init_attr); - init_attr.send_cq = cq; - - assert(info->tx_attr->size && - info->tx_attr->iov_limit && - info->rx_attr->size && - info->rx_attr->iov_limit); - - init_attr.cap.max_send_wr = MIN(vrb_gl_data.def_tx_size, - info->tx_attr->size); - init_attr.cap.max_send_sge = MIN(vrb_gl_data.def_tx_iov_limit, - info->tx_attr->iov_limit); - - if (qp_type != IBV_QPT_XRC_SEND) { - init_attr.recv_cq = cq; - init_attr.cap.max_recv_wr = MIN(vrb_gl_data.def_rx_size, - info->rx_attr->size); - init_attr.cap.max_recv_sge = MIN(vrb_gl_data.def_rx_iov_limit, - info->rx_attr->iov_limit); - } - init_attr.cap.max_inline_data = vrb_find_max_inline(pd, ctx, qp_type); - init_attr.qp_type = qp_type; - - qp = ibv_create_qp(pd, &init_attr); - if (!qp) { - VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_qp"); - ret = -errno; - goto err2; - } - - info->tx_attr->inject_size = init_attr.cap.max_inline_data; - - ibv_destroy_qp(qp); -err2: - ibv_destroy_cq(cq); -err1: - ibv_dealloc_pd(pd); - - return ret; -} - static int vrb_mtu_type_to_len(enum ibv_mtu mtu_type) { switch (mtu_type) { @@ -552,6 +482,7 @@ static int vrb_get_device_attrs(struct ibv_context *ctx, enum fi_log_level level = vrb_gl_data.msg.prefer_xrc ? FI_LOG_WARN : FI_LOG_INFO; const char *dev_name = ibv_get_device_name(ctx->device); + enum ibv_qp_type qp_type; ret = ibv_query_device(ctx, &device_attr); if (ret) { @@ -595,11 +526,13 @@ static int vrb_get_device_attrs(struct ibv_context *ctx, if (protocol == FI_PROTO_RDMA_CM_IB_XRC) { info->rx_attr->iov_limit = MIN(info->rx_attr->iov_limit, 1); info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT; + qp_type = IBV_QPT_XRC_SEND; + } else { + qp_type = (info->ep_attr->type != FI_EP_DGRAM) ? 
+ IBV_QPT_RC : IBV_QPT_UD; } - ret = vrb_get_qp_cap(ctx, info, protocol); - if (ret) - return ret; + info->tx_attr->inject_size = vrb_find_max_inline(ctx, qp_type); for (port_num = 1; port_num < device_attr.phys_port_cnt + 1; port_num++) { ret = ibv_query_port(ctx, port_num, &port_attr); diff --git a/prov/verbs/src/verbs_init.c b/prov/verbs/src/verbs_init.c index 05183d0f9d4..6aca05ac01d 100644 --- a/prov/verbs/src/verbs_init.c +++ b/prov/verbs/src/verbs_init.c @@ -461,10 +461,10 @@ void vrb_set_rnr_timer(struct ibv_qp *qp) vrb_dbg_query_qp_attr(qp); } -int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, - enum ibv_qp_type qp_type) +int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type) { struct ibv_qp_init_attr qp_attr; + struct ibv_pd *pd; struct ibv_qp *qp = NULL; struct ibv_cq *cq; int max_inline = 2; @@ -472,6 +472,9 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, const char *dev_name = ibv_get_device_name(context->device); uint8_t i; + pd = ibv_alloc_pd(context); + assert(pd); + for (i = 0; i < count_of(verbs_dev_presets); i++) { if (!strncmp(dev_name, verbs_dev_presets[i].dev_name_prefix, strlen(verbs_dev_presets[i].dev_name_prefix))) @@ -544,6 +547,10 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, ibv_destroy_cq(cq); } + if (pd) { + ibv_dealloc_pd(pd); + } + return rst; } diff --git a/prov/verbs/src/verbs_ofi.h b/prov/verbs/src/verbs_ofi.h index 82454763efe..a19b0d3150c 100644 --- a/prov/verbs/src/verbs_ofi.h +++ b/prov/verbs/src/verbs_ofi.h @@ -878,8 +878,7 @@ int vrb_query_atomic(struct fid_domain *domain_fid, enum fi_datatype datatype, uint64_t flags); void vrb_set_rnr_timer(struct ibv_qp *qp); void vrb_cleanup_cq(struct vrb_ep *cur_ep); -int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context, - enum ibv_qp_type qp_type); +int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type); struct vrb_dgram_av { struct util_av 
util_av;