Skip to content

Commit

Permalink
prov/verbs: Allow for large TX queues with limited (or no) inline data
Browse files Browse the repository at this point in the history
Using large TX queues with the verbs provider would cause fi_getinfo()
to return an empty list of verbs adapters because the call to
ibv_create_qp() executed as part of fi_getinfo() would fail with EINVAL.

The failure happens because the code allocates the QP with the maximum
amount of inline data supported by the adapter, which is empirically
determined by vrb_find_max_inline(). The problem is that using inline
data limits the TX queue size that can be allocated.

The fix implemented in this patch is to set max_inline_data = 0 when
the QP is created, then update info->tx_attr->inject_size with the value
returned by vrb_find_max_inline() after the QP is created. The code in
vrb_find_max_inline() guarantees that the calculated inline value is
correct as it is also tested with a fake QP creation.

Signed-off-by: Sylvain Didelot <sdidelot@ddn.com>
  • Loading branch information
sydidelot committed Apr 2, 2024
1 parent e3fc8d7 commit 5b5ed9f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 77 deletions.
79 changes: 6 additions & 73 deletions prov/verbs/src/verbs_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -426,76 +426,6 @@ static int vrb_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi)
return FI_SUCCESS;
}

static inline int vrb_get_qp_cap(struct ibv_context *ctx,
struct fi_info *info, uint32_t protocol)
{
struct ibv_pd *pd;
struct ibv_cq *cq;
struct ibv_qp *qp;
struct ibv_qp_init_attr init_attr;
enum ibv_qp_type qp_type;
int ret = 0;

pd = ibv_alloc_pd(ctx);
if (!pd) {
VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_alloc_pd");
return -errno;
}

cq = ibv_create_cq(ctx, 1, NULL, NULL, 0);
if (!cq) {
VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_cq");
ret = -errno;
goto err1;
}

if (protocol == FI_PROTO_RDMA_CM_IB_XRC)
qp_type = IBV_QPT_XRC_SEND;
else
qp_type = (info->ep_attr->type != FI_EP_DGRAM) ?
IBV_QPT_RC : IBV_QPT_UD;

memset(&init_attr, 0, sizeof init_attr);
init_attr.send_cq = cq;

assert(info->tx_attr->size &&
info->tx_attr->iov_limit &&
info->rx_attr->size &&
info->rx_attr->iov_limit);

init_attr.cap.max_send_wr = MIN(vrb_gl_data.def_tx_size,
info->tx_attr->size);
init_attr.cap.max_send_sge = MIN(vrb_gl_data.def_tx_iov_limit,
info->tx_attr->iov_limit);

if (qp_type != IBV_QPT_XRC_SEND) {
init_attr.recv_cq = cq;
init_attr.cap.max_recv_wr = MIN(vrb_gl_data.def_rx_size,
info->rx_attr->size);
init_attr.cap.max_recv_sge = MIN(vrb_gl_data.def_rx_iov_limit,
info->rx_attr->iov_limit);
}
init_attr.cap.max_inline_data = vrb_find_max_inline(pd, ctx, qp_type);
init_attr.qp_type = qp_type;

qp = ibv_create_qp(pd, &init_attr);
if (!qp) {
VRB_WARN_ERRNO(FI_LOG_FABRIC, "ibv_create_qp");
ret = -errno;
goto err2;
}

info->tx_attr->inject_size = init_attr.cap.max_inline_data;

ibv_destroy_qp(qp);
err2:
ibv_destroy_cq(cq);
err1:
ibv_dealloc_pd(pd);

return ret;
}

static int vrb_mtu_type_to_len(enum ibv_mtu mtu_type)
{
switch (mtu_type) {
Expand Down Expand Up @@ -552,6 +482,7 @@ static int vrb_get_device_attrs(struct ibv_context *ctx,
enum fi_log_level level =
vrb_gl_data.msg.prefer_xrc ? FI_LOG_WARN : FI_LOG_INFO;
const char *dev_name = ibv_get_device_name(ctx->device);
enum ibv_qp_type qp_type;

ret = ibv_query_device(ctx, &device_attr);
if (ret) {
Expand Down Expand Up @@ -595,11 +526,13 @@ static int vrb_get_device_attrs(struct ibv_context *ctx,
if (protocol == FI_PROTO_RDMA_CM_IB_XRC) {
info->rx_attr->iov_limit = MIN(info->rx_attr->iov_limit, 1);
info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT;
qp_type = IBV_QPT_XRC_SEND;
} else {
qp_type = (info->ep_attr->type != FI_EP_DGRAM) ?
IBV_QPT_RC : IBV_QPT_UD;
}

ret = vrb_get_qp_cap(ctx, info, protocol);
if (ret)
return ret;
info->tx_attr->inject_size = vrb_find_max_inline(ctx, qp_type);

for (port_num = 1; port_num < device_attr.phys_port_cnt + 1; port_num++) {
ret = ibv_query_port(ctx, port_num, &port_attr);
Expand Down
11 changes: 9 additions & 2 deletions prov/verbs/src/verbs_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,17 +461,20 @@ void vrb_set_rnr_timer(struct ibv_qp *qp)
vrb_dbg_query_qp_attr(qp);
}

int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context,
enum ibv_qp_type qp_type)
int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type)
{
struct ibv_qp_init_attr qp_attr;
struct ibv_pd *pd;
struct ibv_qp *qp = NULL;
struct ibv_cq *cq;
int max_inline = 2;
int rst = 0;
const char *dev_name = ibv_get_device_name(context->device);
uint8_t i;

pd = ibv_alloc_pd(context);
assert(pd);

for (i = 0; i < count_of(verbs_dev_presets); i++) {
if (!strncmp(dev_name, verbs_dev_presets[i].dev_name_prefix,
strlen(verbs_dev_presets[i].dev_name_prefix)))
Expand Down Expand Up @@ -544,6 +547,10 @@ int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context,
ibv_destroy_cq(cq);
}

if (pd) {
ibv_dealloc_pd(pd);
}

return rst;
}

Expand Down
3 changes: 1 addition & 2 deletions prov/verbs/src/verbs_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,8 +878,7 @@ int vrb_query_atomic(struct fid_domain *domain_fid, enum fi_datatype datatype,
uint64_t flags);
void vrb_set_rnr_timer(struct ibv_qp *qp);
void vrb_cleanup_cq(struct vrb_ep *cur_ep);
int vrb_find_max_inline(struct ibv_pd *pd, struct ibv_context *context,
enum ibv_qp_type qp_type);
int vrb_find_max_inline(struct ibv_context *context, enum ibv_qp_type qp_type);

struct vrb_dgram_av {
struct util_av util_av;
Expand Down

0 comments on commit 5b5ed9f

Please sign in to comment.