From 7f9b5601d802ab2c5ab82b4e6f4ca8b8146395e7 Mon Sep 17 00:00:00 2001 From: Jessie Yang Date: Wed, 25 Sep 2024 16:16:19 -0700 Subject: [PATCH] prov/efa: Use tclass to prioritize the messages from an ep To prioritize the messages from a given endpoint, user can specify `fi_info->tx_attr->tclass = FI_TC_LOW_LATENCY` in the fi_endpoint() call to set the service level in rdma-core. All other tclass values will be ignored. Signed-off-by: Jessie Yang --- man/fi_efa.7.md | 2 ++ prov/efa/configure.m4 | 9 +++++++++ prov/efa/src/efa_base_ep.c | 8 ++++++-- prov/efa/src/efa_base_ep.h | 4 +++- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 4 ++-- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/man/fi_efa.7.md b/man/fi_efa.7.md index 02ef1d80b73..b6eefc19dc1 100644 --- a/man/fi_efa.7.md +++ b/man/fi_efa.7.md @@ -205,6 +205,8 @@ struct fi_efa_mr_attr { **query_mr()** returns 0 on success, or the value of errno on failure (which indicates the failure reason). +# Traffic Class (tclass) in EFA +To prioritize the messages from a given endpoint, user can specify `fi_info->tx_attr->tclass = FI_TC_LOW_LATENCY` in the fi_endpoint() call to set the service level in rdma-core. All other tclass values will be ignored. 
# RUNTIME PARAMETERS diff --git a/prov/efa/configure.m4 b/prov/efa/configure.m4 index f807ce9bc51..71152f72ed4 100644 --- a/prov/efa/configure.m4 +++ b/prov/efa/configure.m4 @@ -77,6 +77,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ efadv_support_extended_cq=0 have_efa_dmabuf_mr=0 have_efadv_query_mr=0 + have_efadv_sl=0 dnl $have_neuron is defined at top-level configure.ac AM_CONDITIONAL([HAVE_NEURON], [ test x"$have_neuron" = x1 ]) @@ -159,6 +160,11 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ [], [have_efadv_query_mr=0], [[#include <infiniband/efadv.h>]]) + + AC_CHECK_MEMBER(struct efadv_qp_init_attr.sl, + [have_efadv_sl=1], + [have_efadv_sl=0], + [[#include <infiniband/efadv.h>]]) ]) AC_DEFINE_UNQUOTED([HAVE_RDMA_SIZE], @@ -188,6 +194,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ AC_DEFINE_UNQUOTED([HAVE_EFADV_QUERY_MR], [$have_efadv_query_mr], [Indicates if efadv_query_mr verbs is available]) + AC_DEFINE_UNQUOTED([HAVE_EFADV_SL], + [$have_efadv_sl], + [Indicates if efadv_qp_init_attr has sl]) CPPFLAGS=$save_CPPFLAGS diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index 8c55fee2387..d022c0e3ef6 100644 --- a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -167,7 +167,7 @@ static int efa_base_ep_modify_qp_rst2rts(struct efa_base_ep *base_ep, * @param init_attr_ex ibv_qp_init_attr_ex * @return int 0 on success, negative integer on failure */ -int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex) +int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex, uint32_t tclass) { struct efadv_qp_init_attr efa_attr = { 0 }; @@ -185,6 +185,10 @@ int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex) efa_attr.flags |= EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV; #endif efa_attr.driver_qp_type = EFADV_QP_DRIVER_TYPE_SRD; +#if HAVE_EFADV_SL + if (tclass == FI_TC_LOW_LATENCY) + efa_attr.sl = EFA_QP_DEFAULT_SERVICE_LEVEL; +#endif (*qp)->ibv_qp = efadv_create_qp_ex( init_attr_ex->pd->context, init_attr_ex, &efa_attr, sizeof(struct efadv_qp_init_attr)); 
@@ -206,7 +210,7 @@ int efa_base_ep_create_qp(struct efa_base_ep *base_ep, { int ret; - ret = efa_qp_create(&base_ep->qp, init_attr_ex); + ret = efa_qp_create(&base_ep->qp, init_attr_ex, base_ep->info->tx_attr->tclass); if (ret) return ret; diff --git a/prov/efa/src/efa_base_ep.h b/prov/efa/src/efa_base_ep.h index bbcd0d26a2d..6cde8f9f4bf 100644 --- a/prov/efa/src/efa_base_ep.h +++ b/prov/efa/src/efa_base_ep.h @@ -12,6 +12,8 @@ #include "ofi_util.h" #include "rdm/efa_rdm_protocol.h" +#define EFA_QP_DEFAULT_SERVICE_LEVEL 8 + struct efa_qp { struct ibv_qp *ibv_qp; struct ibv_qp_ex *ibv_qp_ex; @@ -72,7 +74,7 @@ int efa_base_ep_construct(struct efa_base_ep *base_ep, int efa_base_ep_getname(fid_t fid, void *addr, size_t *addrlen); -int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex); +int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex, uint32_t tclass); void efa_qp_destruct(struct efa_qp *qp); diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index abc4f89c720..1ce5041ba36 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -105,7 +105,7 @@ int efa_rdm_ep_create_base_ep_ibv_qp(struct efa_rdm_ep *ep) * without any headers. */ if (ep->use_zcpy_rx) { - ret = efa_qp_create(&ep->base_ep.user_recv_qp, &attr_ex); + ret = efa_qp_create(&ep->base_ep.user_recv_qp, &attr_ex, ep->base_ep.info->tx_attr->tclass); if (ret) { efa_base_ep_destruct_qp(&ep->base_ep); return ret; @@ -1642,7 +1642,7 @@ int efa_rdm_ep_check_qp_in_order_aligned_128_bytes(struct efa_rdm_ep *ep, /* Create a dummy qp for query only */ efa_rdm_ep_construct_ibv_qp_init_attr_ex(ep, &attr_ex, ibv_cq_ex, ibv_cq_ex); - ret = efa_qp_create(&qp, &attr_ex); + ret = efa_qp_create(&qp, &attr_ex, FI_TC_UNSPEC); if (ret) goto out;