Skip to content

Commit

Permalink
prov/efa: Introduce efa specific domain operations
Browse files Browse the repository at this point in the history
Make efa onboard the fi_open_ops API, which allows user to
access efa specific domain ops. The usage of this API
is documented in fi_efa.7.md.

Also added a unit test.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Jan 23, 2024
1 parent 4729af9 commit 13a56e7
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 2 deletions.
63 changes: 63 additions & 0 deletions man/fi_efa.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,69 @@ provider for AWS Neuron or Habana SynapseAI.
delivered to the target buffer only once. If endpoint is not able to support
this feature, it will return -FI_EOPNOTSUPP for the call to fi_setopt().

# PROVIDER SPECIFIC DOMAIN OPS
The efa provider exports extensions for operations
that are not provided by the standard libfabric interface. These extensions
are available via the "`fi_ext_efa.h`" header file.

## Domain Operation Extension

Domain operation extension is obtained by calling `fi_open_ops`
(see [`fi_domain(3)`](fi_domain.3.html))
```c
int fi_open_ops(struct fid *domain, const char *name, uint64_t flags,
void **ops, void *context);
```
and requesting `FI_EFA_DOMAIN_OPS` in `name`. `fi_open_ops` returns `ops` as
the pointer to the function table `fi_efa_ops_domain` defined as follows:
```c
struct fi_efa_ops_domain {
int (*query_mr)(struct fid_mr *mr, struct fi_efa_mr_attr *mr_attr);
};
```

It contains the following operations

### query_mr
This op takes an existing memory registration as input and outputs the
EFA-specific MR attributes, which are defined as follows:

```c
struct fi_efa_mr_attr {
uint16_t ic_id_validity;
uint16_t recv_ic_id;
uint16_t rdma_read_ic_id;
uint16_t rdma_recv_ic_id;
};
```

*ic_id_validity*
: Validity mask of interconnect id fields. Currently the following bits are supported in the mask:

FI_EFA_MR_ATTR_RECV_IC_ID:
recv_ic_id has a valid value.

FI_EFA_MR_ATTR_RDMA_READ_IC_ID:
rdma_read_ic_id has a valid value.

FI_EFA_MR_ATTR_RDMA_RECV_IC_ID:
rdma_recv_ic_id has a valid value.

*recv_ic_id*
: Physical interconnect used by the device to reach the MR for receive operation. It is only valid when `ic_id_validity` has the `FI_EFA_MR_ATTR_RECV_IC_ID` bit.

*rdma_read_ic_id*
: Physical interconnect used by the device to reach the MR for RDMA read operation. It is only valid when `ic_id_validity` has the `FI_EFA_MR_ATTR_RDMA_READ_IC_ID` bit.

*rdma_recv_ic_id*
: Physical interconnect used by the device to reach the MR for RDMA write receive. It is only valid when `ic_id_validity` has the `FI_EFA_MR_ATTR_RDMA_RECV_IC_ID` bit.

#### Return value
**query_mr()** returns 0 on success, or the value of errno on failure
(which indicates the failure reason). If the provider was built without
`efadv_query_mr` support, it returns -FI_ENOSYS.


# RUNTIME PARAMETERS

*FI_EFA_TX_SIZE*
Expand Down
9 changes: 9 additions & 0 deletions prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ _efa_headers = \
prov/efa/src/efa_tp.h \
prov/efa/src/efa_prov.h \
prov/efa/src/efa_env.h \
prov/efa/src/fi_ext_efa.h \
prov/efa/src/dgram/efa_dgram_ep.h \
prov/efa/src/dgram/efa_dgram_cq.h \
prov/efa/src/rdm/efa_rdm_peer.h \
Expand Down Expand Up @@ -132,6 +133,7 @@ nodist_prov_efa_test_efa_unit_test_SOURCES = \
prov/efa/test/efa_unit_tests.c \
prov/efa/test/efa_unit_test_mocks.c \
prov/efa/test/efa_unit_test_common.c \
prov/efa/test/efa_unit_test_domain.c \
prov/efa/test/efa_unit_test_ep.c \
prov/efa/test/efa_unit_test_av.c \
prov/efa/test/efa_unit_test_cq.c \
Expand Down Expand Up @@ -164,6 +166,10 @@ if HAVE_NEURON
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=neuron_alloc
endif HAVE_NEURON

if HAVE_EFADV_QUERY_MR
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_query_mr
endif HAVE_EFADV_QUERY_MR

prov_efa_test_efa_unit_test_LIBS = $(efa_LIBS) $(linkback)

endif ENABLE_EFA_UNIT_TEST
Expand All @@ -173,6 +179,9 @@ efa_CPPFLAGS += \
-I$(top_srcdir)/prov/efa/src/dgram/ \
-I$(top_srcdir)/prov/efa/src/rdm/

rdmainclude_HEADERS += \
prov/efa/src/fi_ext_efa.h

if HAVE_EFA_DL
pkglib_LTLIBRARIES += libefa-fi.la
libefa_fi_la_SOURCES = $(_efa_files) $(_efa_headers) $(common_srcs)
Expand Down
22 changes: 22 additions & 0 deletions prov/efa/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
efa_support_data_in_order_aligned_128_byte=0
efadv_support_extended_cq=0
have_efa_dmabuf_mr=0
have_efadv_query_mr=0
dnl $have_neuron is defined at top-level configure.ac
AM_CONDITIONAL([HAVE_NEURON], [ test x"$have_neuron" = x1 ])
Expand Down Expand Up @@ -135,6 +136,23 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
[],
[efadv_support_extended_cq=0],
[[#include <infiniband/efadv.h>]])
dnl For efadv_query_mr, we check several things,
dnl and if any of them fail, we disable it
have_efadv_query_mr=1
AC_CHECK_DECL([efadv_query_mr],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
AC_CHECK_MEMBER([struct efadv_mr_attr.rdma_recv_ic_id],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
dnl there are more symbols in the enum; only check one of them
AC_CHECK_DECL([EFADV_MR_ATTR_VALIDITY_RDMA_READ_IC_ID],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
])
AC_DEFINE_UNQUOTED([HAVE_RDMA_SIZE],
Expand All @@ -158,6 +176,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([HAVE_EFA_DMABUF_MR],
[$have_efa_dmabuf_mr],
[Indicates if ibv_reg_dmabuf_mr verbs is available])
AC_DEFINE_UNQUOTED([HAVE_EFADV_QUERY_MR],
[$have_efadv_query_mr],
[Indicates if efadv_query_mr verbs is available])
CPPFLAGS=$save_CPPFLAGS
Expand Down Expand Up @@ -202,6 +223,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([EFA_UNIT_TEST], [$efa_unit_test], [EFA unit testing])
AM_CONDITIONAL([HAVE_EFADV_CQ_EX], [ test $efadv_support_extended_cq = 1])
AM_CONDITIONAL([HAVE_EFADV_QUERY_MR], [ test $have_efadv_query_mr = 1])
AM_CONDITIONAL([ENABLE_EFA_UNIT_TEST], [ test x"$enable_efa_unit_test" != xno])
AC_SUBST(efa_CPPFLAGS)
Expand Down
1 change: 1 addition & 0 deletions prov/efa/src/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
#include "rdm/efa_rdm_pke.h"
#include "rdm/efa_rdm_peer.h"
#include "rdm/efa_rdm_util.h"
#include "fi_ext_efa.h"

#define EFA_ABI_VER_MAX_LEN 8

Expand Down
79 changes: 78 additions & 1 deletion prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ struct dlist_entry g_efa_domain_list;

static int efa_domain_close(fid_t fid);

static int efa_domain_ops_open(struct fid *fid, const char *ops_name,
uint64_t flags, void **ops, void *context);

/*
 * Base fid operations of an EFA domain. ops_open is routed to
 * efa_domain_ops_open so that provider specific ops can be opened on the
 * domain; bind and control use the fi_no_* stubs.
 */
static struct fi_ops efa_ops_domain_fid = {
	.size = sizeof(struct fi_ops),
	.close = efa_domain_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = efa_domain_ops_open,
};

static struct fi_ops_domain efa_ops_domain_dgram = {
Expand Down Expand Up @@ -380,3 +383,77 @@ static int efa_domain_close(fid_t fid)
return 0;
}

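/**
 * @brief Query EFA specific Memory Region attributes
 *
 * @param mr_fid ptr to fid_mr of the memory registration to query
 * @param mr_attr ptr to fi_efa_mr_attr that receives the result
 * @return int 0 on success. On failure, the error code returned by
 * efadv_query_mr(), or -FI_ENOSYS when HAVE_EFADV_QUERY_MR is not set.
 */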
#if HAVE_EFADV_QUERY_MR

static int
efa_domain_query_mr(struct fid_mr *mr_fid, struct fi_efa_mr_attr *mr_attr)
{
struct efadv_mr_attr attr = {0};
struct efa_mr *efa_mr;
int ret;

memset(mr_attr, 0, sizeof(*mr_attr));

efa_mr = container_of(mr_fid, struct efa_mr, mr_fid);
ret = efadv_query_mr(efa_mr->ibv_mr, &attr, sizeof(attr));
if (ret) {
EFA_WARN(FI_LOG_DOMAIN, "efadv_query_mr failed. err: %d\n", ret);
return ret;
}

/* Translate the validity masks and bus_id from efadv_mr_attr to fi_efa_mr_attr */
if (attr.ic_id_validity & EFADV_MR_ATTR_VALIDITY_RECV_IC_ID) {
mr_attr->recv_ic_id = attr.recv_ic_id;
mr_attr->ic_id_validity |= FI_EFA_MR_ATTR_RECV_IC_ID;
}

if (attr.ic_id_validity & EFADV_MR_ATTR_VALIDITY_RDMA_READ_IC_ID) {
mr_attr->rdma_read_ic_id = attr.rdma_read_ic_id;
mr_attr->ic_id_validity |= FI_EFA_MR_ATTR_RDMA_READ_IC_ID;
}

if (attr.ic_id_validity & EFADV_MR_ATTR_VALIDITY_RDMA_RECV_IC_ID) {
mr_attr->rdma_recv_ic_id = attr.rdma_recv_ic_id;
mr_attr->ic_id_validity |= FI_EFA_MR_ATTR_RDMA_RECV_IC_ID;
}

return FI_SUCCESS;
}

#else

/*
 * Stub used when HAVE_EFADV_QUERY_MR is not set: neither argument is
 * touched and -FI_ENOSYS is returned.
 */
static int
efa_domain_query_mr(struct fid_mr *mr, struct fi_efa_mr_attr *mr_attr)
{
	(void) mr;
	(void) mr_attr;

	return -FI_ENOSYS;
}

#endif /* HAVE_EFADV_QUERY_MR */

/* Function table handed out by efa_domain_ops_open for FI_EFA_DOMAIN_OPS. */
static struct fi_efa_ops_domain efa_ops_domain = {
.query_mr = efa_domain_query_mr,
};

/**
 * @brief Open a provider specific ops table on an EFA domain
 *
 * Only FI_EFA_DOMAIN_OPS is recognized. On a match, *ops is set to the
 * efa_ops_domain function table; otherwise *ops is left untouched.
 *
 * @param fid domain fid (not used)
 * @param ops_name name of the requested ops table
 * @param flags not used
 * @param ops output, receives the function table on success
 * @param context not used
 * @return FI_SUCCESS on success, -FI_EINVAL if ops_name or ops is NULL
 * or ops_name is not a known ops name
 */
static int
efa_domain_ops_open(struct fid *fid, const char *ops_name, uint64_t flags,
		    void **ops, void *context)
{
	/*
	 * Both pointers are supplied by the caller; reject NULL here
	 * instead of passing it to strcmp() or dereferencing it.
	 */
	if (!ops_name || !ops) {
		EFA_WARN(FI_LOG_DOMAIN,
			"Invalid argument: ops_name and ops must not be NULL\n");
		return -FI_EINVAL;
	}

	if (strcmp(ops_name, FI_EFA_DOMAIN_OPS) != 0) {
		EFA_WARN(FI_LOG_DOMAIN,
			"Unknown ops name: %s\n", ops_name);
		return -FI_EINVAL;
	}

	*ops = &efa_ops_domain;
	return FI_SUCCESS;
}
47 changes: 46 additions & 1 deletion prov/efa/test/efa_unit_test_mocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ struct efa_unit_test_mocks g_efa_unit_test_mocks = {
#endif
.ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov,
.ibv_is_fork_initialized = __real_ibv_is_fork_initialized,
#if HAVE_EFADV_QUERY_MR
.efadv_query_mr = __real_efadv_query_mr,
#endif
};

struct ibv_ah *__wrap_ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
Expand Down Expand Up @@ -303,4 +306,46 @@ enum ibv_fork_status __wrap_ibv_is_fork_initialized(void)
/* Mock of ibv_is_fork_initialized: returns the value produced by mock(). */
enum ibv_fork_status efa_mock_ibv_is_fork_initialized_return_mock(void)
{
	return mock();
}

#if HAVE_EFADV_QUERY_MR
/*
 * Wrapper for efadv_query_mr: forwards the call, arguments unchanged, to
 * whichever implementation is currently stored in
 * g_efa_unit_test_mocks.efadv_query_mr and returns its result.
 */
int __wrap_efadv_query_mr(struct ibv_mr *ibv_mr,
			  struct efadv_mr_attr *attr,
			  uint32_t inlen)
{
	return g_efa_unit_test_mocks.efadv_query_mr(ibv_mr, attr, inlen);
}

/*
 * Mock of efadv_query_mr: marks only recv_ic_id as valid and sets it to 0.
 * ibv_mr and inlen are ignored. Always returns 0.
 */
int efa_mock_efadv_query_mr_recv_ic_id_0(struct ibv_mr *ibv_mr,
					 struct efadv_mr_attr *attr,
					 uint32_t inlen)
{
	attr->recv_ic_id = 0;
	attr->ic_id_validity = EFADV_MR_ATTR_VALIDITY_RECV_IC_ID;
	return 0;
}

/*
 * Mock of efadv_query_mr: marks only rdma_read_ic_id as valid and sets it
 * to 1. ibv_mr and inlen are ignored. Always returns 0.
 */
int efa_mock_efadv_query_mr_rdma_read_ic_id_1(struct ibv_mr *ibv_mr,
					      struct efadv_mr_attr *attr,
					      uint32_t inlen)
{
	attr->rdma_read_ic_id = 1;
	attr->ic_id_validity = EFADV_MR_ATTR_VALIDITY_RDMA_READ_IC_ID;
	return 0;
}

/*
 * Mock of efadv_query_mr: marks only rdma_recv_ic_id as valid and sets it
 * to 2. ibv_mr and inlen are ignored. Always returns 0.
 */
int efa_mock_efadv_query_mr_rdma_recv_ic_id_2(struct ibv_mr *ibv_mr,
					      struct efadv_mr_attr *attr,
					      uint32_t inlen)
{
	attr->rdma_recv_ic_id = 2;
	attr->ic_id_validity = EFADV_MR_ATTR_VALIDITY_RDMA_RECV_IC_ID;
	return 0;
}

/*
 * Mock of efadv_query_mr: marks both recv_ic_id and rdma_read_ic_id as
 * valid, with values 0 and 1 respectively. ibv_mr and inlen are ignored.
 * Always returns 0.
 */
int efa_mock_efadv_query_mr_recv_and_rdma_read_ic_id_0_1(struct ibv_mr *ibv_mr,
							 struct efadv_mr_attr *attr,
							 uint32_t inlen)
{
	attr->ic_id_validity = EFADV_MR_ATTR_VALIDITY_RECV_IC_ID |
			       EFADV_MR_ATTR_VALIDITY_RDMA_READ_IC_ID;
	attr->recv_ic_id = 0;
	attr->rdma_read_ic_id = 1;
	return 0;
}

#endif /* HAVE_EFADV_QUERY_MR */
12 changes: 12 additions & 0 deletions prov/efa/test/efa_unit_test_mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ struct efa_unit_test_mocks
size_t hmem_iov_count, uint64_t hmem_iov_offset);

enum ibv_fork_status (*ibv_is_fork_initialized)(void);

#if HAVE_EFADV_QUERY_MR
int (*efadv_query_mr)(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
#endif
};

struct ibv_cq_ex *efa_mock_create_cq_ex_return_null(struct ibv_context *context, struct ibv_cq_init_attr_ex *init_attr);
Expand Down Expand Up @@ -134,6 +138,14 @@ void *__real_neuron_alloc(void **handle, size_t size);
void *efa_mock_neuron_alloc_return_null(void **handle, size_t size);
#endif

#if HAVE_EFADV_QUERY_MR
int __real_efadv_query_mr(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
int efa_mock_efadv_query_mr_recv_ic_id_0(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
int efa_mock_efadv_query_mr_rdma_read_ic_id_1(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
int efa_mock_efadv_query_mr_rdma_recv_ic_id_2(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
int efa_mock_efadv_query_mr_recv_and_rdma_read_ic_id_0_1(struct ibv_mr *ibv_mr, struct efadv_mr_attr *attr, uint32_t inlen);
#endif

enum ibv_fork_status __real_ibv_is_fork_initialized(void);

enum ibv_fork_status efa_mock_ibv_is_fork_initialized_return_mock(void);
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_peer_get_runt_size_host_memory_normal, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_peer_select_readbase_rtm_no_runt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_peer_select_readbase_rtm_do_runt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_domain_open_ops_wrong_name, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_domain_open_ops_mr_query, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
};

cmocka_set_message_output(CM_OUTPUT_XML);
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_exceeding_total_len();
void test_efa_rdm_peer_get_runt_size_host_memory_normal();
void test_efa_rdm_peer_select_readbase_rtm_no_runt();
void test_efa_rdm_peer_select_readbase_rtm_do_runt();
void test_efa_domain_open_ops_wrong_name();
void test_efa_domain_open_ops_mr_query();

#endif

0 comments on commit 13a56e7

Please sign in to comment.