Skip to content

Commit

Permalink
prov/efa: Introduce efa specific domain operations
Browse files Browse the repository at this point in the history
Make efa onboard the fi_open_ops API, which allows
user to access efa specific domain ops. The usage of
this API is documented in fi_efa.7.md.

Also added a suite of efa unit test.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Dec 8, 2023
1 parent aa6a980 commit 53a0595
Show file tree
Hide file tree
Showing 11 changed files with 454 additions and 2 deletions.
71 changes: 71 additions & 0 deletions man/fi_efa.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,77 @@ provider for AWS Neuron or Habana SynapseAI.
delivered to the target buffer only once. If endpoint is not able to support
this feature, it will return -FI_EOPNOTSUPP for the call to fi_setopt().

# PROVIDER SPECIFIC DOMAIN OPS
The efa provider exports extensions for additional operations
that are not provided by the standard libfabric interface. These extensions
are available via the "`fi_ext_efa.h`" header file.

## Domain Operation Extension

Domain operation extension is obtained by calling `fi_open_ops`
(see [`fi_domain`(3)](fi_domain.3.html))
```c
int fi_open_ops(struct fid *domain, const char *name, uint64_t flags,
void **ops, void *context);
```
and requesting `FI_EFA_DOMAIN_OPS` in `name`. `fi_open_ops` returns `ops` as
the pointer to the function table `fi_efa_ops_domain` defined as follows:
```c
struct fi_efa_ops_domain {
int (*query_mr)(struct fid_mr *mr, struct fi_efa_mr_attr *mr_attr);
};
```

It contains the following operations

### query_mr
This op takes an existing memory registration (mr) as input and outputs the
EFA-specific attributes of that mr, which are defined as follows:

```c
struct fi_efa_mr_attr {
uint16_t pci_bus_id_validity;
uint16_t recv_pci_bus_id;
uint16_t rdma_read_pci_bus_id;
uint16_t rdma_recv_pci_bus_id;
};
```

*pci_bus_id_validity*
: Validity mask of PCI bus id fields. Currently the following bits are supported in the mask:

```c
enum {
FI_EFA_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID = 1 << 0,
FI_EFA_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID = 1 << 1,
FI_EFA_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID = 1 << 2,
};
```

FI_EFA_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID:
recv_pci_bus_id has a valid value.

FI_EFA_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID:
rdma_read_pci_bus_id has a valid value.

FI_EFA_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID:
rdma_recv_pci_bus_id has a valid value.

*recv_pci_bus_id*
: Physical PCIe bus used by the device to reach the MR for receive operation. It is only valid when `pci_bus_id_validity` has the `FI_EFA_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID` bit.

*rdma_read_pci_bus_id*
: Physical PCIe bus used by the device to reach the MR for RDMA read operation. It is only valid when `pci_bus_id_validity` has the `FI_EFA_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID` bit.

*rdma_recv_pci_bus_id*
: Physical PCIe bus used by the device to reach the MR for RDMA write receive. It is only valid when `pci_bus_id_validity` has the `FI_EFA_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID` bit.

#### Return value
**query_mr()** returns 0 on success, or the value of errno on failure
(which indicates the failure reason). If the provider was built without
`efadv_query_mr` support, **query_mr()** returns -FI_ENOSYS.


# RUNTIME PARAMETERS

*FI_EFA_TX_SIZE*
Expand Down
9 changes: 9 additions & 0 deletions prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ _efa_headers = \
prov/efa/src/efa_tp.h \
prov/efa/src/efa_prov.h \
prov/efa/src/efa_env.h \
prov/efa/src/fi_ext_efa.h \
prov/efa/src/dgram/efa_dgram_ep.h \
prov/efa/src/dgram/efa_dgram_cq.h \
prov/efa/src/rdm/efa_rdm_peer.h \
Expand Down Expand Up @@ -132,6 +133,7 @@ nodist_prov_efa_test_efa_unit_test_SOURCES = \
prov/efa/test/efa_unit_tests.c \
prov/efa/test/efa_unit_test_mocks.c \
prov/efa/test/efa_unit_test_common.c \
prov/efa/test/efa_unit_test_domain.c \
prov/efa/test/efa_unit_test_ep.c \
prov/efa/test/efa_unit_test_av.c \
prov/efa/test/efa_unit_test_cq.c \
Expand Down Expand Up @@ -164,6 +166,10 @@ if HAVE_NEURON
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=neuron_alloc
endif HAVE_NEURON

if HAVE_EFADV_QUERY_MR
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_query_mr
endif HAVE_EFADV_QUERY_MR

prov_efa_test_efa_unit_test_LIBS = $(efa_LIBS) $(linkback)

endif ENABLE_EFA_UNIT_TEST
Expand All @@ -173,6 +179,9 @@ efa_CPPFLAGS += \
-I$(top_srcdir)/prov/efa/src/dgram/ \
-I$(top_srcdir)/prov/efa/src/rdm/

rdmainclude_HEADERS += \
prov/efa/src/fi_ext_efa.h

if HAVE_EFA_DL
pkglib_LTLIBRARIES += libefa-fi.la
libefa_fi_la_SOURCES = $(_efa_files) $(_efa_headers) $(common_srcs)
Expand Down
22 changes: 22 additions & 0 deletions prov/efa/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
efa_support_data_in_order_aligned_128_byte=0
efadv_support_extended_cq=0
dnl Default for the dmabuf MR check. The name must match the variable that is
dnl expanded when HAVE_EFA_DMABUF_MR is defined, otherwise the define can end
dnl up empty when the check is never reached.
have_efa_dmabuf_mr=0
have_efadv_query_mr=0
dnl $have_neuron is defined at top-level configure.ac
AM_CONDITIONAL([HAVE_NEURON], [ test x"$have_neuron" = x1 ])
Expand Down Expand Up @@ -135,6 +136,23 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
[],
[efadv_support_extended_cq=0],
[[#include <infiniband/efadv.h>]])
dnl For efadv_query_mr, we check several things,
dnl and if any of them fail, we disable it
have_efadv_query_mr=1
AC_CHECK_DECL([efadv_query_mr],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
AC_CHECK_MEMBER([struct efadv_mr_attr.rdma_recv_pci_bus_id],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
dnl there are more symbols in the enum; we only check one of them
AC_CHECK_DECL([EFADV_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID],
[],
[have_efadv_query_mr=0],
[[#include <infiniband/efadv.h>]])
])
AC_DEFINE_UNQUOTED([HAVE_RDMA_SIZE],
Expand All @@ -158,6 +176,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([HAVE_EFA_DMABUF_MR],
[$have_efa_dmabuf_mr],
[Indicates if ibv_reg_dmabuf_mr verbs is available])
AC_DEFINE_UNQUOTED([HAVE_EFADV_QUERY_MR],
[$have_efadv_query_mr],
[Indicates if efadv_query_mr verbs is available])
CPPFLAGS=$save_CPPFLAGS
Expand Down Expand Up @@ -202,6 +223,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([EFA_UNIT_TEST], [$efa_unit_test], [EFA unit testing])
AM_CONDITIONAL([HAVE_EFADV_CQ_EX], [ test $efadv_support_extended_cq = 1])
AM_CONDITIONAL([HAVE_EFADV_QUERY_MR], [ test $have_efadv_query_mr = 1])
AM_CONDITIONAL([ENABLE_EFA_UNIT_TEST], [ test x"$enable_efa_unit_test" != xno])
AC_SUBST(efa_CPPFLAGS)
Expand Down
1 change: 1 addition & 0 deletions prov/efa/src/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
#include "rdm/efa_rdm_pke.h"
#include "rdm/efa_rdm_peer.h"
#include "rdm/efa_rdm_util.h"
#include "fi_ext_efa.h"

#define EFA_ABI_VER_MAX_LEN 8

Expand Down
79 changes: 78 additions & 1 deletion prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ struct dlist_entry g_efa_domain_list;

/*
 * Forward declarations: both handlers are installed in efa_ops_domain_fid
 * below, ahead of their definitions later in this file.
 */
static int efa_domain_close(fid_t fid);

static int efa_domain_ops_open(struct fid *fid, const char *ops_name,
uint64_t flags, void **ops, void *context);

/*
 * Generic fid operations of an EFA domain.
 *
 * .ops_open is set exactly once: a second designated initializer for the
 * same member silently overrides the first, so the stale fi_no_ops_open
 * entry was dead and has been dropped. efa_domain_ops_open serves the
 * provider specific ops tables requested through fi_open_ops().
 */
static struct fi_ops efa_ops_domain_fid = {
	.size = sizeof(struct fi_ops),
	.close = efa_domain_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = efa_domain_ops_open,
};

static struct fi_ops_domain efa_ops_domain_dgram = {
Expand Down Expand Up @@ -380,3 +383,77 @@ static int efa_domain_close(fid_t fid)
return 0;
}

/**
 * @brief Retrieve the EFA specific attributes of a memory registration
 *
 * When the build provides efadv_query_mr() (HAVE_EFADV_QUERY_MR), the
 * underlying ibv_mr is queried and every PCI bus id that the device reports
 * as valid is copied into mr_attr, together with the matching
 * FI_EFA_MR_ATTR_VALIDITY_* bit. Without HAVE_EFADV_QUERY_MR the operation
 * is unavailable and mr_attr is left untouched.
 *
 * @param[in]	mr_fid	fid_mr that is embedded in a struct efa_mr
 * @param[out]	mr_attr	cleared before the query, then filled with the
 *			fields that are reported as valid
 * @return	FI_SUCCESS on success. If efadv_query_mr() fails, its
 *		non-zero return value is handed back unchanged.
 *		-FI_ENOSYS when built without HAVE_EFADV_QUERY_MR.
 *
 * NOTE(review): the efadv_query_mr() result is passed through without a sign
 * change while the fallback returns a negative fi_errno; confirm which
 * convention callers are expected to see.
 */
static int
efa_domain_query_mr(struct fid_mr *mr_fid, struct fi_efa_mr_attr *mr_attr)
{
#if HAVE_EFADV_QUERY_MR
	struct efa_mr *mr_entry = container_of(mr_fid, struct efa_mr, mr_fid);
	struct efadv_mr_attr dv_attr = {0};
	uint16_t validity = 0;
	int err;

	/* Start from a clean output so fields that are not valid read as 0 */
	memset(mr_attr, 0, sizeof(*mr_attr));

	err = efadv_query_mr(mr_entry->ibv_mr, &dv_attr, sizeof(dv_attr));
	if (err != 0) {
		EFA_WARN(FI_LOG_DOMAIN, "efadv_query_mr failed. err: %d\n", err);
		return err;
	}

	/* Map each efadv validity bit and its bus id onto the fi_efa equivalent */
	if (dv_attr.pci_bus_id_validity & EFADV_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID) {
		validity |= FI_EFA_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID;
		mr_attr->recv_pci_bus_id = dv_attr.recv_pci_bus_id;
	}

	if (dv_attr.pci_bus_id_validity & EFADV_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID) {
		validity |= FI_EFA_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID;
		mr_attr->rdma_read_pci_bus_id = dv_attr.rdma_read_pci_bus_id;
	}

	if (dv_attr.pci_bus_id_validity & EFADV_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID) {
		validity |= FI_EFA_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID;
		mr_attr->rdma_recv_pci_bus_id = dv_attr.rdma_recv_pci_bus_id;
	}

	mr_attr->pci_bus_id_validity = validity;
	return FI_SUCCESS;
#else
	return -FI_ENOSYS;
#endif /* HAVE_EFADV_QUERY_MR */
}

/*
 * EFA specific domain operations table. efa_domain_ops_open() hands out a
 * pointer to this table when FI_EFA_DOMAIN_OPS is requested.
 */
static struct fi_efa_ops_domain efa_ops_domain = {
.query_mr = efa_domain_query_mr,
};

/**
 * @brief Open a provider specific ops table on an EFA domain
 *
 * Installed as the .ops_open handler of the domain fid. The only name that
 * is recognized is FI_EFA_DOMAIN_OPS, which yields the fi_efa_ops_domain
 * table.
 *
 * @param[in]	fid		domain fid (not inspected)
 * @param[in]	ops_name	name of the requested ops table
 * @param[in]	flags		not used
 * @param[out]	ops		receives the ops table on success; left
 *				untouched on failure
 * @param[in]	context		not used
 * @return	FI_SUCCESS on success, -FI_EINVAL if ops_name or ops is NULL
 *		or if ops_name does not name a known ops table
 */
static int
efa_domain_ops_open(struct fid *fid, const char *ops_name, uint64_t flags,
		    void **ops, void *context)
{
	/*
	 * Both pointers come straight from the caller of fi_open_ops().
	 * strcmp() and the "%s" conversion below are undefined for NULL, and
	 * a NULL ops cannot be stored through.
	 */
	if (!ops_name || !ops) {
		EFA_WARN(FI_LOG_DOMAIN,
			 "Invalid argument: ops_name and ops must not be NULL\n");
		return -FI_EINVAL;
	}

	if (strcmp(ops_name, FI_EFA_DOMAIN_OPS) != 0) {
		EFA_WARN(FI_LOG_DOMAIN,
			 "Unknown ops name: %s\n", ops_name);
		return -FI_EINVAL;
	}

	*ops = &efa_ops_domain;
	return FI_SUCCESS;
}
55 changes: 55 additions & 0 deletions prov/efa/src/fi_ext_efa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) Amazon.com, Inc. or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef _FI_EXT_EFA_H_
#define _FI_EXT_EFA_H_

/*
 * This header is installed for applications, so it must compile when it is
 * the first (or only) header included: pull in the fixed-width integer types
 * it uses and forward-declare the one libfabric type it refers to.
 */
#include <stdint.h>

/* Only a pointer to fid_mr is needed here, so a forward declaration suffices */
struct fid_mr;

/* Name to pass to fi_open_ops() on a domain to obtain struct fi_efa_ops_domain */
#define FI_EFA_DOMAIN_OPS "efa domain ops"

/* EFA specific attributes of a memory registration, filled in by query_mr */
struct fi_efa_mr_attr {
	/* Mask of FI_EFA_MR_ATTR_VALIDITY_* bits telling which fields below are valid */
	uint16_t pci_bus_id_validity;
	/* PCIe bus used by the device to reach the MR for receive operation */
	uint16_t recv_pci_bus_id;
	/* PCIe bus used by the device to reach the MR for RDMA read operation */
	uint16_t rdma_read_pci_bus_id;
	/* PCIe bus used by the device to reach the MR for RDMA write receive */
	uint16_t rdma_recv_pci_bus_id;
};

/* Bits of fi_efa_mr_attr.pci_bus_id_validity */
enum {
	FI_EFA_MR_ATTR_VALIDITY_RECV_PCI_BUS_ID = 1 << 0,
	FI_EFA_MR_ATTR_VALIDITY_RDMA_READ_PCI_BUS_ID = 1 << 1,
	FI_EFA_MR_ATTR_VALIDITY_RDMA_RECV_PCI_BUS_ID = 1 << 2,
};

/* Function table returned by fi_open_ops() for FI_EFA_DOMAIN_OPS */
struct fi_efa_ops_domain {
	/* Query the EFA specific attributes of an existing memory registration */
	int (*query_mr)(struct fid_mr *mr, struct fi_efa_mr_attr *mr_attr);
};

#endif /* _FI_EXT_EFA_H_ */
Loading

0 comments on commit 53a0595

Please sign in to comment.