prov/efa: Onboard fi_mr_dmabuf API in mem reg ops.
This patch onboards the fi_mr_dmabuf API in efa.
It also refactors the efa_mr_reg_ibv_mr function to
clean up the dmabuf registration case for the neuron interface.

Signed-off-by: Shi Jin <sjina@amazon.com>
shijin-aws committed Nov 6, 2023
1 parent 12d0984 commit da35127
Showing 1 changed file with 70 additions and 67 deletions.
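For context, a minimal application-side sketch of what this commit enables, written against the fi_mr_regattr path. This is an illustration only: the helper name register_dmabuf_region and the dmabuf_fd/base_addr/region_len values are placeholders, and it assumes a libfabric build that defines struct fi_mr_dmabuf and FI_MR_DMABUF plus a domain opened with FI_HMEM support.

#include <rdma/fabric.h>
#include <rdma/fi_domain.h>

/* Register a DMA-buf backed region via fi_mr_regattr(). With FI_MR_DMABUF
 * set, the provider reads attr.dmabuf instead of attr.mr_iov, which is the
 * path this commit adds to the EFA provider. */
static int register_dmabuf_region(struct fid_domain *domain, int dmabuf_fd,
				  void *base_addr, size_t region_len,
				  struct fid_mr **mr_out)
{
	struct fi_mr_dmabuf dmabuf = {
		.fd = dmabuf_fd,        /* fd exported by the device driver */
		.offset = 0,            /* start of the region within the dmabuf */
		.len = region_len,      /* number of bytes to register */
		.base_addr = base_addr, /* device address backing the dmabuf */
	};
	struct fi_mr_attr attr = {
		.dmabuf = &dmabuf,
		.iov_count = 1,
		.access = FI_SEND | FI_RECV | FI_READ | FI_WRITE |
			  FI_REMOTE_READ | FI_REMOTE_WRITE,
		.iface = FI_HMEM_CUDA, /* placeholder HMEM interface */
		.device.cuda = 0,      /* placeholder device ordinal */
	};

	return fi_mr_regattr(domain, &attr, FI_MR_DMABUF, mr_out);
}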
137 changes: 70 additions & 67 deletions prov/efa/src/efa_mr.c
@@ -202,7 +202,8 @@ static struct fi_ops efa_mr_cache_ops = {
* efa_mr structure based on the attributes requested by the user.
*
* @param[in] efa_mr efa_mr structure to be updated
* @param[in] attr fi_mr_attr from the user's registration call
* @param[in] attr a copy of fi_mr_attr updated from the user's registration call
* @param[in] flags MR flags
*
* @return FI_SUCCESS or negative FI error code
@@ -212,6 +213,12 @@ static int efa_mr_hmem_setup(struct efa_mr *efa_mr,
uint64_t flags)
{
int err;
struct iovec mr_iov = {0};

if (flags & FI_MR_DMABUF)
ofi_mr_get_iov_from_dmabuf(&mr_iov, attr->dmabuf, 1);
else
memcpy(&mr_iov, attr->mr_iov, sizeof(*attr->mr_iov));

efa_mr->peer.flags = flags;

@@ -251,13 +258,13 @@ static int efa_mr_hmem_setup(struct efa_mr *efa_mr,
efa_mr->peer.device.cuda = attr->device.cuda;

if (cuda_is_gdrcopy_enabled()) {
err = ofi_hmem_dev_register(FI_HMEM_CUDA, attr->mr_iov->iov_base, attr->mr_iov->iov_len,
err = ofi_hmem_dev_register(FI_HMEM_CUDA, mr_iov.iov_base, mr_iov.iov_len,
(uint64_t *)&efa_mr->peer.hmem_data);
efa_mr->peer.flags |= OFI_HMEM_DATA_DEV_REG_HANDLE;
if (err) {
EFA_WARN(FI_LOG_MR,
"Unable to register handle for GPU memory. err: %d buf: %p len: %zu\n",
err, attr->mr_iov->iov_base, attr->mr_iov->iov_len);
err, mr_iov.iov_base, mr_iov.iov_len);
/* When gdrcopy pin buf failed, fallback to cudaMemcpy */
efa_mr->peer.hmem_data = NULL;
efa_mr->peer.flags &= ~OFI_HMEM_DATA_DEV_REG_HANDLE;
@@ -334,7 +341,11 @@ static int efa_mr_cache_regattr(struct fid *fid, const struct fi_mr_attr *attr,
struct ofi_mr_info info = {0};
int ret;

if (attr->iface == FI_HMEM_NEURON || attr->iface == FI_HMEM_SYNAPSEAI)
/*
* dmabuf reg currently doesn't support caching because there is no memory monitor for
* the dmabuf region yet.
*/
if (attr->iface == FI_HMEM_NEURON || attr->iface == FI_HMEM_SYNAPSEAI || (flags & FI_MR_DMABUF))
flags |= OFI_MR_NOCACHE;

if (flags & OFI_MR_NOCACHE) {
@@ -494,20 +505,38 @@ struct fi_ops efa_mr_ops = {
.ops_open = fi_no_ops_open,
};

#if HAVE_SYNAPSEAI

/**
* @brief Register a memory buffer with rdma-core api.
*
* @param efa_mr the ptr to the efa_mr object
* @param mr_attr the ptr to the fi_mr_attr object
* @param access the desired memory protection attributes
* @param flags flags in fi_mr_reg/fi_mr_regattr
* @return struct ibv_mr* the ptr to the registered MR
*/
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr, int access)
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr,
int access, const uint64_t flags)
{
int dmabuf_fd;
uint64_t offset;
int ret;

if (flags & FI_MR_DMABUF)
return ibv_reg_dmabuf_mr(
efa_mr->domain->ibv_pd,
mr_attr->dmabuf->offset,
mr_attr->dmabuf->len,
(uintptr_t) mr_attr->dmabuf->base_addr + mr_attr->dmabuf->offset,
mr_attr->dmabuf->fd,
access
);

/*
* TODO: remove the synapseai and neuron blocks by onboarding the
* ofi_hmem_get_dmabuf_fd API.
*/
#if HAVE_SYNAPSEAI
if (efa_mr_is_synapseai(efa_mr)) {
ret = synapseai_get_dmabuf_fd(mr_attr->mr_iov->iov_base,
(uint64_t) mr_attr->mr_iov->iov_len,
@@ -520,74 +549,42 @@ static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr
mr_attr->mr_iov->iov_len,
(uint64_t)mr_attr->mr_iov->iov_base,
dmabuf_fd, access);
} else {
return ibv_reg_mr(efa_mr->domain->ibv_pd,
(void *)mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len, access);
}
}
#elif HAVE_NEURON
/**
* @brief Register a memory buffer with rdma-core api.
*
* @param efa_mr the ptr to the efa_mr object
* @param mr_attr the ptr to the fi_mr_attr object
* @param access the desired memory protection attributes
* @return struct ibv_mr* the ptr to the registered MR
*/
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr, int access)
{
if (!efa_mr_is_neuron(efa_mr)) {
return ibv_reg_mr(
efa_mr->domain->ibv_pd,
(void *)mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len, access);
}
#endif

int dmabuf_fd, ret;
uint64_t offset;

ret = neuron_get_dmabuf_fd(
mr_attr->mr_iov->iov_base,
(uint64_t) mr_attr->mr_iov->iov_len,
&dmabuf_fd, &offset);

if (ret == FI_SUCCESS) {
/* Success => invoke ibv_reg_dmabuf_mr */
return ibv_reg_dmabuf_mr(
efa_mr->domain->ibv_pd, offset,
#if HAVE_NEURON
if (efa_mr_is_neuron(efa_mr)) {
ret = neuron_get_dmabuf_fd(
mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len,
(uint64_t)mr_attr->mr_iov->iov_base,
dmabuf_fd, access);
} else if (ret == -FI_ENOPROTOOPT) {
/* Protocol not available => fallback */
EFA_INFO(FI_LOG_MR,
"Unable to get dmabuf fd for Neuron device buffer, "
"Fall back to ibv_reg_mr\n");
return ibv_reg_mr(
efa_mr->domain->ibv_pd,
(void *)mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len, access);
&dmabuf_fd,
&offset);

if (ret == FI_SUCCESS) {
/* Success => invoke ibv_reg_dmabuf_mr */
return ibv_reg_dmabuf_mr(
efa_mr->domain->ibv_pd, 0,
mr_attr->mr_iov->iov_len,
(uint64_t)mr_attr->mr_iov->iov_base,
dmabuf_fd, access);
} else if (ret == -FI_ENOPROTOOPT) {
/* Protocol not available => fallback */
EFA_INFO(FI_LOG_MR,
"Unable to get dmabuf fd for Neuron device buffer, "
"Fall back to ibv_reg_mr\n");
return ibv_reg_mr(
efa_mr->domain->ibv_pd,
(void *)mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len, access);
}
return NULL;
}
#endif

return NULL;
}
#else
/**
* @brief Register a memory buffer with rdma-core api.
*
* @param efa_mr the ptr to the efa_mr object
* @param mr_attr the ptr to the fi_mr_attr object
* @param access the desired memory protection attributes
* @return struct ibv_mr* the ptr to the registered MR
*/
static struct ibv_mr *efa_mr_reg_ibv_mr(struct efa_mr *efa_mr, struct fi_mr_attr *mr_attr, int access)
{
return ibv_reg_mr(efa_mr->domain->ibv_pd,
(void *)mr_attr->mr_iov->iov_base,
mr_attr->mr_iov->iov_len, access);
}
#endif

#if HAVE_CUDA
static inline
@@ -847,7 +844,7 @@ static int efa_mr_reg_impl(struct efa_mr *efa_mr, uint64_t flags, const void *at
if (mr_attr.iface == FI_HMEM_CUDA && !efa_mr->domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device) {
efa_mr->mr_fid.key = efa_mr_cuda_non_p2p_keygen();
} else {
efa_mr->ibv_mr = efa_mr_reg_ibv_mr(efa_mr, &mr_attr, fi_ibv_access);
efa_mr->ibv_mr = efa_mr_reg_ibv_mr(efa_mr, &mr_attr, fi_ibv_access, flags);
if (!efa_mr->ibv_mr) {
EFA_WARN(FI_LOG_MR, "Unable to register MR: %s\n",
fi_strerror(-errno));
@@ -932,8 +929,14 @@ static int efa_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
* can be accessed from host. EFA provider considers all device memory
* to be accessed by device only. Therefore, this function claims
* support of this flag, but does not save it in efa_mr.
*
* FI_MR_DMABUF:
* This flag indicates that the memory region to be registered is
* a DMA-buf backed region. When set, the region is specified through
* the dmabuf field of the fi_mr_attr structure. This flag is only
* usable for domains opened with FI_HMEM capability support.
*/
supported_flags = OFI_MR_NOCACHE | FI_HMEM_DEVICE_ONLY;
supported_flags = OFI_MR_NOCACHE | FI_HMEM_DEVICE_ONLY | FI_MR_DMABUF;
if (flags & (~supported_flags)) {
EFA_WARN(FI_LOG_MR, "Unsupported flag type. requested"
"[0x%" PRIx64 "] supported[0x%" PRIx64 "]\n",
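As a footnote on the efa_mr_hmem_setup change above: with FI_MR_DMABUF the patch first normalizes the registration request into a plain iovec (via ofi_mr_get_iov_from_dmabuf) so that the GDRCopy handle registration can treat dmabuf and mr_iov registrations uniformly. A rough stand-alone sketch of that conversion, assuming only the fi_mr_dmabuf fields visible in the diff (fd, offset, len, base_addr); the struct and function names here are hypothetical, not the utility the patch actually calls.

#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

/* Hypothetical stand-in for the dmabuf-to-iovec conversion: the registered
 * range starts at base_addr + offset and spans len bytes, matching the
 * address/length the diff passes to ibv_reg_dmabuf_mr(). */
struct example_dmabuf {
	int fd;          /* dmabuf file descriptor (not needed for the iovec) */
	uint64_t offset; /* offset of the region within the dmabuf */
	size_t len;      /* length of the region in bytes */
	void *base_addr; /* device virtual address backing the dmabuf */
};

static void example_iov_from_dmabuf(struct iovec *iov,
				    const struct example_dmabuf *dmabuf)
{
	iov->iov_base = (void *) ((uintptr_t) dmabuf->base_addr + dmabuf->offset);
	iov->iov_len = dmabuf->len;
}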
