Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ch4/ofi: Convert CUDA device id to handle for fi_mr_regattr #7156

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/mpid/ch4/netmod/ofi/ofi_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "ofi_types.h"
#include "mpidch4r.h"
#include "ch4_impl.h"
#ifdef MPL_HAVE_CUDA
#include <cuda.h> /* for cuDeviceGet */
#endif

extern unsigned long long PVAR_COUNTER_nic_sent_bytes_count[MPIDI_OFI_MAX_NICS] ATTRIBUTE((unused));
extern unsigned long long PVAR_COUNTER_nic_recvd_bytes_count[MPIDI_OFI_MAX_NICS]
Expand Down Expand Up @@ -707,8 +710,15 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_register_memory(char *send_buf, size_t da
mr_attr.context = NULL;
if (MPL_gpu_attr_is_strict_dev(attr)) {
#ifdef MPL_HAVE_CUDA
CUdevice device;
int dev_id;

/* libfabric's fi_mr_regattr expects the CUDA driver device handle (CUdevice) obtained from cuDeviceGet, not the raw device id */
dev_id = MPL_gpu_get_dev_id_from_attr(attr);
cuDeviceGet(&device, dev_id);

mr_attr.iface = FI_HMEM_CUDA;
mr_attr.device.cuda = MPL_gpu_get_dev_id_from_attr(attr);
mr_attr.device.cuda = device;
#elif defined MPL_HAVE_ZE
/* OFI does not support tiles yet, so we need to pass the root device. */
mr_attr.iface = FI_HMEM_ZE;
Expand Down
4 changes: 2 additions & 2 deletions src/mpl/include/mpl_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ typedef enum {

typedef struct {
MPL_pointer_type_t type;
MPL_gpu_device_handle_t device;
int device;
MPL_gpu_device_attr device_attr;
} MPL_pointer_attr_t;

Expand Down Expand Up @@ -125,7 +125,7 @@ int MPL_gpu_free_host(void *ptr);
int MPL_gpu_register_host(const void *ptr, size_t size);
int MPL_gpu_unregister_host(const void *ptr);

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device);
int MPL_gpu_malloc(void **ptr, size_t size, int h_device);
int MPL_gpu_free(void *ptr);

int MPL_gpu_init(int debug_summary);
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "cuda_runtime_api.h"

typedef cudaIpcMemHandle_t MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef struct cudaPointerAttributes MPL_gpu_device_attr;
typedef int MPL_gpu_request;
typedef cudaStream_t MPL_gpu_stream_t;
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_fallback.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#define MPL_GPU_CUDA_H_INCLUDED

typedef int MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef int MPL_gpu_device_attr; /* dummy type */
typedef int MPL_gpu_request;
typedef int MPL_gpu_stream_t;
Expand Down
1 change: 0 additions & 1 deletion src/mpl/include/mpl_gpu_hip.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "hip/hip_runtime_api.h"

typedef hipIpcMemHandle_t MPL_gpu_ipc_mem_handle_t;
typedef int MPL_gpu_device_handle_t;
typedef struct hipPointerAttribute_t MPL_gpu_device_attr;
typedef int MPL_gpu_request;
typedef hipStream_t MPL_gpu_stream_t;
Expand Down
5 changes: 2 additions & 3 deletions src/mpl/include/mpl_gpu_ze.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ typedef struct _MPL_gpu_ipc_mem_handle_t {
fd_pid_t data;
} MPL_gpu_ipc_mem_handle_t;

typedef ze_device_handle_t MPL_gpu_device_handle_t;
typedef ze_alloc_attr_t MPL_gpu_device_attr;

typedef struct MPL_cmdlist_pool {
Expand All @@ -52,7 +51,7 @@ typedef int MPL_gpu_stream_t;
typedef volatile int MPL_gpu_event_t;

#define MPL_GPU_STREAM_DEFAULT 0
#define MPL_GPU_DEVICE_INVALID NULL
#define MPL_GPU_DEVICE_INVALID -1

#define MPL_GPU_DEV_AFFINITY_ENV "ZE_AFFINITY_MASK"

Expand All @@ -67,7 +66,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * ipc_handle, int is_shared_h
int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * ipc_handle, int shared_handle,
int dev_id, size_t size, void **ptr);
int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
MPL_gpu_device_handle_t device, void **mmaped_ptr);
int device, void **mmaped_ptr);
int MPL_ze_mmap_handle_unmap(void *ptr, int dev_id);

#endif /* ifndef MPL_GPU_ZE_H_INCLUDED */
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ int MPL_gpu_unregister_host(const void *ptr)
goto fn_exit;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int prev_devid;
Expand Down
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_fallback.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ int MPL_gpu_unregister_host(const void *ptr)
return MPL_SUCCESS;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
abort();
return MPL_ERR_GPU_INTERNAL;
Expand Down
2 changes: 1 addition & 1 deletion src/mpl/src/gpu/mpl_gpu_hip.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ int MPL_gpu_unregister_host(const void *ptr)
goto fn_exit;
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int prev_devid;
Expand Down
30 changes: 16 additions & 14 deletions src/mpl/src/gpu/mpl_gpu_ze.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ static int get_physical_device(int dev_id)
}

/* Get dev_id from device handle */
MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device)
MPL_STATIC_INLINE_PREFIX int device_to_dev_id(ze_device_handle_t device)
{
int dev_id = -1;
for (int d = 0; d < local_ze_device_count; d++) {
Expand All @@ -706,7 +706,7 @@ MPL_STATIC_INLINE_PREFIX int device_to_dev_id(MPL_gpu_device_handle_t device)
}

/* Get device from dev_id */
MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, MPL_gpu_device_handle_t * device)
MPL_STATIC_INLINE_PREFIX int dev_id_to_device(int dev_id, ze_device_handle_t * device)
{
int mpl_err = MPL_SUCCESS;

Expand Down Expand Up @@ -1774,7 +1774,7 @@ int MPL_gpu_ipc_handle_destroy(const void *ptr, MPL_pointer_attr_t * gpu_attr)
}

if (likely(MPL_gpu_info.specialized_cache)) {
dev_id = device_to_dev_id(gpu_attr->device);
dev_id = gpu_attr->device;
if (dev_id == -1) {
goto fn_fail;
}
Expand Down Expand Up @@ -2053,7 +2053,7 @@ int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr)
ret = zeMemGetAllocProperties(ze_context, ptr,
&attr->device_attr.prop, &attr->device_attr.device);
ZE_ERR_CHECK(ret);
attr->device = attr->device_attr.device;
attr->device = device_to_dev_id(attr->device_attr.device);
switch (attr->device_attr.prop.type) {
case ZE_MEMORY_TYPE_UNKNOWN:
attr->type = MPL_GPU_POINTER_UNREGISTERED_HOST;
Expand Down Expand Up @@ -2127,7 +2127,7 @@ int MPL_gpu_query_is_same_dev(int global_dev1, int global_dev2)
#endif
}

int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
int MPL_gpu_malloc(void **ptr, size_t size, int h_device)
{
int mpl_err = MPL_SUCCESS;
int ret;
Expand All @@ -2138,10 +2138,16 @@ int MPL_gpu_malloc(void **ptr, size_t size, MPL_gpu_device_handle_t h_device)
.flags = 0,
.ordinal = 0, /* We currently support a single memory type */
};

ze_device_handle_t device_handle;
ret = dev_id_to_device(h_device, &device_handle);
if (ret) {
goto fn_fail;
}
/* Currently ZE ignores this argument and uses an internal alignment
* value. However, this behavior can change in the future. */
mem_alignment = 1;
ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, h_device, ptr);
ret = zeMemAllocDevice(ze_context, &device_desc, size, mem_alignment, device_handle, ptr);

ZE_ERR_CHECK(ret);

Expand Down Expand Up @@ -2238,11 +2244,7 @@ int MPL_gpu_unregister_host(const void *ptr)

int MPL_gpu_get_dev_id_from_attr(MPL_pointer_attr_t * attr)
{
int dev_id = -1;

dev_id = device_to_dev_id(attr->device);

return dev_id;
return attr->device;
}

int MPL_gpu_get_buffer_bounds(const void *ptr, void **pbase, uintptr_t * len)
Expand Down Expand Up @@ -3044,7 +3046,7 @@ int MPL_ze_ipc_handle_map(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int is_shar
ze_result_t ret;
int status;
uint32_t nfds;
MPL_gpu_device_handle_t dev_handle;
ze_device_handle_t dev_handle;

fd_pid_t h;
h = mpl_ipc_handle->data;
Expand Down Expand Up @@ -3210,7 +3212,7 @@ int MPL_ze_ipc_handle_mmap_host(MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle, int i

/* This function takes a local device pointer and mmaps it to the host. */
int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
MPL_gpu_device_handle_t device, void **mmaped_ptr)
int device, void **mmaped_ptr)
{
ze_result_t ret;
int mpl_err = MPL_SUCCESS;
Expand All @@ -3228,7 +3230,7 @@ int MPL_ze_mmap_device_pointer(void *dptr, MPL_gpu_device_attr * attr,
offset = (char *) dptr - (char *) pbase;

mem_id = attr->prop.id;
local_dev_id = device_to_dev_id(device);
local_dev_id = device;
if (local_dev_id == -1) {
goto fn_fail;
}
Expand Down