diff --git a/openmp/libomptarget/include/OpenMP/InteropAPI.h b/openmp/libomptarget/include/OpenMP/InteropAPI.h index 71c78760a3226..f686ea9bd85fa 100644 --- a/openmp/libomptarget/include/OpenMP/InteropAPI.h +++ b/openmp/libomptarget/include/OpenMP/InteropAPI.h @@ -11,12 +11,72 @@ #ifndef OMPTARGET_OPENMP_INTEROP_API_H #define OMPTARGET_OPENMP_INTEROP_API_H -#include "omp.h" +#define omp_interop_none 0 +#include "omp.h" #include "omptarget.h" extern "C" { +/// TODO: Include the `omp.h` of the current build +/* OpenMP 5.1 interop */ +typedef intptr_t omp_intptr_t; + +/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined + * properties */ +typedef enum omp_interop_property { + omp_ipr_fr_id = -1, + omp_ipr_fr_name = -2, + omp_ipr_vendor = -3, + omp_ipr_vendor_name = -4, + omp_ipr_device_num = -5, + omp_ipr_platform = -6, + omp_ipr_device = -7, + omp_ipr_device_context = -8, + omp_ipr_targetsync = -9, + omp_ipr_first = -9 +} omp_interop_property_t; + +typedef enum omp_interop_rc { + omp_irc_no_value = 1, + omp_irc_success = 0, + omp_irc_empty = -1, + omp_irc_out_of_range = -2, + omp_irc_type_int = -3, + omp_irc_type_ptr = -4, + omp_irc_type_str = -5, + omp_irc_other = -6 +} omp_interop_rc_t; + +typedef enum omp_interop_fr { + omp_ifr_cuda = 1, + omp_ifr_cuda_driver = 2, + omp_ifr_opencl = 3, + omp_ifr_sycl = 4, + omp_ifr_hip = 5, + omp_ifr_level_zero = 6, + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 +} omp_interop_fr_t; + +typedef enum omp_interop_backend_type_t { + // reserve 0 + omp_interop_backend_type_cuda = 1, + omp_interop_backend_type_amdhsa = 7, + omp_interop_backend_type_invalid = 8 +} omp_interop_backend_type_t; + +typedef enum omp_foreign_runtime_ids { + invalid = 0, + cuda = 1, + cuda_driver = 2, + opencl = 3, + sycl = 4, + hip = 5, + level_zero = 6, + amdhsa = 7 +} omp_foreign_runtime_ids_t; + typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, kmp_interop_type_platform, @@ -24,20 +84,57 @@ typedef enum kmp_interop_type_t { kmp_interop_type_tasksync, } kmp_interop_type_t; +typedef void *omp_interop_t; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. - omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type) - : interop_type(interop_type), device_id(device_id) {} + omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type, + omp_foreign_runtime_ids_t vendor_id, + intptr_t backend_type_id) + : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id), + backend_type_id(backend_type_id) {} const char *err_str = nullptr; __tgt_async_info *async_info = nullptr; __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_foreign_runtime_ids_t vendor_id; + intptr_t backend_type_id; } omp_interop_val_t; +/// Retrieves the number of implementation-defined properties available for an +/// omp_interop_t object. +int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t); + +/// Retrieves an integer property from an omp_interop_t object. +omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t, + omp_interop_property_t, + int *); + +/// Retrieves a pointer property from an omp_interop_t object. +void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t, + omp_interop_property_t, int *); + +/// Retrieve a string property from an omp_interop_t object. +const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t, + omp_interop_property_t, + int *); + +/// Retrieve a property name from an omp_interop_t object. +const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t, + omp_interop_property_t); + +/// Retrieve a description of the type of a property associated with an +/// omp_interop_t object. +const char *__KAI_KMPC_CONVENTION +omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); + +/// Retrieve a description of the return code associated with an omp_interop_t +/// object. +extern const char *__KAI_KMPC_CONVENTION +omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); + } // extern "C" #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h index b44c6aff1b289..d360b5ef3b164 100644 --- a/openmp/libomptarget/include/OpenMP/omp.h +++ b/openmp/libomptarget/include/OpenMP/omp.h @@ -44,112 +44,6 @@ int omp_get_default_device(void) __attribute__((weak)); ///} -/// InteropAPI -/// -///{ - -/// TODO: Include the `omp.h` of the current build -/* OpenMP 5.1 interop */ -typedef intptr_t omp_intptr_t; - -/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined - * properties */ -typedef enum omp_interop_property { - omp_ipr_fr_id = -1, - omp_ipr_fr_name = -2, - omp_ipr_vendor = -3, - omp_ipr_vendor_name = -4, - omp_ipr_device_num = -5, - omp_ipr_platform = -6, - omp_ipr_device = -7, - omp_ipr_device_context = -8, - omp_ipr_targetsync = -9, - omp_ipr_first = -9 -} omp_interop_property_t; - -#define omp_interop_none 0 - -typedef enum omp_interop_rc { - omp_irc_no_value = 1, - omp_irc_success = 0, - omp_irc_empty = -1, - omp_irc_out_of_range = -2, - omp_irc_type_int = -3, - omp_irc_type_ptr = -4, - omp_irc_type_str = -5, - omp_irc_other = -6 -} omp_interop_rc_t; - -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; - -typedef void *omp_interop_t; - -/*! - * The `omp_get_num_interop_properties` routine retrieves the number of - * implementation-defined properties available for an `omp_interop_t` object. - */ -int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t); -/*! - * The `omp_get_interop_int` routine retrieves an integer property from an - * `omp_interop_t` object. - */ -omp_intptr_t __KAI_KMPC_CONVENTION -omp_get_interop_int(const omp_interop_t, omp_interop_property_t, int *); -/*! - * The `omp_get_interop_ptr` routine retrieves a pointer property from an - * `omp_interop_t` object. - */ -void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t, - omp_interop_property_t, int *); -/*! - * The `omp_get_interop_str` routine retrieves a string property from an - * `omp_interop_t` object. - */ -const char *__KAI_KMPC_CONVENTION -omp_get_interop_str(const omp_interop_t, omp_interop_property_t, int *); -/*! - * The `omp_get_interop_name` routine retrieves a property name from an - * `omp_interop_t` object. - */ -const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t, - omp_interop_property_t); -/*! - * The `omp_get_interop_type_desc` routine retrieves a description of the type - * of a property associated with an `omp_interop_t` object. - */ -const char *__KAI_KMPC_CONVENTION -omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); -/*! - * The `omp_get_interop_rc_desc` routine retrieves a description of the return - * code associated with an `omp_interop_t` object. - */ -extern const char *__KAI_KMPC_CONVENTION -omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); - -typedef enum omp_interop_backend_type_t { - // reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; - -///} InteropAPI - } // extern "C" #endif // OMPTARGET_OPENMP_OMP_H diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h index ecf669c774f14..c80b9d1693c10 100644 --- a/openmp/libomptarget/include/Shared/PluginAPI.h +++ b/openmp/libomptarget/include/Shared/PluginAPI.h @@ -17,6 +17,7 @@ #include #include +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" extern "C" { @@ -165,6 +166,9 @@ void __tgt_rtl_set_info_flag(uint32_t); // Print the device information void __tgt_rtl_print_device_info(int32_t ID); +// Set the runtime related information for interop object +int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr); + // Event related interfaces. It is expected to use the interfaces in the // following way: // 1) Create an event on the target device (__tgt_rtl_create_event). diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc index e445da6852f7b..c11341d969a6d 100644 --- a/openmp/libomptarget/include/Shared/PluginAPI.inc +++ b/openmp/libomptarget/include/Shared/PluginAPI.inc @@ -35,6 +35,7 @@ PLUGIN_API_HANDLE(synchronize); PLUGIN_API_HANDLE(query_async); PLUGIN_API_HANDLE(set_info_flag); PLUGIN_API_HANDLE(print_device_info); +PLUGIN_API_HANDLE(set_interop_info); PLUGIN_API_HANDLE(create_event); PLUGIN_API_HANDLE(record_event); PLUGIN_API_HANDLE(wait_event); diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index a0fdde951b74a..654cce3bb39c0 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2772,6 +2772,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { bool useMultipleSdmaEngines() const { return OMPX_UseMultipleSdmaEngines; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override { + InterOpPtr->vendor_id = amdhsa; + InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa; + + __tgt_device_info *DevInfo = &InterOpPtr->device_info; + DevInfo->Context = nullptr; + DevInfo->Device = &Agent; + + return Plugin::success(); + } + private: using AMDGPUEventRef = AMDGPUResourceRef; using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy; diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h index 79e8464bfda5c..84159920a5730 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h @@ -19,6 +19,7 @@ #include #include +#include "OpenMP/InteropAPI.h" #include "Shared/Debug.h" #include "Shared/Environment.h" #include "Shared/EnvironmentVar.h" @@ -850,6 +851,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { return 0; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) { + return Error::success(); + } + virtual Error getDeviceStackSize(uint64_t &V) = 0; /// Returns true if current plugin architecture is an APU @@ -1059,7 +1064,6 @@ struct GenericPluginTy { /// we could not move this function into GenericDeviceTy. virtual Expected isELFCompatible(StringRef Image) const = 0; -protected: /// Indicate whether a device id is valid. bool isValidDeviceId(int32_t DeviceId) const { return (DeviceId >= 0 && DeviceId < getNumDevices()); diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index b5f3c45c835fd..febcd8ecb756c 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -2040,6 +2040,21 @@ int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) { + assert(InterOpPtr && "Interop object is allocated"); + int32_t DevId = InterOpPtr->device_id; + + assert(PluginTy::get().isValidDeviceId(DevId) && "Device Id is valid"); + if (auto Err = PluginTy::get().getDevice(DevId).setInteropInfo(InterOpPtr)) { + REPORT("Failure to determine the OpenMP interop object info for Device Id " + "%i\n", + DevId); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { if (!PluginTy::isActive()) return false; diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index fc74c6aa23fdd..ad074a88035b5 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -1143,6 +1143,17 @@ struct CUDADeviceTy : public GenericDeviceTy { /// Returns the clock frequency for the given NVPTX device. uint64_t getClockFrequency() const override { return 1000000000; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override { + InterOpPtr->vendor_id = cuda; + InterOpPtr->backend_type_id = omp_interop_backend_type_cuda; + + __tgt_device_info *DevInfo = &InterOpPtr->device_info; + DevInfo->Context = Context; + DevInfo->Device = Device; + + return Plugin::success(); + } + private: using CUDAStreamManagerTy = GenericDeviceResourceManagerTy; using CUDAEventManagerTy = GenericDeviceResourceManagerTy; diff --git a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp index 1a995cde7816e..1db2addb25119 100644 --- a/openmp/libomptarget/src/OpenMP/InteropAPI.cpp +++ b/openmp/libomptarget/src/OpenMP/InteropAPI.cpp @@ -70,8 +70,21 @@ const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { return ("hip"); case level_zero: return ("level_zero"); + case amdhsa: + return ("amdhsa"); + default: + return ("unknown"); + } +} + +const char *getBackendIdToStr(intptr_t BackendId) { + switch (BackendId) { + case omp_interop_backend_type_cuda: + return "cuda backend"; + case omp_interop_backend_type_amdhsa: + return "amdhsa backend"; } - return ("unknown"); + return "unknown backend"; } template @@ -105,6 +118,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, : "device+context"; case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); + case omp_ipr_fr_name: + return getBackendIdToStr(InteropVal.backend_type_id); default: getTypeMismatch(Property, Err); return nullptr; @@ -221,8 +236,11 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, NoaliasDepList); } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); + // Create interop value object + InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid, + omp_interop_backend_type_invalid); + // Get an intitialized and ready device, or error auto DeviceOrErr = PM->getDevice(DeviceId); if (!DeviceOrErr) { InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); @@ -230,12 +248,15 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, } DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || !Device.RTL->init_device_info || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { + if (!Device.RTL || !Device.RTL->set_interop_info) { delete InteropPtr; InteropPtr = omp_interop_none; + return; } + + // Retrieve the target specific interop value object + Device.RTL->set_interop_info(InteropPtr); + if (InteropType == kmp_interop_type_tasksync) { if (!Device.RTL || !Device.RTL->init_async_info || Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c new file mode 100644 index 0000000000000..c66df93d44dc9 --- /dev/null +++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c @@ -0,0 +1,92 @@ +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 +// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include +#include + +#define N 16384 + +void vectorSet(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * (i + 1); +} + +void vectorCopy(int n, double *x, double *y) { + for (int i = 0; i < n; ++i) + y[i] = x[i]; +} + +void vectorScale(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * x[i]; +} + +int main() { + const double ScaleFactor = 2.0; + double x[N], y[N]; + omp_interop_t SyncObj = omp_interop_none; + int DeviceNum = omp_get_default_device(); + + // clang-format off + #pragma omp target nowait depend(out : x [0:N]) \ + map(from : x [0:N]) device(DeviceNum) + // clang-format on + vectorSet(N, 1.0, x); + +#pragma omp task depend(out : y [0:N]) + vectorSet(N, -1.0, y); + + // Get SyncObject for synchronization + // clang-format off + #pragma omp interop init(targetsync : SyncObj) device(DeviceNum) \ + depend(in : x [0:N]) depend(inout : y [0:N]) + // clang-format on + + int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL); + char *ForeignContextName = + (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL); + + if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) { + printf("OpenMP working with %s runtime to execute async memcpy.\n", + ForeignContextName); + int Status; + omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status); + + if (Status != omp_irc_success) { + fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n", + ForeignContextName, Status); + if (Status == omp_irc_no_value) + fprintf(stderr, "Parameters valid, but no meaningful value available."); + exit(1); + } + + vectorCopy(N, x, y); + } else { + // Execute as OpenMP offload + printf("Notice: Offloading myCopy to perform memcpy.\n"); + // clang-format off + #pragma omp target depend(in : x [0:N]) depend(inout : y [0:N]) nowait \ + map(to : x [0:N]) map(tofrom : y [0:N]) device(DeviceNum) + // clang-format on + vectorCopy(N, x, y); + } + + // This also ensures foreign tasks complete +#pragma omp interop destroy(SyncObj) nowait depend(out : y [0:N]) + +#pragma omp target depend(inout : x [0:N]) + vectorScale(N, ScaleFactor, x); + +#pragma omp taskwait + + printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]); + printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]); + + return 0; +} + +// ToDo: Add meaningful checks; the following is a placeholder. + +// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index eb3ab7778606a..7c60764e44716 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -192,7 +192,8 @@ omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void * omp_interop_t; diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var index a709a2f298f8c..a076890b1207b 100644 --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -261,8 +261,10 @@ parameter(omp_ifr_hip=5) integer(kind=omp_interop_fr_kind)omp_ifr_level_zero parameter(omp_ifr_level_zero=6) + integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa + parameter(omp_ifr_amdhsa=7) integer(kind=omp_interop_fr_kind)omp_ifr_last - parameter(omp_ifr_last=7) + parameter(omp_ifr_last=8) integer(kind=omp_interop_kind)omp_interop_none parameter(omp_interop_none=0) diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 713561734c481..6092799d03d6e 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1551,7 +1551,8 @@ typedef enum omp_interop_fr { omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void *omp_interop_t;