diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt index 7ec6ee74071d4..9f7dbcf0c8025 100644 --- a/sycl/plugins/CMakeLists.txt +++ b/sycl/plugins/CMakeLists.txt @@ -2,9 +2,9 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|IntelLLVM" ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-covered-switch-default") endif() -# level_zero plugin depends today on unified_runtime plugin +# level_zero and opencl plugins depend today on unified_runtime plugin # and unified_runtime plugin is not an independent plugin, adding it explicitly -if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS OR "opencl" IN_LIST SYCL_ENABLE_PLUGINS) add_subdirectory(unified_runtime) endif() diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 9943827e48788..a227cf79bc589 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -12,9 +12,41 @@ add_sycl_plugin(opencl SOURCES + # Some code is shared with the UR adapter + "../unified_runtime/pi2ur.hpp" + "../unified_runtime/pi2ur.cpp" + "../unified_runtime/ur/ur.hpp" + "../unified_runtime/ur/ur.cpp" + "../unified_runtime/ur/adapters/opencl/adapter.cpp" + "../unified_runtime/ur/adapters/opencl/adapter.hpp" + "../unified_runtime/ur/adapters/opencl/common.cpp" + "../unified_runtime/ur/adapters/opencl/common.hpp" + "../unified_runtime/ur/adapters/opencl/context.cpp" + "../unified_runtime/ur/adapters/opencl/context.hpp" + "../unified_runtime/ur/adapters/opencl/device.cpp" + "../unified_runtime/ur/adapters/opencl/device.hpp" + "../unified_runtime/ur/adapters/opencl/enqueue.cpp" + "../unified_runtime/ur/adapters/opencl/image.cpp" + "../unified_runtime/ur/adapters/opencl/kernel.cpp" + "../unified_runtime/ur/adapters/opencl/platform.cpp" + "../unified_runtime/ur/adapters/opencl/platform.hpp" + "../unified_runtime/ur/adapters/opencl/sampler.cpp" + "../unified_runtime/ur/adapters/opencl/memory.cpp" + "../unified_runtime/ur/adapters/opencl/usm.cpp" + "../unified_runtime/ur/adapters/opencl/program.cpp" + "../unified_runtime/ur/adapters/opencl/event.cpp" + "../unified_runtime/ur/adapters/opencl/queue.cpp" + "../unified_runtime/ur/adapters/opencl/command_buffer.hpp" + "../unified_runtime/ur/adapters/opencl/command_buffer.cpp" + "../unified_runtime/ur/adapters/opencl/usm_p2p.cpp" + # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime LIBRARIES + UnifiedRuntime-Headers OpenCL-ICD ) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b15418f35a3c8..6cc6a325af923 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -16,2590 +16,1112 @@ #define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \ - if (err != CL_SUCCESS) { \ - if (ptr != nullptr) \ - *ptr = nullptr; \ - return cast(reterr); \ - } - -// Want all the needed casts be explicit, do not define conversion operators. -template To cast(From value) { - // TODO: see if more sanity checks are possible. 
- static_assert(sizeof(From) == sizeof(To), "cast failed size check"); - return (To)(value); -} - -// Older versions of GCC don't like "const" here -#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) -#define CONSTFIX constexpr -#else -#define CONSTFIX const -#endif +extern "C" { + +const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; + +pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, + pi_uint32 *NumPlatforms) { + return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); +} + +pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piextPlatformGetNativeHandle(pi_platform Platform, + pi_native_handle *NativeHandle) { + return pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); +} + +pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_platform *Platform) { + return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); +} -// Names of USM functions that are queried from OpenCL -CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL"; -CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL"; -CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL"; -CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL"; -CONSTFIX char clCreateBufferWithPropertiesName[] = - "clCreateBufferWithPropertiesINTEL"; -CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; -CONSTFIX char clEnqueueMemFillName[] = "clEnqueueMemFillINTEL"; -CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; -CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; -CONSTFIX char clSetProgramSpecializationConstantName[] = - "clSetProgramSpecializationConstant"; -CONSTFIX char clGetDeviceFunctionPointerName[] = - "clGetDeviceFunctionPointerINTEL"; -CONSTFIX char clEnqueueWriteGlobalVariableName[] = - "clEnqueueWriteGlobalVariableINTEL"; -CONSTFIX char clEnqueueReadGlobalVariableName[] = - "clEnqueueReadGlobalVariableINTEL"; -// Names of host pipe functions queried from OpenCL -CONSTFIX char clEnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; -CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; - -#undef CONSTFIX - -// Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR -constexpr size_t MaxMessageSize = 256; -thread_local pi_result ErrorMessageCode = PI_SUCCESS; -thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] static void setErrorMessage(const char *message, - pi_result error_code) { - assert(strlen(message) <= MaxMessageSize); - strcpy(ErrorMessage, message); - ErrorMessageCode = error_code; -} - -// Returns plugin specific error and warning messages pi_result piPluginGetLastError(char **message) { - *message = &ErrorMessage[0]; - return ErrorMessageCode; + return pi2ur::piPluginGetLastError(message); } -// Returns plugin specific backend option. 
-pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, +pi_result piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, const char **backend_option) { - using namespace std::literals; - if (frontend_option == nullptr) - return PI_ERROR_INVALID_VALUE; - if (frontend_option == ""sv) { - *backend_option = ""; - return PI_SUCCESS; - } - // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. - if (!strcmp(frontend_option, "-O0")) { - *backend_option = "-cl-opt-disable"; - return PI_SUCCESS; - } - if (frontend_option == "-O1"sv || frontend_option == "-O2"sv || - frontend_option == "-O3"sv) { - *backend_option = ""; - return PI_SUCCESS; - } - if (frontend_option == "-ftarget-compile-fast"sv) { - *backend_option = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return PI_SUCCESS; - } - return PI_ERROR_INVALID_VALUE; -} - -static cl_int getPlatformVersion(cl_platform_id plat, - OCLV::OpenCLVersion &version) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t platVerSize = 0; - ret_err = - clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize); - - std::string platVer(platVerSize, '\0'); - ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize, - platVer.data(), nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - version = OCLV::OpenCLVersion(platVer); - if (!version.isValid()) - return CL_INVALID_PLATFORM; - - return ret_err; -} - -static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t devVerSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize); - - std::string devVer(devVerSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(), - nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - version = OCLV::OpenCLVersion(devVer); - if (!version.isValid()) - return CL_INVALID_DEVICE; - - return ret_err; -} - -static cl_int checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t extSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize); - - std::string extStr(extSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(), - nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - supported = true; - for (const std::string &ext : exts) - if (!(supported = (extStr.find(ext) != std::string::npos))) - break; - - return ret_err; -} - -using clGetDeviceFunctionPointer_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, - const char *FuncName, cl_ulong *ret_ptr); - -using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, const void *, cl_uint, const cl_event *, - cl_event *); - -using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, void *, cl_uint, const cl_event *, - cl_event *); - -using clSetProgramSpecializationConstant_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value); - -template struct FuncPtrCache { - std::map Map; - std::mutex Mutex; -}; - -// FIXME: There's currently no mechanism for cleaning up this cache, meaning -// that it is invalidated whenever a context is destroyed. 
This could lead to -// reusing an invalid function pointer if another context happends to have the -// same native handle. -struct ExtFuncPtrCacheT { - FuncPtrCache clHostMemAllocINTELCache; - FuncPtrCache clDeviceMemAllocINTELCache; - FuncPtrCache clSharedMemAllocINTELCache; - FuncPtrCache clGetDeviceFunctionPointerCache; - FuncPtrCache - clCreateBufferWithPropertiesINTELCache; - FuncPtrCache clMemBlockingFreeINTELCache; - FuncPtrCache - clSetKernelArgMemPointerINTELCache; - FuncPtrCache clEnqueueMemFillINTELCache; - FuncPtrCache clEnqueueMemcpyINTELCache; - FuncPtrCache clGetMemAllocInfoINTELCache; - FuncPtrCache - clEnqueueWriteGlobalVariableCache; - FuncPtrCache clEnqueueReadGlobalVariableCache; - FuncPtrCache clEnqueueReadHostPipeINTELCache; - FuncPtrCache clEnqueueWriteHostPipeINTELCache; - FuncPtrCache - clSetProgramSpecializationConstantCache; -}; -// A raw pointer is used here since the lifetime of this map has to be tied to -// piTeardown to avoid issues with static destruction order (a user application -// might have static objects that indirectly access this cache in their -// destructor). -static ExtFuncPtrCacheT *ExtFuncPtrCache = new ExtFuncPtrCacheT(); - -// USM helper function to get an extension function pointer -template -static pi_result getExtFuncFromContext(cl_context context, - FuncPtrCache &FPtrCache, - const char *FuncName, T *fptr) { - // TODO - // Potentially redo caching as PI interface changes. - // if cached, return cached FuncPtr - std::lock_guard CacheLock{FPtrCache.Mutex}; - std::map &FPtrMap = FPtrCache.Map; - auto It = FPtrMap.find(context); - if (It != FPtrMap.end()) { - auto F = It->second; - // if cached that extension is not available return nullptr and - // PI_ERROR_INVALID_VALUE - *fptr = F; - return F ? PI_SUCCESS : PI_ERROR_INVALID_VALUE; - } - - cl_uint deviceCount; - cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); - - if (ret_err != CL_SUCCESS || deviceCount < 1) { - return PI_ERROR_INVALID_CONTEXT; - } - - std::vector devicesInCtx(deviceCount); - ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); - - if (ret_err != CL_SUCCESS) { - return PI_ERROR_INVALID_CONTEXT; - } - - cl_platform_id curPlatform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); - - if (ret_err != CL_SUCCESS) { - return PI_ERROR_INVALID_CONTEXT; - } - - T FuncPtr = - (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); - - if (!FuncPtr) { - // Cache that the extension is not available - FPtrMap[context] = nullptr; - return PI_ERROR_INVALID_VALUE; - } + return pi2ur::piPluginGetBackendOption(platform, frontend_option, + backend_option); +} - *fptr = FuncPtr; - FPtrMap[context] = FuncPtr; +pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, + pi_uint32 NumEntries, pi_device *Devices, + pi_uint32 *NumDevices) { + return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, + NumDevices); +} - return cast(ret_err); +pi_result piDeviceRetain(pi_device Device) { + return pi2ur::piDeviceRetain(Device); } -/// Enables indirect access of pointers in kernels. -/// Necessary to avoid telling CL about every pointer that might be used. -/// -/// \param kernel is the kernel to be launched -static pi_result USMSetIndirectAccess(pi_kernel kernel) { - // We test that each alloc type is supported before we actually try to - // set KernelExecInfo. 
- cl_bool TrueVal = CL_TRUE; - clHostMemAllocINTEL_fn HFunc = nullptr; - clSharedMemAllocINTEL_fn SFunc = nullptr; - clDeviceMemAllocINTEL_fn DFunc = nullptr; - cl_context CLContext; - cl_int CLErr = clGetKernelInfo(cast(kernel), CL_KERNEL_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clHostMemAllocINTELCache, clHostMemAllocName, - &HFunc); - if (HFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clDeviceMemAllocINTELCache, - clDeviceMemAllocName, &DFunc); - if (DFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSharedMemAllocINTELCache, - clSharedMemAllocName, &SFunc); - if (SFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - return PI_SUCCESS; +pi_result piDeviceRelease(pi_device Device) { + return pi2ur::piDeviceRelease(Device); } -extern "C" { +pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} -pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - switch (paramName) { - // TODO: Check regularly to see if support in enabled in OpenCL. - // Intel GPU EU device-specific information extensions. - // Some of the queries are enabled by cl_intel_device_attribute_query - // extension, but it's not yet in the Registry. - case PI_DEVICE_INFO_PCI_ADDRESS: - case PI_DEVICE_INFO_GPU_EU_COUNT: - case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - case PI_DEVICE_INFO_GPU_SLICES: - case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: - // TODO: Check if device UUID extension is enabled in OpenCL. 
- // For details about Intel UUID extension, see - // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md - case PI_DEVICE_INFO_UUID: - return PI_ERROR_INVALID_VALUE; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - // This query is missing before OpenCL 3.0 - // Check version and handle appropriately - OCLV::OpenCLVersion devVer; - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - // Minimum required capability to be returned - // For OpenCL 1.2, this is all that is required - pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED; - - if (devVer >= OCLV::V3_0) { - // For OpenCL >=3.0, the query should be implemented - cl_device_atomic_capabilities cl_capabilities = 0; - cl_int ret_err = clGetDeviceInfo( - deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr); - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - // Mask operation to only consider atomic_memory_order* capabilities - cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | - CL_DEVICE_ATOMIC_ORDER_ACQ_REL | - CL_DEVICE_ATOMIC_ORDER_SEQ_CST; - cl_capabilities &= mask; - - // The memory order capabilities are hierarchical, if one is implied, all - // preceding capbilities are implied as well. Especially in the case of - // ACQ_REL. - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - capabilities |= PI_MEMORY_ORDER_SEQ_CST; - } - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { - capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE; - } - } else if (devVer >= OCLV::V2_0) { - // For OpenCL 2.x, return all capabilities - // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) - capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | - PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; - } - - if (paramValue) { - if (paramValueSize < sizeof(pi_memory_order_capabilities)) - return static_cast(CL_INVALID_VALUE); - - std::memcpy(paramValue, &capabilities, sizeof(capabilities)); - } - - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(capabilities); - - return static_cast(CL_SUCCESS); - } - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2 - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | - PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && - "Violates minimum mandated guarantee"); - - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. 
At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - // We already initialized to these minimum mandated capabilities. Just - // check wider scopes. - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - result |= PI_MEMORY_SCOPE_DEVICE; - } - - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - result |= PI_MEMORY_SCOPE_SYSTEM; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we - // already initialized using it. - if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | - // ALL_DEVICES - result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2 - pi_memory_order_capabilities result = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && - "Violates minimum mandated guarantee"); - assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && - "Violates minimum mandated guarantee"); - - // We already initialized to minimum mandated capabilities. Just - // check stronger orders. - if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - result |= PI_MEMORY_ORDER_SEQ_CST; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are RELAXED | ACQ_REL, we - // already initialized using these. - if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | - // SEQ_CST - result |= PI_MEMORY_ORDER_SEQ_CST; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2. - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. 
(https://github.com/KhronosGroup/SYCL-Docs/pull/382) - pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | - PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && - "Violates minimum mandated guarantee"); - - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - // We already initialized to these minimum mandated capabilities. Just - // check wider scopes. - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - result |= PI_MEMORY_SCOPE_DEVICE; - } - - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - result |= PI_MEMORY_SCOPE_SYSTEM; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we - // already initialized using it. - if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | - // ALL_DEVICES - result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_ATOMIC_64: { - cl_int ret_err = CL_SUCCESS; - cl_bool result = CL_FALSE; - bool supported = false; - - ret_err = checkDeviceExtensions( - cast(device), - {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, - supported); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - result = supported; - std::memcpy(paramValue, &result, sizeof(cl_bool)); - return PI_SUCCESS; - } - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { - // bfloat16 math functions are not yet supported on Intel GPUs. 
- bool result = false; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_IMAGE_SRGB: { - bool result = true; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: { - cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - cl_int res = clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, - sizeof(cl_device_type), &devType, nullptr); - - // FIXME: here we assume that program built for a root GPU device can be - // used on its sub-devices without re-building - bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU); - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: - // Returns the maximum sizes of a work group for each dimension one - // could use to submit a kernel. There is no such query defined in OpenCL - // so we'll return the maximum value. - { - if (paramValueSizeRet) - *paramValueSizeRet = paramValueSize; - static constexpr size_t Max = (std::numeric_limits::max)(); - size_t *out = cast(paramValue); - if (paramValueSize >= sizeof(size_t)) - out[0] = Max; - if (paramValueSize >= 2 * sizeof(size_t)) - out[1] = Max; - if (paramValueSize >= 3 * sizeof(size_t)) - out[2] = Max; - return PI_SUCCESS; - } - case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { - pi_int32 result = 1; - std::memcpy(paramValue, &result, sizeof(pi_int32)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { - // Corresponding OpenCL query is only available starting with OpenCL 2.1 and - // we have to emulate it on older OpenCL runtimes. - OCLV::OpenCLVersion version; - cl_int err = getDeviceVersion(cast(device), version); - if (err != CL_SUCCESS) - return static_cast(err); - - if (version >= OCLV::V2_1) { - err = clGetDeviceInfo(cast(device), - cast(paramName), paramValueSize, - paramValue, paramValueSizeRet); - if (err != CL_SUCCESS) - return static_cast(err); - - if (paramValue && *static_cast(paramValue) == 0u) { - // OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 spec - // says that minimum possible value is 1. - cl_uint value = 1u; - std::memcpy(paramValue, &value, sizeof(cl_uint)); - } - - return static_cast(err); - } - - // Otherwise, we can't query anything, because even cl_khr_subgroups does - // not provide similar query. Therefore, simply return minimum possible - // value 1 here. 
- if (paramValue && paramValueSize < sizeof(cl_uint)) - return static_cast(CL_INVALID_VALUE); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(cl_uint); - - if (paramValue) { - cl_uint value = 1u; - std::memcpy(paramValue, &value, sizeof(cl_uint)); - } - - return static_cast(CL_SUCCESS); - } - case PI_DEVICE_INFO_BACKEND_VERSION: { - // TODO: return some meaningful for backend_version below - const char *value = ""; - size_t valueSize = (strlen(value) + 1) * sizeof(char); - if (paramValue) - std::memcpy(paramValue, value, valueSize); - if (paramValueSizeRet != nullptr) - *paramValueSizeRet = valueSize; - return PI_SUCCESS; - } - case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { - cl_int ret_err = CL_SUCCESS; - bool result = false; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - bool supported = false; - - ret_err = - checkDeviceExtensions(cast(device), - {"cl_intel_mem_channel_property"}, supported); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - result = supported; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT: { - bool result = false; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - cl_int res = clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, - sizeof(cl_device_type), &devType, nullptr); - if (res != CL_SUCCESS) - return static_cast(res); - - pi_uint32 vendorId = 0; - res = clGetDeviceInfo(cast(device), PI_DEVICE_INFO_VENDOR_ID, - sizeof(vendorId), &vendorId, nullptr); - if (res != CL_SUCCESS) - return static_cast(res); - // ESIMD is only supported by Intel GPUs. - result = devType == CL_DEVICE_TYPE_GPU && vendorId == 0x8086; - if (paramValue) - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - default: - cl_int result = clGetDeviceInfo( - cast(device), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); - } -} - -pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, - pi_uint32 *num_platforms) { - cl_int result = clGetPlatformIDs(cast(num_entries), - cast(platforms), - cast(num_platforms)); - - // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms - if (result == CL_PLATFORM_NOT_FOUND_KHR) { - assert(num_platforms != 0); - *num_platforms = 0; - result = PI_SUCCESS; - } - return static_cast(result); -} - -pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - - switch (paramName) { - case PI_EXT_PLATFORM_INFO_BACKEND: { - pi_platform_backend result = PI_EXT_PLATFORM_BACKEND_OPENCL; - if (paramValue) { - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - default: { - cl_int result = clGetPlatformInfo( - cast(platform), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); - } - } - return PI_SUCCESS; +pi_result piDevicePartition(pi_device Device, + const pi_device_partition_property *Properties, + pi_uint32 NumDevices, pi_device *OutDevices, + pi_uint32 *OutNumDevices) { + return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, + OutNumDevices); } -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform *platform) { - 
assert(platform); - assert(nativeHandle); - *platform = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextDeviceSelectBinary(pi_device Device, pi_device_binary *Binaries, + pi_uint32 NumBinaries, + pi_uint32 *SelectedBinaryInd) { + return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, + SelectedBinaryInd); } -pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - cl_int result = clGetDeviceIDs( - cast(platform), cast(device_type), - cast(num_entries), cast(devices), - cast(num_devices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (result == CL_DEVICE_NOT_FOUND) { - assert(num_devices != 0); - *num_devices = 0; - result = PI_SUCCESS; - } - return cast(result); -} - -pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, - pi_uint32 num_images, - pi_uint32 *selected_image_ind) { - - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Choose the binary target for the provided device - const char *image_target = nullptr; - // Get the type of the device - cl_device_type device_type; - constexpr pi_uint32 invalid_ind = std::numeric_limits::max(); - cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, - sizeof(cl_device_type), &device_type, nullptr); - if (ret_err != CL_SUCCESS) { - *selected_image_ind = invalid_ind; - return cast(ret_err); - } - - switch (device_type) { - // TODO: Factor out vendor specifics into a separate source - // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? 
- - // We'll attempt to find an image that was AOT-compiled - // from a SPIR-V image into an image specific for: - - case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64; - break; - case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN; - break; - case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA; - break; - default: - // Otherwise, we'll attempt to find and JIT-compile - // a device-independent SPIR-V image - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64; - break; - } - - // Find the appropriate device image, fallback to spirv if not found - pi_uint32 fallback = invalid_ind; - for (pi_uint32 i = 0; i < num_images; ++i) { - if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) { - *selected_image_ind = i; - return PI_SUCCESS; - } - if (strcmp(images[i]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) - fallback = i; - } - // Points to a spirv image, if such indeed was found - if ((*selected_image_ind = fallback) != invalid_ind) - return PI_SUCCESS; - // No image can be loaded for the given device - return PI_ERROR_INVALID_BINARY; -} - -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform, pi_device *piDevice) { - assert(piDevice != nullptr); - *piDevice = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextDeviceGetNativeHandle(pi_device Device, + pi_native_handle *NativeHandle) { + return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); +} + +pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_platform Platform, + pi_device *Device) { + return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, + Device); +} + +pi_result piContextCreate(const pi_context_properties *Properties, + pi_uint32 NumDevices, const pi_device *Devices, + void (*PFnNotify)(const char *ErrInfo, + const void *PrivateInfo, size_t CB, + void *UserData), + void *UserData, pi_context *RetContext) { + return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, + UserData, RetContext); +} + +pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextContextSetExtendedDeleter(pi_context Context, + pi_context_extended_deleter Function, + void *UserData) { + return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); +} + +pi_result piextContextGetNativeHandle(pi_context Context, + pi_native_handle *NativeHandle) { + return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); +} + +pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_uint32 NumDevices, + const pi_device *Devices, + bool OwnNativeHandle, + pi_context *RetContext) { + return pi2ur::piextContextCreateWithNativeHandle( + NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); +} + +pi_result piContextRetain(pi_context Context) { + return pi2ur::piContextRetain(Context); +} + +pi_result piContextRelease(pi_context Context) { + return pi2ur::piContextRelease(Context); +} + +pi_result piQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties Flags, pi_queue *Queue) { + pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; + return piextQueueCreate(Context, 
Device, Properties, Queue); } pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue) { - assert(Properties); - // Expect flags mask to be passed first. - assert(Properties[0] == PI_QUEUE_FLAGS); - if (Properties[0] != PI_QUEUE_FLAGS) - return PI_ERROR_INVALID_VALUE; - pi_queue_properties Flags = Properties[1]; - // Extra data isn't supported yet. - assert(Properties[2] == 0); - if (Properties[2] != 0) - return PI_ERROR_INVALID_VALUE; - return piQueueCreate(Context, Device, Flags, Queue); -} -pi_result piQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, pi_queue *queue) { - assert(queue && "piQueueCreate failed, queue argument is null"); - - cl_platform_id curPlatform; - cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); - - CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); - - // Check that unexpected bits are not set. - assert(!(properties & - ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | - PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | - PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH))); - - // Properties supported by OpenCL backend. - cl_command_queue_properties SupportByOpenCL = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | - CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - - OCLV::OpenCLVersion version; - ret_err = getPlatformVersion(curPlatform, version); - - CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); - - if (version >= OCLV::V2_0) { - *queue = cast(clCreateCommandQueue( - cast(context), cast(device), - cast(properties) & SupportByOpenCL, - &ret_err)); - return cast(ret_err); - } - - cl_queue_properties CreationFlagProperties[] = { - CL_QUEUE_PROPERTIES, - cast(properties) & SupportByOpenCL, 0}; - *queue = cast(clCreateCommandQueueWithProperties( - cast(context), cast(device), - CreationFlagProperties, &ret_err)); - return cast(ret_err); -} - -pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (queue == nullptr) { - return PI_ERROR_INVALID_QUEUE; - } - - switch (param_name) { - case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: - // OpenCL doesn't provide API to check the status of the queue. 
- return PI_ERROR_INVALID_VALUE; - default: - cl_int CLErr = clGetCommandQueueInfo( - cast(queue), cast(param_name), - param_value_size, param_value, param_value_size_ret); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - } - return PI_SUCCESS; + return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); +} + +pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } + +pi_result piQueueRelease(pi_queue Queue) { + return pi2ur::piQueueRelease(Queue); } -pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, - int32_t NativeHandleDesc, pi_context, - pi_device, bool ownNativeHandle, +pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } + +pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } + +pi_result piextQueueGetNativeHandle(pi_queue Queue, + pi_native_handle *NativeHandle, + int32_t *NativeHandleDesc) { + return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, + NativeHandleDesc); +} + +pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, + int32_t NativeHandleDesc, + pi_context Context, pi_device Device, + bool OwnNativeHandle, pi_queue_properties *Properties, - pi_queue *piQueue) { - (void)NativeHandleDesc; - (void)ownNativeHandle; - (void)Properties; - assert(piQueue != nullptr); - *piQueue = reinterpret_cast(nativeHandle); - clRetainCommandQueue(cast(nativeHandle)); - return PI_SUCCESS; + pi_queue *Queue) { + return pi2ur::piextQueueCreateWithNativeHandle( + NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, + Properties, Queue); } -pi_result piProgramCreate(pi_context context, const void *il, size_t length, - pi_program *res_program) { - cl_uint deviceCount; - cl_int ret_err = - clGetContextInfo(cast(context), CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); +pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, + void *HostPtr, pi_mem *RetMem, + const pi_mem_properties *properties) { + return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, + properties); +} - std::vector devicesInCtx(deviceCount); +pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} - if (ret_err != CL_SUCCESS || deviceCount < 1) { - if (res_program != nullptr) - *res_program = nullptr; - return cast(CL_INVALID_CONTEXT); - } +pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } - ret_err = clGetContextInfo(cast(context), CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); +pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, + const pi_image_format *ImageFormat, + const pi_image_desc *ImageDesc, void *HostPtr, + pi_mem *RetImage) { + return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, + HostPtr, RetImage); +} - cl_platform_id curPlatform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); +pi_result 
piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +} - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool ownNativeHandle, pi_mem *Mem) { + return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, + ownNativeHandle, Mem); +} - OCLV::OpenCLVersion platVer; - ret_err = getPlatformVersion(curPlatform, platVer); +pi_result piProgramCreate(pi_context Context, const void *ILBytes, + size_t Length, pi_program *Program) { + return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); +} - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piProgramCreateWithBinary( + pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, + const size_t *Lengths, const unsigned char **Binaries, + size_t NumMetadataEntries, const pi_device_binary_property *Metadata, + pi_int32 *BinaryStatus, pi_program *Program) { + return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, + Lengths, Binaries, NumMetadataEntries, + Metadata, BinaryStatus, Program); +} - pi_result err = PI_SUCCESS; - if (platVer >= OCLV::V2_1) { +pi_result piextMemImageCreateWithNativeHandle( + pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + pi_mem *Img) { + return pi2ur::piextMemImageCreateWithNativeHandle( + NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); +} - /* Make sure all devices support CL 2.1 or newer as well. */ - for (cl_device_id dev : devicesInCtx) { - OCLV::OpenCLVersion devVer; +pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} - ret_err = getDeviceVersion(dev, devVer); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + pi_uint32 NumInputPrograms, + const pi_program *InputPrograms, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData, pi_program *RetProgram) { + return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, + NumInputPrograms, InputPrograms, PFnNotify, + UserData, RetProgram); +} - /* If the device does not support CL 2.1 or greater, we need to make sure - * it supports the cl_khr_il_program extension. 
- */ - if (devVer < OCLV::V2_1) { - bool supported = false; +pi_result piProgramCompile( + pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, + const char *Options, pi_uint32 NumInputHeaders, + const pi_program *InputHeaders, const char **HeaderIncludeNames, + void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { + return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, + NumInputHeaders, InputHeaders, + HeaderIncludeNames, PFnNotify, UserData); +} - ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData) { + return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, + PFnNotify, UserData); +} - if (!supported) - return cast(CL_INVALID_OPERATION); - } - } - if (res_program != nullptr) - *res_program = cast(clCreateProgramWithIL( - cast(context), il, length, cast(&err))); - return err; - } +pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, + pi_program_build_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} - /* If none of the devices conform with CL 2.1 or newer make sure they all - * support the cl_khr_il_program extension. - */ - for (cl_device_id dev : devicesInCtx) { - bool supported = false; +pi_result piProgramRetain(pi_program Program) { + return pi2ur::piProgramRetain(Program); +} - ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); +pi_result piProgramRelease(pi_program Program) { + return pi2ur::piProgramRelease(Program); +} - if (!supported) - return cast(CL_INVALID_OPERATION); - } +pi_result piextProgramGetNativeHandle(pi_program Program, + pi_native_handle *NativeHandle) { + return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); +} - using apiFuncT = - cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - apiFuncT funcPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - curPlatform, "clCreateProgramWithILKHR")); +pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_program *Program) { + return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, + OwnNativeHandle, Program); +} - assert(funcPtr != nullptr); - if (res_program != nullptr) - *res_program = cast( - funcPtr(cast(context), il, length, cast(&err))); - else - err = PI_ERROR_INVALID_VALUE; +pi_result piKernelCreate(pi_program Program, const char *KernelName, + pi_kernel *RetKernel) { + return pi2ur::piKernelCreate(Program, KernelName, RetKernel); +} - return err; +pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, + const void *ArgValue) { + return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context, bool, - pi_program *piProgram) { - assert(piProgram != nullptr); - *piProgram = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, + const pi_mem 
*ArgValue) { + return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, + ArgValue); } -pi_result piSamplerCreate(pi_context context, - const pi_sampler_properties *sampler_properties, - pi_sampler *result_sampler) { - // Initialize properties according to OpenCL 2.1 spec. - pi_result error_code; - pi_bool normalizedCoords = PI_TRUE; - pi_sampler_addressing_mode addressingMode = PI_SAMPLER_ADDRESSING_MODE_CLAMP; - pi_sampler_filter_mode filterMode = PI_SAMPLER_FILTER_MODE_NEAREST; - - // Unpack sampler properties - for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0; - ++i) { - if (sampler_properties[i] == PI_SAMPLER_INFO_NORMALIZED_COORDS) { - normalizedCoords = static_cast(sampler_properties[++i]); - } else if (sampler_properties[i] == PI_SAMPLER_INFO_ADDRESSING_MODE) { - addressingMode = - static_cast(sampler_properties[++i]); - } else if (sampler_properties[i] == PI_SAMPLER_INFO_FILTER_MODE) { - filterMode = static_cast(sampler_properties[++i]); - } else { - assert(false && "Cannot recognize sampler property"); - } - } - - // Always call OpenCL 1.0 API - *result_sampler = cast( - clCreateSampler(cast(context), normalizedCoords, - addressingMode, filterMode, cast(&error_code))); - return error_code; -} - -pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, - const pi_mem_obj_property *arg_properties, - const pi_mem *arg_value) { - std::ignore = arg_properties; - return cast( - clSetKernelArg(cast(kernel), cast(arg_index), - sizeof(arg_value), cast(arg_value))); -} - -pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, - const pi_sampler *arg_value) { - return cast( - clSetKernelArg(cast(kernel), cast(arg_index), - sizeof(cl_sampler), cast(arg_value))); -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context, pi_program, bool, - pi_kernel *piKernel) { - assert(piKernel != nullptr); - *piKernel = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_sampler *ArgValue) { + return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); } -// Function gets characters between delimeter's in str -// then checks if they are equal to the sub_str. 
-// returns true if there is at least one instance -// returns false if there are no instances of the name -static bool is_in_separated_string(const std::string &str, char delimiter, - const std::string &sub_str) { - size_t beg = 0; - size_t length = 0; - for (const auto &x : str) { - if (x == delimiter) { - if (str.substr(beg, length) == sub_str) - return true; - - beg += length + 1; - length = 0; - continue; - } - length++; - } - if (length != 0) - if (str.substr(beg, length) == sub_str) - return true; - - return false; -} - -pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, - const char *func_name, - pi_uint64 *function_pointer_ret) { - - cl_context CLContext = nullptr; - cl_int ret_err = - clGetProgramInfo(cast(program), CL_PROGRAM_CONTEXT, - sizeof(CLContext), &CLContext, nullptr); - - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - clGetDeviceFunctionPointer_fn FuncT = nullptr; - ret_err = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - clGetDeviceFunctionPointerName, &FuncT); - - pi_result pi_ret_err = PI_SUCCESS; - - // Check if kernel name exists, to prevent opencl runtime throwing exception - // with cpu runtime - // TODO: Use fallback search method if extension does not exist once CPU - // runtime no longer throws exceptions and prints messages when given - // unavailable functions. - *function_pointer_ret = 0; - size_t Size; - cl_int Res = - clGetProgramInfo(cast(program), PI_PROGRAM_INFO_KERNEL_NAMES, - 0, nullptr, &Size); - if (Res != CL_SUCCESS) - return cast(Res); - - std::string ClResult(Size, ' '); - Res = - clGetProgramInfo(cast(program), PI_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr); - if (Res != CL_SUCCESS) - return cast(Res); - - // Get rid of the null terminator and search for kernel_name - // If function cannot be found return error code to indicate it - // exists - ClResult.pop_back(); - if (!is_in_separated_string(ClResult, ';', func_name)) - return PI_ERROR_INVALID_KERNEL_NAME; - - pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - - // If clGetDeviceFunctionPointer is in list of extensions - if (FuncT) { - pi_ret_err = cast(FuncT(cast(device), - cast(program), func_name, - function_pointer_ret)); - // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address - // cannot be found even if kernel exits. 
As the kernel does exist return - // that the address is not available - if (pi_ret_err == CL_INVALID_ARG_VALUE) { - *function_pointer_ret = 0; - return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - } - } - return pi_ret_err; -} - -pi_result piContextCreate(const pi_context_properties *properties, - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, - const void *private_info, - size_t cb, void *user_data1), - void *user_data, pi_context *retcontext) { - pi_result ret = PI_ERROR_INVALID_OPERATION; - *retcontext = cast( - clCreateContext(properties, cast(num_devices), - cast(devices), pfn_notify, - user_data, cast(&ret))); - - return ret; -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_uint32 num_devices, - const pi_device *devices, - bool ownNativeHandle, - pi_context *piContext) { - (void)num_devices; - (void)devices; - (void)ownNativeHandle; - assert(piContext != nullptr); - assert(ownNativeHandle == false); - *piContext = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); } -pi_result piContextGetInfo(pi_context context, pi_context_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - switch (paramName) { - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: { - // 2D USM memops are not supported. - cl_bool result = false; - std::memcpy(paramValue, &result, sizeof(cl_bool)); - return PI_SUCCESS; - } - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // These queries should be dealt with in context_impl.cpp by calling the - // queries of each device separately and building the intersection set. 
- setErrorMessage("These queries should have never come here.", - PI_ERROR_INVALID_ARG_VALUE); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - } - default: - cl_int result = clGetContextInfo( - cast(context), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); - } -} - -pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, - void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties) { - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - if (properties) { - // TODO: need to check if all properties are supported by OpenCL RT and - // ignore unsupported - clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - // First we need to look up the function pointer - ret_err = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - clCreateBufferWithPropertiesName, &FuncPtr); - if (FuncPtr) { - *ret_mem = - cast(FuncPtr(CLContext, properties, cast(flags), - size, host_ptr, cast(&ret_err))); - return ret_err; - } - } - - *ret_mem = cast(clCreateBuffer(cast(context), - cast(flags), size, - host_ptr, cast(&ret_err))); - return ret_err; -} - -pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, - const pi_image_format *image_format, - const pi_image_desc *image_desc, void *host_ptr, - pi_mem *ret_mem) { - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_mem = cast( - clCreateImage(cast(context), cast(flags), - cast(image_format), - cast(image_desc), host_ptr, - cast(&ret_err))); - - return ret_err; -} - -pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, - pi_buffer_create_type buffer_create_type, - void *buffer_create_info, pi_mem *ret_mem) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_mem = cast( - clCreateSubBuffer(cast(buffer), cast(flags), - cast(buffer_create_type), - buffer_create_info, cast(&ret_err))); - return ret_err; -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - bool ownNativeHandle, pi_mem *piMem) { - (void)context; - (void)ownNativeHandle; - assert(piMem != nullptr); - *piMem = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextMemImageAllocate(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, + RetMem); } -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - (void)context; - (void)ownNativeHandle; - (void)ImageFormat; - (void)ImageDesc; - assert(Img != nullptr); - *Img = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextMemUnsampledImageCreate(pi_context Context, pi_device Device, + pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemUnsampledImageCreate( + Context, Device, ImgMem, ImageFormat, ImageDesc, RetMem, RetHandle); } -pi_result piProgramCreateWithBinary( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const size_t *lengths, const unsigned char **binaries, - size_t num_metadata_entries, const pi_device_binary_property *metadata, - pi_int32 *binary_status, pi_program *ret_program) { - (void)metadata; - (void)num_metadata_entries; - - 
pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_program = cast(clCreateProgramWithBinary( - cast(context), cast(num_devices), - cast(device_list), lengths, binaries, - cast(binary_status), cast(&ret_err))); - return ret_err; -} - -pi_result piProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_program = cast( - clLinkProgram(cast(context), cast(num_devices), - cast(device_list), options, - cast(num_input_programs), - cast(input_programs), - cast(pfn_notify), user_data, - cast(&ret_err))); - return ret_err; -} - -pi_result piKernelCreate(pi_program program, const char *kernel_name, - pi_kernel *ret_kernel) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_kernel = cast(clCreateKernel( - cast(program), kernel_name, cast(&ret_err))); - return ret_err; -} - -pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (kernel == nullptr) { - return PI_ERROR_INVALID_KERNEL; - } - - switch (param_name) { - case PI_KERNEL_GROUP_INFO_NUM_REGS: - return PI_ERROR_INVALID_VALUE; - default: - cl_int result = clGetKernelWorkGroupInfo( - cast(kernel), cast(device), - cast(param_name), param_value_size, - param_value, param_value_size_ret); - return static_cast(result); - } -} - -pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_sub_group_info param_name, - size_t input_value_size, - const void *input_value, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - (void)param_value_size; - size_t ret_val; - cl_int ret_err; - - std::shared_ptr implicit_input_value; - if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE && !input_value) { - // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no - // value is given we use the max work item size of the device in the first - // dimention to avoid truncation of max sub-group size. - pi_uint32 max_dims = 0; - pi_result pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - sizeof(pi_uint32), &max_dims, nullptr); - if (pi_ret_err != PI_SUCCESS) - return pi_ret_err; - std::shared_ptr WGSizes{new size_t[max_dims]}; - pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - max_dims * sizeof(size_t), WGSizes.get(), nullptr); - if (pi_ret_err != PI_SUCCESS) - return pi_ret_err; - for (size_t i = 1; i < max_dims; ++i) - WGSizes.get()[i] = 1; - implicit_input_value = std::move(WGSizes); - input_value_size = max_dims * sizeof(size_t); - input_value = implicit_input_value.get(); - } - - ret_err = cast(clGetKernelSubGroupInfo( - cast(kernel), cast(device), - cast(param_name), input_value_size, input_value, - sizeof(size_t), &ret_val, param_value_size_ret)); - - if (ret_err == CL_INVALID_OPERATION) { - // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does - // not support subgroups. 
- - if (param_name == PI_KERNEL_MAX_NUM_SUB_GROUPS) { - ret_val = 1; // Minimum required by SYCL 2020 spec - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_COMPILE_NUM_SUB_GROUPS) { - ret_val = 0; // Not specified by kernel - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE) { - // Return the maximum sub group size for the device - size_t result_size = 0; - // Two calls to piDeviceGetInfo are needed: the first determines the size - // required to store the result, and the second returns the actual size - // values. - pi_result pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, - nullptr, &result_size); - if (pi_ret_err != PI_SUCCESS) { - return pi_ret_err; - } - assert(result_size % sizeof(size_t) == 0); - std::vector result(result_size / sizeof(size_t)); - pi_ret_err = piDeviceGetInfo(device, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, - result_size, result.data(), nullptr); - if (pi_ret_err != PI_SUCCESS) { - return pi_ret_err; - } - ret_val = *std::max_element(result.begin(), result.end()); - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL) { - ret_val = 0; // Not specified by kernel - ret_err = CL_SUCCESS; - } - } - - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - *(static_cast(param_value)) = static_cast(ret_val); - if (param_value_size_ret) - *param_value_size_ret = sizeof(uint32_t); - return PI_SUCCESS; +pi_result piextMemSampledImageCreate(pi_context Context, pi_device Device, + pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_sampler Sampler, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, + ImageDesc, Sampler, RetMem, + RetHandle); } -pi_result piEventCreate(pi_context context, pi_event *ret_event) { +pi_result piextBindlessImageSamplerCreate( + pi_context Context, const pi_sampler_properties *SamplerProperties, + float MinMipmapLevelClamp, float MaxMipmapLevelClamp, float MaxAnisotropy, + pi_sampler *RetSampler) { + return pi2ur::piextBindlessImageSamplerCreate( + Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, + MaxAnisotropy, RetSampler); +} - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - auto *cl_err = cast(&ret_err); +pi_result piextMemMipmapGetLevel(pi_context Context, pi_device Device, + pi_image_mem_handle MipMem, unsigned int Level, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); +} - cl_event e = clCreateUserEvent(cast(context), cl_err); - *ret_event = cast(e); - if (*cl_err != CL_SUCCESS) - return ret_err; - *cl_err = clSetUserEventStatus(e, CL_COMPLETE); - return ret_err; +pi_result piextMemImageFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); } -pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - bool ownNativeHandle, - pi_event *piEvent) { - (void)context; - // TODO: ignore this, but eventually want to return error as unsupported - (void)ownNativeHandle; +pi_result piextMemMipmapFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); +} - assert(piEvent != nullptr); - assert(nativeHandle); - assert(context); +pi_result +piextMemImageCopy(pi_queue Queue, void *DstPtr, void *SrcPtr, + const pi_image_format 
*ImageFormat, + const pi_image_desc *ImageDesc, + const pi_image_copy_flags Flags, pi_image_offset SrcOffset, + pi_image_offset DstOffset, pi_image_region CopyExtent, + pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextMemImageCopy(Queue, DstPtr, SrcPtr, ImageFormat, ImageDesc, + Flags, SrcOffset, DstOffset, CopyExtent, + HostExtent, NumEventsInWaitList, + EventWaitList, Event); +} - *piEvent = reinterpret_cast(nativeHandle); - return PI_SUCCESS; +pi_result piextMemUnsampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); } -pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_map, pi_map_flags map_flags, - size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event, void **ret_map) { +pi_result piextMemSampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); +} - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_map = cast(clEnqueueMapBuffer( - cast(command_queue), cast(buffer), - cast(blocking_map), map_flags, offset, size, - cast(num_events_in_wait_list), - cast(event_wait_list), cast(event), - cast(&ret_err))); - return ret_err; +pi_result piextMemImageGetInfo(pi_image_mem_handle MemHandle, + pi_image_info ParamName, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextMemImageGetInfo(MemHandle, ParamName, ParamValue, + ParamValueSizeRet); } -// -// USM -// +pi_result piextMemImportOpaqueFD(pi_context Context, pi_device Device, + size_t Size, int FileDescriptor, + pi_interop_mem_handle *RetHandle) { + return pi2ur::piextMemImportOpaqueFD(Context, Device, Size, FileDescriptor, + RetHandle); +} -/// Allocates host memory accessible by the device. 
-/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { +pi_result piextMemMapExternalArray(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_interop_mem_handle MemHandle, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, + ImageDesc, MemHandle, RetMem); +} - void *Ptr = nullptr; - pi_result RetVal = PI_ERROR_INVALID_OPERATION; +pi_result piextMemReleaseInterop(pi_context Context, pi_device Device, + pi_interop_mem_handle ExtMem) { + return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); +} - // First we need to look up the function pointer - clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clHostMemAllocINTELCache, clHostMemAllocName, - &FuncPtr); +pi_result +piextImportExternalSemaphoreOpaqueFD(pi_context Context, pi_device Device, + int FileDescriptor, + pi_interop_semaphore_handle *RetHandle) { + return pi2ur::piextImportExternalSemaphoreOpaqueFD(Context, Device, + FileDescriptor, RetHandle); +} - if (FuncPtr) { - Ptr = FuncPtr(CLContext, cast(properties), size, - alignment, cast(&RetVal)); - } +pi_result piextDestroyExternalSemaphore(pi_context Context, pi_device Device, + pi_interop_semaphore_handle SemHandle) { + return pi2ur::piextDestroyExternalSemaphore(Context, Device, SemHandle); +} - *result_ptr = Ptr; +pi_result piextWaitExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextWaitExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} - // ensure we aligned the allocation correctly - if (RetVal == PI_SUCCESS && alignment != 0) - assert(reinterpret_cast(*result_ptr) % alignment == 0 && - "allocation not aligned correctly"); +pi_result piextSignalExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextSignalExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} - return RetVal; +pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, + pi_kernel_group_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); } -/// Allocates device memory -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { - - void *Ptr = nullptr; - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - - // First we need to look up the 
function pointer - clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clDeviceMemAllocINTELCache, - clDeviceMemAllocName, &FuncPtr); - - if (FuncPtr) { - Ptr = FuncPtr(CLContext, cast(device), - cast(properties), size, alignment, - cast(&RetVal)); - } - - *result_ptr = Ptr; - - // ensure we aligned the allocation correctly - if (RetVal == PI_SUCCESS && alignment != 0) - assert(reinterpret_cast(*result_ptr) % alignment == 0 && - "allocation not aligned correctly"); - - return RetVal; -} - -/// Allocates memory accessible on both host and device -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { - - void *Ptr = nullptr; - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - - // First we need to look up the function pointer - clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSharedMemAllocINTELCache, - clSharedMemAllocName, &FuncPtr); - - if (FuncPtr) { - Ptr = FuncPtr(cast(context), cast(device), - cast(properties), size, alignment, - cast(&RetVal)); - } - - *result_ptr = Ptr; - - assert(alignment == 0 || - (RetVal == PI_SUCCESS && - reinterpret_cast(*result_ptr) % alignment == 0)); - return RetVal; -} - -/// Frees allocated USM memory in a blocking manner -/// -/// \param context is the pi_context of the allocation -/// \param ptr is the memory to be freed -pi_result piextUSMFree(pi_context context, void *ptr) { - // Use a blocking free to avoid issues with indirect access from kernels that - // might be still running. - clMemBlockingFreeINTEL_fn FuncPtr = nullptr; +pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, + pi_kernel_sub_group_info ParamName, + size_t InputValueSize, const void *InputValue, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piKernelGetSubGroupInfo( + Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, + ParamValue, ParamValueSizeRet); +} - cl_context CLContext = cast(context); - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clMemBlockingFreeINTELCache, - clMemBlockingFreeName, &FuncPtr); +pi_result piKernelRetain(pi_kernel Kernel) { + return pi2ur::piKernelRetain(Kernel); +} - if (FuncPtr) { - RetVal = cast(FuncPtr(CLContext, ptr)); - } +pi_result piKernelRelease(pi_kernel Kernel) { + return pi2ur::piKernelRelease(Kernel); +} - return RetVal; +pi_result +piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, + const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *OutEvent) { + return pi2ur::piEnqueueKernelLaunch( + Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, + NumEventsInWaitList, EventWaitList, OutEvent); } -/// Sets up pointer arguments for CL kernels. An extra indirection -/// is required due to CL argument conventions. 
-/// -/// \param kernel is the kernel to be launched -/// \param arg_index is the index of the kernel argument -/// \param arg_size is the size in bytes of the argument (ignored in CL) -/// \param arg_value is the pointer argument -pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { - (void)arg_size; - - // Size is unused in CL as pointer args are passed by value. - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetKernelInfo(cast(kernel), CL_KERNEL_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, - clSetKernelArgMemPointerName, &FuncPtr); - - if (FuncPtr) { - // OpenCL passes pointers by value not by reference - // This means we need to deref the arg to get the pointer value - auto PtrToPtr = reinterpret_cast(arg_value); - auto DerefPtr = reinterpret_cast(*PtrToPtr); - RetVal = - cast(FuncPtr(cast(kernel), arg_index, DerefPtr)); - } - - return RetVal; -} - -/// USM Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value is value to set. It is interpreted as an 8-bit value and the -/// upper 24 bits are ignored -/// \param count is the size in bytes to memset -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, - size_t count, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemFillINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueMemFillINTELCache, - clEnqueueMemFillName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr(cast(queue), ptr, &value, - 1, count, num_events_in_waitlist, - cast(events_waitlist), - cast(event))); - } - - return RetVal; -} - -/// USM Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param src_ptr is the data to be copied -/// \param dst_ptr is the location the data will be copied -/// \param size is number of bytes to copy -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - 
clEnqueueMemcpyName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast( - FuncPtr(cast(queue), blocking, dst_ptr, src_ptr, size, - num_events_in_waitlist, cast(events_waitlist), - cast(event))); - } - - return RetVal; -} - -/// Hint to migrate memory to the device -/// -/// \param queue is the queue to submit to -/// \param ptr points to the memory to migrate -/// \param size is the number of bytes to migrate -/// \param flags is a bitfield used to specify memory migration options -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - (void)ptr; - (void)size; - - // flags is currently unused so fail if set - if (flags != 0) - return PI_ERROR_INVALID_VALUE; +pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + pi_program Program, + bool OwnNativeHandle, + pi_kernel *Kernel) { + return pi2ur::piextKernelCreateWithNativeHandle( + NativeHandle, Context, Program, OwnNativeHandle, Kernel); +} - return cast(clEnqueueMarkerWithWaitList( - cast(queue), num_events_in_waitlist, - cast(events_waitlist), cast(event))); - - /* - // Use this once impls support it. - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMigrateMemINTEL_fn FuncPtr; - pi_result Err = getExtFuncFromContext( - cast(CLContext), "clEnqueueMigrateMemINTEL", &FuncPtr); - - if (Err != PI_SUCCESS) { - RetVal = Err; - } else { - RetVal = cast(FuncPtr( - cast(queue), ptr, size, flags, num_events_in_waitlist, - reinterpret_cast(events_waitlist), - reinterpret_cast(event))); - } - */ -} - -/// USM Memadvise API -/// -/// \param queue is the queue to submit to -/// \param ptr is the data to be advised -/// \param length is the size in bytes of the meory to advise -/// \param advice is device specific advice -/// \param event is the event that represents this operation -// USM memadvise API to govern behavior of automatic migration mechanisms -pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, pi_mem_advice advice, - pi_event *event) { - (void)ptr; - (void)length; - (void)advice; - - return cast( - clEnqueueMarkerWithWaitList(cast(queue), 0, nullptr, - reinterpret_cast(event))); - - /* - // Change to use this once drivers support it. 
- - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo(cast(queue), - CL_QUEUE_CONTEXT, - sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemAdviseINTEL_fn FuncPtr; - pi_result Err = - getExtFuncFromContext( - cast(CLContext), "clEnqueueMemAdviseINTEL", &FuncPtr); - - if (Err != PI_SUCCESS) { - RetVal = Err; - } else { - RetVal = cast(FuncPtr(cast(queue), - ptr, length, advice, 0, nullptr, - reinterpret_cast(event))); - } - */ -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param pitch is the total width of the destination memory including padding -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, - size_t pitch, size_t pattern_size, - const void *pattern, size_t width, - size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - std::ignore = queue; - std::ignore = ptr; - std::ignore = pitch; - std::ignore = pattern_size; - std::ignore = pattern; - std::ignore = width; - std::ignore = height; - std::ignore = num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// USM 2D Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value contains the byte to set with -/// \param pitch is the total width of the destination memory including padding -/// \param width is width in bytes of each row to memset -/// \param height is height the columns to memset -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D( - pi_queue queue, void *ptr, size_t pitch, int value, size_t width, - size_t height, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - std::ignore = queue; - std::ignore = ptr; - std::ignore = pitch; - std::ignore = value; - std::ignore = width; - std::ignore = height; - std::ignore = num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - 
pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, - const void *src_ptr, size_t src_pitch, size_t width, size_t height, - pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, - pi_event *event) { - std::ignore = queue; - std::ignore = blocking; - std::ignore = dst_ptr; - std::ignore = dst_pitch; - std::ignore = src_ptr; - std::ignore = src_pitch; - std::ignore = width; - std::ignore = height; - std::ignore = num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// API to query information about USM allocated pointers -/// Valid Queries: -/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value -/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if -/// the queried pointer fell inside an allocation. -/// Result must fit in void * -/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's -/// allocation is in bytes. Result is a size_t. -/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against -/// -/// \param context is the pi_context -/// \param ptr is the pointer to query -/// \param param_name is the type of query to perform -/// \param param_value_size is the size of the result in bytes -/// \param param_value is the result -/// \param param_value_ret is how many bytes were written -pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, - pi_mem_alloc_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - - clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - clGetMemAllocInfoName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr(cast(context), ptr, param_name, - param_value_size, param_value, - param_value_size_ret)); - } - - return RetVal; -} - -pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) { - std::ignore = ptr; - std::ignore = size; - std::ignore = context; - return PI_SUCCESS; +pi_result piextKernelGetNativeHandle(pi_kernel Kernel, + pi_native_handle *NativeHandle) { + return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); } -pi_result piextUSMRelease(const void *ptr, pi_context context) { - std::ignore = ptr; - std::ignore = context; - return PI_SUCCESS; +pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { + return pi2ur::piEventCreate(Context, RetEvent); } -/// API for writing data from host to a device global variable. 
-/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param name is the unique identifier for the device global variable -/// \param blocking_write is true if the write should block -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param src is a pointer to where the data must be copied from -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue queue, pi_program program, const char *name, - pi_bool blocking_write, size_t count, size_t offset, const void *src, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return cast(Res); - - clEnqueueWriteGlobalVariable_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - clEnqueueWriteGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(cast(queue), cast(program), name, - blocking_write, count, offset, src, num_events_in_wait_list, - cast(event_wait_list), cast(event)); - return cast(Res); -} - -/// API reading data from a device global variable to host. -/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param name is the unique identifier for the device global variable -/// \param blocking_read is true if the read should block -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param dst is a pointer to where the data must be copied to -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, - size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return cast(Res); - - clEnqueueReadGlobalVariable_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - clEnqueueReadGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(cast(queue), cast(program), name, - blocking_read, count, offset, dst, num_events_in_wait_list, - cast(event_wait_list), cast(event)); - return cast(Res); -} - -pi_result piextEnqueueReadHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, - void *ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, 
ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - clEnqueueReadHostPipeName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr( - cast(queue), cast(program), pipe_symbol, - blocking, ptr, size, num_events_in_waitlist, - cast(events_waitlist), cast(event))); - } - - return RetVal; -} - -pi_result piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, - void *ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - clEnqueueWriteHostPipeName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr( - cast(queue), cast(program), pipe_symbol, - blocking, ptr, size, num_events_in_waitlist, - cast(events_waitlist), cast(event))); - } - - return RetVal; -} - -/// API to set attributes controlling kernel execution -/// -/// \param kernel is the pi kernel to execute -/// \param param_name is a pi_kernel_exec_info value that specifies the info -/// passed to the kernel -/// \param param_value_size is the size of the value in bytes -/// \param param_value is a pointer to the value to set for the kernel -/// -/// If param_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to -/// the pi_bool value PI_TRUE -/// If param_name is PI_USM_PTRS, the value will be an array of ptrs -pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, - size_t param_value_size, - const void *param_value) { - if (param_name == PI_USM_INDIRECT_ACCESS && - *(static_cast(param_value)) == PI_TRUE) { - return USMSetIndirectAccess(kernel); - } else { - return cast(clSetKernelExecInfo( - cast(kernel), param_name, param_value_size, param_value)); - } -} - -pi_result piextProgramSetSpecializationConstant(pi_program prog, - pi_uint32 spec_id, - size_t spec_size, - const void *spec_value) { - cl_program ClProg = cast(prog); - cl_context Ctx = nullptr; - size_t RetSize = 0; - cl_int Res = - clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize); - - if (Res != CL_SUCCESS) - return cast(Res); - - clSetProgramSpecializationConstant_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clSetProgramSpecializationConstantCache, - clSetProgramSpecializationConstantName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(ClProg, spec_id, spec_size, spec_value); - return cast(Res); -} - -/// Common API for getting the native handle of a PI object -/// -/// \param piObj is the pi object to get the native handle of -/// \param nativeHandle is a pointer to be set to the native handle -/// -/// PI_SUCCESS -static pi_result piextGetNativeHandle(void *piObj, - pi_native_handle *nativeHandle) { - assert(nativeHandle != nullptr); - *nativeHandle = reinterpret_cast(piObj); - return PI_SUCCESS; +pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); } -pi_result piextPlatformGetNativeHandle(pi_platform platform, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(platform, nativeHandle); +pi_result 
piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); } -pi_result piextDeviceGetNativeHandle(pi_device device, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(device, nativeHandle); +pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { + return pi2ur::piEventsWait(NumEvents, EventList); } -pi_result piextContextGetNativeHandle(pi_context context, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(context, nativeHandle); +pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, + void (*PFnNotify)(pi_event Event, + pi_int32 EventCommandStatus, + void *UserData), + void *UserData) { + return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, + UserData); } -pi_result piextQueueGetNativeHandle(pi_queue queue, - pi_native_handle *nativeHandle, - int32_t *nativeHandleDesc) { - *nativeHandleDesc = 0; - return piextGetNativeHandle(queue, nativeHandle); +pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { + return pi2ur::piEventSetStatus(Event, ExecutionStatus); } -pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle) { - return piextGetNativeHandle(mem, nativeHandle); +pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } + +pi_result piEventRelease(pi_event Event) { + return pi2ur::piEventRelease(Event); } -pi_result piextProgramGetNativeHandle(pi_program program, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(program, nativeHandle); +pi_result piextEventGetNativeHandle(pi_event Event, + pi_native_handle *NativeHandle) { + return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); } -pi_result piextKernelGetNativeHandle(pi_kernel kernel, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(kernel, nativeHandle); +pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_event *Event) { + return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, + OwnNativeHandle, Event); } -// command-buffer extension -pi_result piextCommandBufferCreate(pi_context context, pi_device device, - const pi_ext_command_buffer_desc *desc, - pi_ext_command_buffer *ret_command_buffer) { - (void)context; - (void)device; - (void)desc; - (void)ret_command_buffer; +pi_result piSamplerCreate(pi_context Context, + const pi_sampler_properties *SamplerProperties, + pi_sampler *RetSampler) { + return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); +} - // Not implemented - return PI_ERROR_INVALID_OPERATION; +pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); } -pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer) { - (void)command_buffer; +pi_result piSamplerRetain(pi_sampler Sampler) { + return pi2ur::piSamplerRetain(Sampler); +} - // Not implemented - return PI_ERROR_INVALID_OPERATION; +pi_result piSamplerRelease(pi_sampler Sampler) { + return pi2ur::piSamplerRelease(Sampler); } -pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer) { - (void)command_buffer; +pi_result piEnqueueEventsWait(pi_queue Queue, 
pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, + OutEvent); +} - // Not implemented - return PI_ERROR_INVALID_OPERATION; +pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, + EventWaitList, OutEvent); } -pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) { - (void)command_buffer; +pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, + pi_bool BlockingRead, size_t Offset, + size_t Size, void *Dst, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, + Dst, NumEventsInWaitList, EventWaitList, + Event); +} - // Not implemented - return PI_ERROR_INVALID_OPERATION; +pi_result piEnqueueMemBufferReadRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferReadRect( + Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); } -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)kernel; - (void)work_dim; - (void)global_work_offset; - (void)global_work_size; - (void)local_work_size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; +pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, + pi_bool BlockingWrite, size_t Offset, + size_t Size, const void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, + Size, Ptr, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferWriteRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferWriteRect( + Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, + size_t SrcOffset, size_t DstOffset, + size_t Size, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, + DstOffset, Size, 
NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferCopyRect( + pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, + pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, + size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, + size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + + return pi2ur::piEnqueueMemBufferCopyRect( + Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, + SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, + const void *Pattern, size_t PatternSize, + size_t Offset, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, + Offset, Size, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, + pi_map_flags MapFlags, size_t Offset, + size_t Size, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent, void **RetMap) { + return pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, + Size, NumEventsInWaitList, EventWaitList, + OutEvent, RetMap); +} + +pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *OutEvent) { + return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, + EventWaitList, OutEvent); +} + +pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, + pi_bool BlockingRead, pi_image_offset Origin, + pi_image_region Region, size_t RowPitch, + size_t SlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageRead( + Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, + pi_bool BlockingWrite, pi_image_offset Origin, + pi_image_region Region, size_t InputRowPitch, + size_t InputSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageWrite( + Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, + InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); } pi_result -piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)dst_ptr; - (void)src_ptr; - (void)size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; +piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, + pi_image_offset SrcOrigin, pi_image_offset DstOrigin, + pi_image_region Region, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piEnqueueMemImageCopy(Queue, 
SrcImage, DstImage, SrcOrigin, + DstOrigin, Region, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, + const void *FillColor, const size_t *Origin, + const size_t *Region, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, + NumEventsInWaitList, EventWaitList, + Event); +} + +pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, + pi_buffer_create_type BufferCreateType, + void *BufferCreateInfo, pi_mem *RetMem) { + return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, + BufferCreateInfo, RetMem); +} + +pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, + const char *FunctionName, + pi_uint64 *FunctionPointerRet) { + return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, + FunctionPointerRet); +} + +pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, + Size, Alignment); +} + +pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, + Size, Alignment); +} + +pi_result piextUSMPitchedAlloc(void **ResultPtr, size_t *ResultPitch, + pi_context Context, pi_device Device, + pi_usm_mem_properties *Properties, + size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, + Properties, WidthInBytes, Height, + ElementSizeBytes); +} + +pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, + Alignment); +} + +pi_result piextUSMFree(pi_context Context, void *Ptr) { + return pi2ur::piextUSMFree(Context, Ptr); +} + +pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, + size_t ArgSize, const void *ArgValue) { + return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); +} + +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +} + +pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, + const void *SrcPtr, size_t Size, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, + NumEventsInWaitlist, EventsWaitlist, + Event); +} + +pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, + pi_usm_migration_flags Flags, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piextUSMEnqueuePrefetch( + Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); +} + +pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, + size_t Length, pi_mem_advice Advice, + pi_event *OutEvent) { + + return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, 
OutEvent);
+}
+
+pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch,
+                                size_t PatternSize, const void *Pattern,
+                                size_t Width, size_t Height,
+                                pi_uint32 NumEventsWaitList,
+                                const pi_event *EventsWaitList,
+                                pi_event *Event) {
+  return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern,
+                                      Width, Height, NumEventsWaitList,
+                                      EventsWaitList, Event);
+}
+
+pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, size_t Pitch,
+                                  int Value, size_t Width, size_t Height,
+                                  pi_uint32 NumEventsWaitList,
+                                  const pi_event *EventsWaitlist,
+                                  pi_event *Event) {
+  return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height,
+                                        NumEventsWaitList, EventsWaitlist,
+                                        Event);
+}
+
+pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking,
+                                  void *DstPtr, size_t DstPitch,
+                                  const void *SrcPtr, size_t SrcPitch,
+                                  size_t Width, size_t Height,
+                                  pi_uint32 NumEventsInWaitlist,
+                                  const pi_event *EventWaitlist,
+                                  pi_event *Event) {
+  return pi2ur::piextUSMEnqueueMemcpy2D(
+      Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height,
+      NumEventsInWaitlist, EventWaitlist, Event);
+}
+
+pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr,
+                                  pi_mem_alloc_info ParamName,
+                                  size_t ParamValueSize, void *ParamValue,
+                                  size_t *ParamValueSizeRet) {
+  return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize,
+                                        ParamValue, ParamValueSizeRet);
+}
+
+pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) {
+  return pi2ur::piextUSMImport(HostPtr, Size, Context);
+}
+
+pi_result piextUSMRelease(const void *HostPtr, pi_context Context) {
+  return pi2ur::piextUSMRelease(HostPtr, Context);
+}
+
+pi_result piextEnqueueDeviceGlobalVariableWrite(
+    pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite,
+    size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList,
+    const pi_event *EventsWaitList, pi_event *Event) {
+  return pi2ur::piextEnqueueDeviceGlobalVariableWrite(
+      Queue, Program, Name, BlockingWrite, Count, Offset, Src,
+      NumEventsInWaitList, EventsWaitList, Event);
+}
+
+pi_result piextEnqueueDeviceGlobalVariableRead(
+    pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead,
+    size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList,
+    const pi_event *EventsWaitList, pi_event *Event) {
+  return pi2ur::piextEnqueueDeviceGlobalVariableRead(
+      Queue, Program, Name, BlockingRead, Count, Offset, Dst,
+      NumEventsInWaitList, EventsWaitList, Event);
+}
+
+pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program,
+                                   const char *PipeSymbol, pi_bool Blocking,
+                                   void *Ptr, size_t Size,
+                                   pi_uint32 NumEventsInWaitList,
+                                   const pi_event *EventsWaitList,
+                                   pi_event *Event) {
+  return pi2ur::piextEnqueueReadHostPipe(Queue, Program, PipeSymbol, Blocking,
+                                         Ptr, Size, NumEventsInWaitList,
+                                         EventsWaitList, Event);
+}
+
+pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program,
+                                    const char *PipeSymbol, pi_bool Blocking,
+                                    void *Ptr, size_t Size,
+                                    pi_uint32 NumEventsInWaitList,
+                                    const pi_event *EventsWaitList,
+                                    pi_event *Event) {
+  return pi2ur::piextEnqueueWriteHostPipe(Queue, Program, PipeSymbol, Blocking,
+                                          Ptr, Size, NumEventsInWaitList,
+                                          EventsWaitList, Event);
+}
+
+pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName,
+                              size_t ParamValueSize, const void *ParamValue) {
+
+  return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize,
+                                    
ParamValue); +} + +pi_result piextProgramSetSpecializationConstant(pi_program Prog, + pi_uint32 SpecID, size_t Size, + const void *SpecValue) { + return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, + SpecValue); +} + +pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, + const pi_ext_command_buffer_desc *Desc, + pi_ext_command_buffer *RetCommandBuffer) { + return pi2ur::piextCommandBufferCreate(Context, Device, Desc, + RetCommandBuffer); +} + +pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferRetain(CommandBuffer); +} + +pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferRelease(CommandBuffer); +} + +pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferFinalize(CommandBuffer); +} + +pi_result piextCommandBufferNDRangeKernel( + pi_ext_command_buffer CommandBuffer, pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, + const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferNDRangeKernel( + CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemcpyUSM( + pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, + size_t Size, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, + NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)src_buffer; - (void)dst_buffer; - (void)src_offset; - (void)dst_offset; - (void)size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + size_t SrcOffset, size_t DstOffset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferCopy( + CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)src_buffer; - (void)dst_buffer; - (void)src_origin; - (void)dst_origin; - (void)region; - (void)src_row_pitch; - (void)src_slice_pitch; - (void)dst_row_pitch; - (void)dst_slice_pitch; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - 
return PI_ERROR_INVALID_OPERATION; + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, + pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, + size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferCopyRect( + CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, + SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)offset; - (void)size; - (void)dst; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, + size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferRead( + CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)buffer_offset; - (void)host_offset; - (void)region; - (void)buffer_row_pitch; - (void)buffer_slice_pitch; - (void)host_row_pitch; - (void)host_slice_pitch; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferReadRect( + CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, + BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)offset; - (void)size; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, + size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, + const 
pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferWrite( + CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); } pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)buffer_offset; - (void)host_offset; - (void)region; - (void)buffer_row_pitch; - (void)buffer_slice_pitch; - (void)host_row_pitch; - (void)host_slice_pitch; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, - pi_queue queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - (void)command_buffer; - (void)queue; - (void)num_events_in_wait_list; - (void)event_wait_list; - (void)event; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -// This API is called by Sycl RT to notify the end of the plugin lifetime. -// Windows: dynamically loaded plugins might have been unloaded already -// when this is called. Sycl RT holds onto the PI plugin so it can be -// called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. -// TODO: add a global variable lifetime management code here (see -// pi_level_zero.cpp for reference). 
-pi_result piTearDown(void *PluginParameter) { - (void)PluginParameter; - delete ExtFuncPtrCache; - ExtFuncPtrCache = nullptr; - return PI_SUCCESS; + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferWriteRect( + CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, + BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - OCLV::OpenCLVersion devVer, platVer; - cl_platform_id platform; - cl_device_id deviceID = cast(Device); - - // TODO: Cache OpenCL version for each device and platform - auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - ret_err = getDeviceVersion(deviceID, devVer); - - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - ret_err = getPlatformVersion(platform, platVer); +pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, + pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextEnqueueCommandBuffer( + CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); +} - if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { - setErrorMessage( - "OpenCL version for device and/or platform is less than 2.1", - PI_ERROR_INVALID_OPERATION); - return PI_ERROR_INVALID_OPERATION; - } +pi_result piextPluginGetOpaqueData(void *opaque_data_param, + void **opaque_data_return) { + return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); +} - if (DeviceTime) { - uint64_t dummy; - clGetDeviceAndHostTimer(deviceID, DeviceTime, - HostTime == nullptr ? &dummy : HostTime); +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); +} - } else if (HostTime) { - clGetHostTimer(deviceID, HostTime); - } +pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + return pi2ur::piextEnablePeerAccess(command_device, peer_device); +} - return PI_SUCCESS; +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + return pi2ur::piextDisablePeerAccess(command_device, peer_device); } -pi_result piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - cl_int result = - clGetEventInfo(reinterpret_cast(event), param_name, - param_value_size, param_value, param_value_size_ret); - if (result == CL_SUCCESS && param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) { - // If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change - // it to CL_SUBMITTED. This change is needed since - // sycl::info::event::event_command_status has no equivalent to CL_QUEUED. 
- const auto param_value_int = static_cast(param_value); - if (*param_value_int == CL_QUEUED) { - *param_value_int = CL_SUBMITTED; - } - } - return static_cast(result); +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, + ParamValueSize, ParamValue, + ParamValueSizeRet); } -const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; +pi_result piTearDown(void *PluginParameter) { + return pi2ur::piTearDown(PluginParameter); +} pi_result piPluginInit(pi_plugin *PluginInit) { // Check that the major version matches in PiVersion and SupportedVersion @@ -2611,160 +1133,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_ERROR_INVALID_VALUE; strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); -#define _PI_CL(pi_api, ocl_api) \ - (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); - - // Platform - _PI_CL(piPlatformsGet, piPlatformsGet) - _PI_CL(piPlatformGetInfo, piPlatformGetInfo) - _PI_CL(piextPlatformGetNativeHandle, piextPlatformGetNativeHandle) - _PI_CL(piextPlatformCreateWithNativeHandle, - piextPlatformCreateWithNativeHandle) - // Device - _PI_CL(piDevicesGet, piDevicesGet) - _PI_CL(piDeviceGetInfo, piDeviceGetInfo) - _PI_CL(piDevicePartition, clCreateSubDevices) - _PI_CL(piDeviceRetain, clRetainDevice) - _PI_CL(piDeviceRelease, clReleaseDevice) - _PI_CL(piextDeviceSelectBinary, piextDeviceSelectBinary) - _PI_CL(piextGetDeviceFunctionPointer, piextGetDeviceFunctionPointer) - _PI_CL(piextDeviceGetNativeHandle, piextDeviceGetNativeHandle) - _PI_CL(piextDeviceCreateWithNativeHandle, piextDeviceCreateWithNativeHandle) - // Context - _PI_CL(piContextCreate, piContextCreate) - _PI_CL(piContextGetInfo, piContextGetInfo) - _PI_CL(piContextRetain, clRetainContext) - _PI_CL(piContextRelease, clReleaseContext) - _PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle) - _PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) - // Queue - _PI_CL(piQueueCreate, piQueueCreate) - _PI_CL(piextQueueCreate, piextQueueCreate) - _PI_CL(piQueueGetInfo, piQueueGetInfo) - _PI_CL(piQueueFinish, clFinish) - _PI_CL(piQueueFlush, clFlush) - _PI_CL(piQueueRetain, clRetainCommandQueue) - _PI_CL(piQueueRelease, clReleaseCommandQueue) - _PI_CL(piextQueueGetNativeHandle, piextQueueGetNativeHandle) - _PI_CL(piextQueueCreateWithNativeHandle, piextQueueCreateWithNativeHandle) - // Memory - _PI_CL(piMemBufferCreate, piMemBufferCreate) - _PI_CL(piMemImageCreate, piMemImageCreate) - _PI_CL(piMemGetInfo, clGetMemObjectInfo) - _PI_CL(piMemImageGetInfo, clGetImageInfo) - _PI_CL(piMemRetain, clRetainMemObject) - _PI_CL(piMemRelease, clReleaseMemObject) - _PI_CL(piMemBufferPartition, piMemBufferPartition) - _PI_CL(piextMemGetNativeHandle, piextMemGetNativeHandle) - _PI_CL(piextMemCreateWithNativeHandle, piextMemCreateWithNativeHandle) - // Program - _PI_CL(piProgramCreate, piProgramCreate) - _PI_CL(piProgramCreateWithBinary, piProgramCreateWithBinary) - _PI_CL(piProgramGetInfo, clGetProgramInfo) - _PI_CL(piProgramCompile, clCompileProgram) - _PI_CL(piProgramBuild, clBuildProgram) - _PI_CL(piProgramLink, piProgramLink) - _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo) - _PI_CL(piProgramRetain, clRetainProgram) - _PI_CL(piProgramRelease, clReleaseProgram) - _PI_CL(piextProgramSetSpecializationConstant, - piextProgramSetSpecializationConstant) 
- _PI_CL(piextProgramGetNativeHandle, piextProgramGetNativeHandle) - _PI_CL(piextProgramCreateWithNativeHandle, piextProgramCreateWithNativeHandle) - // Kernel - _PI_CL(piKernelCreate, piKernelCreate) - _PI_CL(piKernelSetArg, clSetKernelArg) - _PI_CL(piKernelGetInfo, clGetKernelInfo) - _PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo) - _PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo) - _PI_CL(piKernelRetain, clRetainKernel) - _PI_CL(piKernelRelease, clReleaseKernel) - _PI_CL(piKernelSetExecInfo, piKernelSetExecInfo) - _PI_CL(piextKernelSetArgPointer, piextKernelSetArgPointer) - _PI_CL(piextKernelCreateWithNativeHandle, piextKernelCreateWithNativeHandle) - _PI_CL(piextKernelGetNativeHandle, piextKernelGetNativeHandle) - // Event - _PI_CL(piEventCreate, piEventCreate) - _PI_CL(piEventGetInfo, piEventGetInfo) - _PI_CL(piEventGetProfilingInfo, clGetEventProfilingInfo) - _PI_CL(piEventsWait, clWaitForEvents) - _PI_CL(piEventSetCallback, clSetEventCallback) - _PI_CL(piEventSetStatus, clSetUserEventStatus) - _PI_CL(piEventRetain, clRetainEvent) - _PI_CL(piEventRelease, clReleaseEvent) - _PI_CL(piextEventGetNativeHandle, piextGetNativeHandle) - _PI_CL(piextEventCreateWithNativeHandle, piextEventCreateWithNativeHandle) - // Sampler - _PI_CL(piSamplerCreate, piSamplerCreate) - _PI_CL(piSamplerGetInfo, clGetSamplerInfo) - _PI_CL(piSamplerRetain, clRetainSampler) - _PI_CL(piSamplerRelease, clReleaseSampler) - // Queue commands - _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel) - _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList) - _PI_CL(piEnqueueEventsWaitWithBarrier, clEnqueueBarrierWithWaitList) - _PI_CL(piEnqueueMemBufferRead, clEnqueueReadBuffer) - _PI_CL(piEnqueueMemBufferReadRect, clEnqueueReadBufferRect) - _PI_CL(piEnqueueMemBufferWrite, clEnqueueWriteBuffer) - _PI_CL(piEnqueueMemBufferWriteRect, clEnqueueWriteBufferRect) - _PI_CL(piEnqueueMemBufferCopy, clEnqueueCopyBuffer) - _PI_CL(piEnqueueMemBufferCopyRect, clEnqueueCopyBufferRect) - _PI_CL(piEnqueueMemBufferFill, clEnqueueFillBuffer) - _PI_CL(piEnqueueMemImageRead, clEnqueueReadImage) - _PI_CL(piEnqueueMemImageWrite, clEnqueueWriteImage) - _PI_CL(piEnqueueMemImageCopy, clEnqueueCopyImage) - _PI_CL(piEnqueueMemImageFill, clEnqueueFillImage) - _PI_CL(piEnqueueMemBufferMap, piEnqueueMemBufferMap) - _PI_CL(piEnqueueMemUnmap, clEnqueueUnmapMemObject) - // USM - _PI_CL(piextUSMHostAlloc, piextUSMHostAlloc) - _PI_CL(piextUSMDeviceAlloc, piextUSMDeviceAlloc) - _PI_CL(piextUSMSharedAlloc, piextUSMSharedAlloc) - _PI_CL(piextUSMFree, piextUSMFree) - _PI_CL(piextUSMEnqueueMemset, piextUSMEnqueueMemset) - _PI_CL(piextUSMEnqueueMemcpy, piextUSMEnqueueMemcpy) - _PI_CL(piextUSMEnqueuePrefetch, piextUSMEnqueuePrefetch) - _PI_CL(piextUSMEnqueueMemAdvise, piextUSMEnqueueMemAdvise) - _PI_CL(piextUSMEnqueueFill2D, piextUSMEnqueueFill2D) - _PI_CL(piextUSMEnqueueMemset2D, piextUSMEnqueueMemset2D) - _PI_CL(piextUSMEnqueueMemcpy2D, piextUSMEnqueueMemcpy2D) - _PI_CL(piextUSMGetMemAllocInfo, piextUSMGetMemAllocInfo) - _PI_CL(piextUSMImport, piextUSMImport) - _PI_CL(piextUSMRelease, piextUSMRelease) - // Device global variable - _PI_CL(piextEnqueueDeviceGlobalVariableWrite, - piextEnqueueDeviceGlobalVariableWrite) - _PI_CL(piextEnqueueDeviceGlobalVariableRead, - piextEnqueueDeviceGlobalVariableRead) - // Host Pipe - _PI_CL(piextEnqueueReadHostPipe, piextEnqueueReadHostPipe) - _PI_CL(piextEnqueueWriteHostPipe, piextEnqueueWriteHostPipe) - - // command-buffer - _PI_CL(piextCommandBufferCreate, piextCommandBufferCreate) - 
_PI_CL(piextCommandBufferRetain, piextCommandBufferRetain) - _PI_CL(piextCommandBufferRelease, piextCommandBufferRelease) - _PI_CL(piextCommandBufferNDRangeKernel, piextCommandBufferNDRangeKernel) - _PI_CL(piextCommandBufferMemcpyUSM, piextCommandBufferMemcpyUSM) - _PI_CL(piextCommandBufferMemBufferCopy, piextCommandBufferMemBufferCopy) - _PI_CL(piextCommandBufferMemBufferCopyRect, - piextCommandBufferMemBufferCopyRect) - _PI_CL(piextCommandBufferMemBufferRead, piextCommandBufferMemBufferRead) - _PI_CL(piextCommandBufferMemBufferReadRect, - piextCommandBufferMemBufferReadRect) - _PI_CL(piextCommandBufferMemBufferWrite, piextCommandBufferMemBufferWrite) - _PI_CL(piextCommandBufferMemBufferWriteRect, - piextCommandBufferMemBufferWriteRect) - _PI_CL(piextEnqueueCommandBuffer, piextEnqueueCommandBuffer) - - _PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj) - _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) - _PI_CL(piPluginGetLastError, piPluginGetLastError) - _PI_CL(piTearDown, piTearDown) - _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) - _PI_CL(piPluginGetBackendOption, piPluginGetBackendOption) - -#undef _PI_CL +#define _PI_API(api) \ + (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); +#include +#undef _PI_API return PI_SUCCESS; } diff --git a/sycl/plugins/opencl/pi_opencl.hpp b/sycl/plugins/opencl/pi_opencl.hpp index 54b1ad90abcaf..6894d5cb65d30 100644 --- a/sycl/plugins/opencl/pi_opencl.hpp +++ b/sycl/plugins/opencl/pi_opencl.hpp @@ -21,6 +21,9 @@ #include #include +// Share code between the PI Plugin and UR Adapter +#include + // This version should be incremented for any change made to this file or its // corresponding .cpp file. #define _PI_OPENCL_PLUGIN_VERSION 1 @@ -28,92 +31,4 @@ #define _PI_OPENCL_PLUGIN_VERSION_STRING \ _PI_PLUGIN_VERSION_STRING(_PI_OPENCL_PLUGIN_VERSION) -namespace OCLV { -class OpenCLVersion { -protected: - unsigned int ocl_major; - unsigned int ocl_minor; - -public: - OpenCLVersion() : ocl_major(0), ocl_minor(0) {} - - OpenCLVersion(unsigned int ocl_major, unsigned int ocl_minor) - : ocl_major(ocl_major), ocl_minor(ocl_minor) { - if (!isValid()) - ocl_major = ocl_minor = 0; - } - - OpenCLVersion(const char *version) : OpenCLVersion(std::string(version)) {} - - OpenCLVersion(const std::string &version) : ocl_major(0), ocl_minor(0) { - /* The OpenCL specification defines the full version string as - * 'OpenCL' for platforms and as - * 'OpenCL' for devices. 
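For illustration, a minimal, self-contained sketch of the X-macro pattern that the new piPluginInit above uses to fill PiFunctionTable. The real plugin includes a shared API list header whose path is elided in this text; the sketch below inlines a hypothetical two-entry list instead, and FunctionTable, piFoo, piBar, PI_API_LIST and populate are made-up names. Only the expansion pattern mirrors the patch.

#include <cstdio>

struct FunctionTable {
  int (*piFoo)(int);
  int (*piBar)(int);
};

// Hypothetical stand-ins for PI entry points.
int piFoo(int X) { return X + 1; }
int piBar(int X) { return X * 2; }

// Hypothetical stand-in for the shared API list normally kept in a separate
// header: it expands _PI_API(name) once per entry point.
#define PI_API_LIST \
  _PI_API(piFoo)    \
  _PI_API(piBar)

void populate(FunctionTable &Table) {
  // Same shape as the patch: bind every listed API to its own address.
#define _PI_API(api) Table.api = (decltype(&::api))(&api);
  PI_API_LIST
#undef _PI_API
}

int main() {
  FunctionTable Table{};
  populate(Table);
  std::printf("%d %d\n", Table.piFoo(1), Table.piBar(3)); // prints "2 6"
}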
- */ - std::regex rx("OpenCL ([0-9]+)\\.([0-9]+)"); - std::smatch match; - - if (std::regex_search(version, match, rx) && (match.size() == 3)) { - ocl_major = strtoul(match[1].str().c_str(), nullptr, 10); - ocl_minor = strtoul(match[2].str().c_str(), nullptr, 10); - - if (!isValid()) - ocl_major = ocl_minor = 0; - } - } - - bool operator==(const OpenCLVersion &v) const { - return ocl_major == v.ocl_major && ocl_minor == v.ocl_minor; - } - - bool operator!=(const OpenCLVersion &v) const { return !(*this == v); } - - bool operator<(const OpenCLVersion &v) const { - if (ocl_major == v.ocl_major) - return ocl_minor < v.ocl_minor; - - return ocl_major < v.ocl_major; - } - - bool operator>(const OpenCLVersion &v) const { return v < *this; } - - bool operator<=(const OpenCLVersion &v) const { - return (*this < v) || (*this == v); - } - - bool operator>=(const OpenCLVersion &v) const { - return (*this > v) || (*this == v); - } - - bool isValid() const { - switch (ocl_major) { - case 0: - return false; - case 1: - case 2: - return ocl_minor <= 2; - case UINT_MAX: - return false; - default: - return ocl_minor != UINT_MAX; - } - } - - int getMajor() const { return ocl_major; } - int getMinor() const { return ocl_minor; } -}; - -inline const OpenCLVersion V1_0(1, 0); -inline const OpenCLVersion V1_1(1, 1); -inline const OpenCLVersion V1_2(1, 2); -inline const OpenCLVersion V2_0(2, 0); -inline const OpenCLVersion V2_1(2, 1); -inline const OpenCLVersion V2_2(2, 2); -inline const OpenCLVersion V3_0(3, 0); - -} // namespace OCLV -struct _pi_ext_command_buffer {}; - #endif // PI_OPENCL_HPP diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index e0cee9dfe3e7d..e04e77dd6b4bb 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -12,7 +12,9 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D # [UR][L0] Copy prebuilt L0 to avoid leaking shared folder path set(UNIFIED_RUNTIME_TAG b38855ed815ffd076bfde5e5e06170ca4f723dc1) - set(UR_BUILD_ADAPTER_L0 ON) + if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + set(UR_BUILD_ADAPTER_L0 ON) + endif() if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) set(UR_BUILD_ADAPTER_CUDA ON) @@ -71,27 +73,39 @@ target_include_directories(UnifiedRuntime-Headers find_package(Threads REQUIRED) -add_sycl_plugin(unified_runtime +set(UNIFIED_RUNTIME_PLUGIN_ARGS SOURCES # These are short-term shared with Unified Runtime - # The two plugins define a few things differrently so must + # The two plugins define a few things differently so must # be built separately. This difference is spelled in # their "ur_bindings.hpp" files. 
- # "ur_bindings.hpp" "pi2ur.hpp" "pi2ur.cpp" # These below belong to Unified Runtime PI Plugin only "pi_unified_runtime.hpp" "pi_unified_runtime.cpp" - LIBRARIES + LIBRARIES Threads::Threads UnifiedRuntimeLoader UnifiedRuntime-Headers - LevelZeroLoader-Headers # we need for #include in common.h ) -add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) +# We need for #include in common.h +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS LevelZeroLoader-Headers) +endif() + +if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) + list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS OpenCL-ICD) +endif() + +add_sycl_plugin(unified_runtime ${UNIFIED_RUNTIME_PLUGIN_ARGS}) + +if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) +endif() + if("hip" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-runtime-libraries ur_adapter_hip) endif() @@ -100,6 +114,53 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-runtime-libraries ur_adapter_cuda) endif() +if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) + # Build OpenCL adapter + add_sycl_library("ur_adapter_opencl" SHARED + SOURCES + "ur/ur.hpp" + "ur/ur.cpp" + "ur/adapters/opencl/adapter.cpp" + "ur/adapters/opencl/adapter.hpp" + "ur/adapters/opencl/command_buffer.hpp" + "ur/adapters/opencl/command_buffer.cpp" + "ur/adapters/opencl/common.cpp" + "ur/adapters/opencl/common.hpp" + "ur/adapters/opencl/context.cpp" + "ur/adapters/opencl/context.hpp" + "ur/adapters/opencl/device.cpp" + "ur/adapters/opencl/device.hpp" + "ur/adapters/opencl/enqueue.cpp" + "ur/adapters/opencl/event.cpp" + "ur/adapters/opencl/image.cpp" + "ur/adapters/opencl/kernel.cpp" + "ur/adapters/opencl/memory.cpp" + "ur/adapters/opencl/platform.cpp" + "ur/adapters/opencl/platform.hpp" + "ur/adapters/opencl/program.cpp" + "ur/adapters/opencl/queue.cpp" + "ur/adapters/opencl/sampler.cpp" + "ur/adapters/opencl/ur_interface_loader.cpp" + "ur/adapters/opencl/usm.cpp" + "ur/adapters/opencl/usm_p2p.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + LIBRARIES + UnifiedRuntime-Headers + Threads::Threads + OpenCL-ICD + ) + + # Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION. + # Define all symbols up to OpenCL 3.0. 
+ target_compile_definitions(ur_adapter_opencl PRIVATE CL_TARGET_OPENCL_VERSION=300) + + set_target_properties("ur_adapter_opencl" PROPERTIES + VERSION "0.0.0" + SOVERSION "0" + ) +endif() + if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) add_sycl_library("ur_adapter_native_cpu" SHARED SOURCES @@ -160,8 +221,10 @@ if (TARGET UnifiedRuntimeLoader) endif() # Install the UR adapters too -install(TARGETS ur_adapter_level_zero - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + install(TARGETS ur_adapter_level_zero + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev ) +endif() diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 8994d22559e77..69637713fc6ba 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -345,7 +345,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, /* Helper function to perform conversions in-place */ ConvertHelper Value(ParamValueSize, ParamValue, ParamValueSizeRet); - pi_result error = PI_SUCCESS; + pi_result Error = PI_SUCCESS; if (ParamName == UR_DEVICE_INFO_TYPE) { auto ConvertFunc = [](ur_device_type_t UrValue) { switch (UrValue) { @@ -437,25 +437,24 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, sizeof(ur_device_partition_property_t), PI_ERROR_UNKNOWN); - const uint32_t ur_number_elements = + const uint32_t UrNumberElements = *ParamValueSizeRet / sizeof(ur_device_partition_property_t); if (ParamValue) { - auto param_value_copy = - std::make_unique( - ur_number_elements); - std::memcpy(param_value_copy.get(), ParamValue, - ur_number_elements * sizeof(ur_device_partition_property_t)); + auto ParamValueCopy = + std::make_unique(UrNumberElements); + std::memcpy(ParamValueCopy.get(), ParamValue, + UrNumberElements * sizeof(ur_device_partition_property_t)); pi_device_partition_property *pValuePI = reinterpret_cast(ParamValue); ur_device_partition_property_t *pValueUR = reinterpret_cast( - param_value_copy.get()); - const ur_device_partition_t type = pValueUR->type; - *pValuePI = ConvertFunc(type); + ParamValueCopy.get()); + const ur_device_partition_t Type = pValueUR->type; + *pValuePI = ConvertFunc(Type); ++pValuePI; - for (uint32_t i = 0; i < ur_number_elements; ++i) { + for (uint32_t i = 0; i < UrNumberElements; ++i) { switch (pValueUR->type) { case UR_DEVICE_PARTITION_EQUALLY: { *pValuePI = pValueUR->value.equally; @@ -486,7 +485,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, /* Add 2 extra elements to the return value (one for the type at the * beginning and another to terminate the array with a 0 */ *ParamValueSizeRet = - (ur_number_elements + 2) * sizeof(pi_device_partition_property); + (UrNumberElements + 2) * sizeof(pi_device_partition_property); } } @@ -616,7 +615,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, (int)ParamValueSize, (int)*ParamValueSizeRet); die("ur2piDeviceInfoValue: size mismatch"); } - return error; + return Error; } inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, @@ -1498,9 +1497,7 @@ inline pi_result piextContextCreateWithNativeHandle( pi_native_handle NativeHandle, pi_uint32 
NumDevices, const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Devices, PI_ERROR_INVALID_DEVICE); PI_ASSERT(RetContext, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NumDevices, PI_ERROR_INVALID_VALUE); ur_native_handle_t NativeContext = reinterpret_cast(NativeHandle); @@ -1509,8 +1506,9 @@ inline pi_result piextContextCreateWithNativeHandle( ur_context_handle_t *UrContext = reinterpret_cast(RetContext); - ur_context_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; + ur_context_native_properties_t Properties{ + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES, nullptr, OwnNativeHandle}; + HANDLE_ERRORS(urContextCreateWithNativeHandle( NativeContext, NumDevices, UrDevices, &Properties, UrContext)); @@ -2215,8 +2213,6 @@ inline pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, pi_context Context, pi_program Program, bool OwnNativeHandle, pi_kernel *Kernel) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); @@ -2462,7 +2458,6 @@ inline pi_result piProgramRelease(pi_program Program) { inline pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, size_t, const void *ArgValue) { ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, ArgValue)); return PI_SUCCESS; @@ -3302,8 +3297,8 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, /// /// \param queue is the queue to submit to /// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set +/// \param pitch is the total width of the destination memory including +/// padding \param pattern is a pointer with the bytes of the pattern to set /// \param pattern_size is the size in bytes of the pattern /// \param width is width in bytes of each row to fill /// \param height is height the columns to fill diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp new file mode 100644 index 0000000000000..88684e597f465 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp @@ -0,0 +1,80 @@ +//===-------------- adapter.cpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +struct ur_adapter_handle_t_ { + std::atomic RefCount = 0; +}; + +ur_adapter_handle_t_ adapter{}; + +UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, + ur_loader_config_handle_t) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters) { + if (NumEntries > 0 && phAdapters) { + *phAdapters = &adapter; + } + + if (pNumAdapters) { + *pNumAdapters = 1; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { + ++adapter.RefCount; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { + --adapter.RefCount; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( + ur_adapter_handle_t, const char **ppMessage, int32_t *pError) { + *ppMessage = cl_adapter::ErrorMessage; + *pError = cl_adapter::ErrorMessageCode; + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, + ur_adapter_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_ADAPTER_INFO_BACKEND: + return ReturnValue(UR_ADAPTER_BACKEND_CUDA); + case UR_ADAPTER_INFO_REFERENCE_COUNT: + return ReturnValue(adapter.RefCount.load()); + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp new file mode 100644 index 0000000000000..d0f530f3daf77 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp @@ -0,0 +1,11 @@ +//===-------------- adapter.hpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +struct ur_adapter_handle_t_; + +extern ur_adapter_handle_t_ adapter; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp new file mode 100644 index 0000000000000..6a942823fcc05 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp @@ -0,0 +1,196 @@ +//===--------- command_buffer.cpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
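For illustration, a sketch of how a Unified Runtime client could exercise the adapter entry points defined above. The direct calls and the <ur_api.h> include are assumptions made for brevity; in SYCL these calls normally go through the UR loader and the pi2ur layer, and queryOpenCLAdapter is a hypothetical helper, not part of the patch.

#include <ur_api.h>

#include <cstdio>

void queryOpenCLAdapter() {
  // First ask only for the adapter count, then fetch the single adapter.
  uint32_t NumAdapters = 0;
  urAdapterGet(0, nullptr, &NumAdapters);
  ur_adapter_handle_t Adapter = nullptr;
  urAdapterGet(1, &Adapter, nullptr);

  // Query the backend enum reported by urAdapterGetInfo above.
  ur_adapter_backend_t Backend{};
  urAdapterGetInfo(Adapter, UR_ADAPTER_INFO_BACKEND, sizeof(Backend), &Backend,
                   nullptr);
  std::printf("adapter backend enum value: %d, adapters: %u\n",
              static_cast<int>(Backend), NumAdapters);

  urAdapterRelease(Adapter);
}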
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "command_buffer.hpp" +#include "common.hpp" + +/// Stub implementations of UR experimental feature command-buffers + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_kernel_handle_t hKernel, + [[maybe_unused]] uint32_t workDim, + [[maybe_unused]] const size_t *pGlobalWorkOffset, + [[maybe_unused]] const size_t *pGlobalWorkSize, + [[maybe_unused]] const size_t *pLocalWorkSize, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, + [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL 
adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, + [[maybe_unused]] ur_rect_offset_t srcOrigin, + [[maybe_unused]] ur_rect_offset_t dstOrigin, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch, + [[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + [[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + 
[[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp new file mode 100644 index 0000000000000..8375ee6d48a72 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp @@ -0,0 +1,13 @@ +//===--------- command_buffer.hpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include + +/// Stub implementation of command-buffers for OpenCL + +struct ur_exp_command_buffer_handle_t_ {}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp new file mode 100644 index 0000000000000..9a57de1906792 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -0,0 +1,80 @@ +//===--------- common.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +namespace cl_adapter { + +/* Global variables for urPlatformGetLastError() */ +thread_local int32_t ErrorMessageCode = 0; +thread_local char ErrorMessage[MaxMessageSize]; + +[[maybe_unused]] void setErrorMessage(const char *Message, int32_t ErrorCode) { + assert(strlen(Message) <= cl_adapter::MaxMessageSize); + strcpy(cl_adapter::ErrorMessage, Message); + ErrorMessageCode = ErrorCode; +} +} // namespace cl_adapter + +ur_result_t mapCLErrorToUR(cl_int Result) { + switch (Result) { + case CL_SUCCESS: + return UR_RESULT_SUCCESS; + case CL_OUT_OF_HOST_MEMORY: + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + case CL_INVALID_VALUE: + case CL_INVALID_BUILD_OPTIONS: + return UR_RESULT_ERROR_INVALID_VALUE; + case CL_INVALID_PLATFORM: + return UR_RESULT_ERROR_INVALID_PLATFORM; + case CL_DEVICE_NOT_FOUND: + return UR_RESULT_ERROR_DEVICE_NOT_FOUND; + case CL_INVALID_OPERATION: + return UR_RESULT_ERROR_INVALID_OPERATION; + case CL_INVALID_ARG_VALUE: + return UR_RESULT_ERROR_INVALID_ARGUMENT; + case CL_INVALID_EVENT: + return UR_RESULT_ERROR_INVALID_EVENT; + case CL_INVALID_EVENT_WAIT_LIST: + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + case CL_INVALID_BINARY: + return UR_RESULT_ERROR_INVALID_BINARY; + case CL_INVALID_KERNEL_NAME: + return UR_RESULT_ERROR_INVALID_KERNEL_NAME; + case CL_BUILD_PROGRAM_FAILURE: + return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE; + case CL_INVALID_WORK_GROUP_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; + case CL_INVALID_WORK_ITEM_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE; + case CL_INVALID_WORK_DIMENSION: + return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; + case CL_OUT_OF_RESOURCES: + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + case CL_INVALID_MEM_OBJECT: + return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + default: + return UR_RESULT_ERROR_UNKNOWN; + } +} + +void cl_adapter::die(const char *Message) { + std::cerr << "ur_die: " << Message << "\n"; + std::terminate(); +} + +/// Common API for getting the native handle of a UR object +/// +/// \param URObj is the UR object to get the native handle of +/// \param NativeHandle is a pointer to be set to the native handle +/// +/// UR_RESULT_SUCCESS +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { + *NativeHandle = reinterpret_cast(URObj); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp new file mode 100644 index 0000000000000..f78710d0dfea6 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -0,0 +1,327 @@ +//===--------- common.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically map + * the OpenCL error to UR and return from the current function. 
+ */ +#define CL_RETURN_ON_FAILURE(clCall) \ + if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ + return mapCLErrorToUR(cl_result_macro); \ + } + +/** + * Call an UR API and, if the result is not UR_RESULT_SUCCESS, automatically + * return from the current function. + */ +#define UR_RETURN_ON_FAILURE(urCall) \ + if (const ur_result_t ur_result_macro = urCall; \ + ur_result_macro != UR_RESULT_SUCCESS) { \ + return ur_result_macro; \ + } + +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return + * from the current function and set the pointer `outPtr` to nullptr. The OpenCL + * error is mapped to UR + */ +#define CL_RETURN_ON_FAILURE_AND_SET_NULL(clCall, outPtr) \ + if (const cl_int cl_result_macro = clCall != CL_SUCCESS) { \ + if (outPtr != nullptr) { \ + *outPtr = nullptr; \ + } \ + return mapCLErrorToUR(cl_result_macro); \ + } + +namespace oclv { +class OpenCLVersion { +protected: + unsigned int OCLMajor; + unsigned int OCLMinor; + +public: + OpenCLVersion() : OCLMajor(0), OCLMinor(0) {} + + OpenCLVersion(unsigned int OclMajor, unsigned int OclMinor) + : OCLMajor(OclMajor), OCLMinor(OclMinor) { + if (!isValid()) { + OclMajor = OclMinor = 0; + } + } + + OpenCLVersion(const char *Version) : OpenCLVersion(std::string(Version)) {} + + OpenCLVersion(const std::string &Version) : OCLMajor(0), OCLMinor(0) { + /* The OpenCL specification defines the full version string as + * 'OpenCL' for platforms and as + * 'OpenCL' for devices. + */ + std::regex Rx("OpenCL ([0-9]+)\\.([0-9]+)"); + std::smatch Match; + + if (std::regex_search(Version, Match, Rx) && (Match.size() == 3)) { + OCLMajor = strtoul(Match[1].str().c_str(), nullptr, 10); + OCLMinor = strtoul(Match[2].str().c_str(), nullptr, 10); + + if (!isValid()) { + OCLMajor = OCLMinor = 0; + } + } + } + + bool operator==(const OpenCLVersion &V) const { + return OCLMajor == V.OCLMajor && OCLMinor == V.OCLMinor; + } + + bool operator!=(const OpenCLVersion &V) const { return !(*this == V); } + + bool operator<(const OpenCLVersion &V) const { + if (OCLMajor == V.OCLMajor) + return OCLMinor < V.OCLMinor; + + return OCLMajor < V.OCLMajor; + } + + bool operator>(const OpenCLVersion &V) const { return V < *this; } + + bool operator<=(const OpenCLVersion &V) const { + return (*this < V) || (*this == V); + } + + bool operator>=(const OpenCLVersion &V) const { + return (*this > V) || (*this == V); + } + + bool isValid() const { + switch (OCLMajor) { + case 0: + return false; + case 1: + case 2: + return OCLMinor <= 2; + case UINT_MAX: + return false; + default: + return OCLMinor != UINT_MAX; + } + } + + unsigned int getMajor() const { return OCLMajor; } + unsigned int getMinor() const { return OCLMinor; } +}; + +inline const OpenCLVersion V1_0(1, 0); +inline const OpenCLVersion V1_1(1, 1); +inline const OpenCLVersion V1_2(1, 2); +inline const OpenCLVersion V2_0(2, 0); +inline const OpenCLVersion V2_1(2, 1); +inline const OpenCLVersion V2_2(2, 2); +inline const OpenCLVersion V3_0(3, 0); + +} // namespace oclv + +namespace cl_adapter { +constexpr size_t MaxMessageSize = 256; +extern thread_local int32_t ErrorMessageCode; +extern thread_local char ErrorMessage[MaxMessageSize]; + +// Utility function for setting a message and warning +[[maybe_unused]] void setErrorMessage(const char *Message, + ur_result_t ErrorCode); + +[[noreturn]] void die(const char *Message); + +template To cast(From Value) { + + if constexpr (std::is_pointer_v) { + static_assert(std::is_pointer_v == std::is_pointer_v, + "Cast 
failed pointer check"); + return reinterpret_cast(Value); + } else { + static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); + static_assert(std::is_signed_v == std::is_signed_v, + "Cast failed sign check"); + return static_cast(Value); + } +} +} // namespace cl_adapter + +namespace cl_ext { +// Older versions of GCC don't like "const" here +#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) +#define CONSTFIX constexpr +#else +#define CONSTFIX const +#endif + +// Names of USM functions that are queried from OpenCL +CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; +CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; +CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; +CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char CreateBufferWithPropertiesName[] = + "clCreateBufferWithPropertiesINTEL"; +CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; +CONSTFIX char EnqueueMemFillName[] = "clEnqueueMemFillINTEL"; +CONSTFIX char EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; +CONSTFIX char SetProgramSpecializationConstantName[] = + "clSetProgramSpecializationConstant"; +CONSTFIX char GetDeviceFunctionPointerName[] = + "clGetDeviceFunctionPointerINTEL"; +CONSTFIX char EnqueueWriteGlobalVariableName[] = + "clEnqueueWriteGlobalVariableINTEL"; +CONSTFIX char EnqueueReadGlobalVariableName[] = + "clEnqueueReadGlobalVariableINTEL"; +// Names of host pipe functions queried from OpenCL +CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; +CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; + +#undef CONSTFIX + +using clGetDeviceFunctionPointer_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, + const char *FuncName, cl_ulong *ret_ptr); + +using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, const void *, cl_uint, const cl_event *, + cl_event *); + +using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, void *, cl_uint, const cl_event *, + cl_event *); + +using clSetProgramSpecializationConstant_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, void *ptr, + size_t size, cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +using clEnqueueWriteHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, + const void *ptr, size_t size, + cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +template struct FuncPtrCache { + std::map Map; + std::mutex Mutex; +}; + +// FIXME: There's currently no mechanism for cleaning up this cache, meaning +// that it is invalidated whenever a context is destroyed. This could lead to +// reusing an invalid function pointer if another context happens to have the +// same native handle. 
+struct ExtFuncPtrCacheT { + FuncPtrCache clHostMemAllocINTELCache; + FuncPtrCache clDeviceMemAllocINTELCache; + FuncPtrCache clSharedMemAllocINTELCache; + FuncPtrCache clGetDeviceFunctionPointerCache; + FuncPtrCache + clCreateBufferWithPropertiesINTELCache; + FuncPtrCache clMemBlockingFreeINTELCache; + FuncPtrCache + clSetKernelArgMemPointerINTELCache; + FuncPtrCache clEnqueueMemFillINTELCache; + FuncPtrCache clEnqueueMemcpyINTELCache; + FuncPtrCache clGetMemAllocInfoINTELCache; + FuncPtrCache + clEnqueueWriteGlobalVariableCache; + FuncPtrCache clEnqueueReadGlobalVariableCache; + FuncPtrCache clEnqueueReadHostPipeINTELCache; + FuncPtrCache clEnqueueWriteHostPipeINTELCache; + FuncPtrCache + clSetProgramSpecializationConstantCache; +}; +// A raw pointer is used here since the lifetime of this map has to be tied to +// piTeardown to avoid issues with static destruction order (a user application +// might have static objects that indirectly access this cache in their +// destructor). +inline ExtFuncPtrCacheT *ExtFuncPtrCache; + +// USM helper function to get an extension function pointer +template +static ur_result_t getExtFuncFromContext(cl_context Context, + FuncPtrCache &FPtrCache, + const char *FuncName, T *Fptr) { + // TODO + // Potentially redo caching as UR interface changes. + // if cached, return cached FuncPtr + std::lock_guard CacheLock{FPtrCache.Mutex}; + std::map &FPtrMap = FPtrCache.Map; + auto It = FPtrMap.find(Context); + if (It != FPtrMap.end()) { + auto F = It->second; + // if cached that extension is not available return nullptr and + // UR_RESULT_ERROR_INVALID_VALUE + *Fptr = F; + return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_uint DeviceCount; + cl_int RetErr = clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &DeviceCount, nullptr); + + if (RetErr != CL_SUCCESS || DeviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + std::vector DevicesInCtx(DeviceCount); + RetErr = clGetContextInfo(Context, CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), + DevicesInCtx.data(), nullptr); + + if (RetErr != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + cl_platform_id CurPlatform; + RetErr = clGetDeviceInfo(DevicesInCtx[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr); + + if (RetErr != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + T FuncPtr = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName)); + + if (!FuncPtr) { + // Cache that the extension is not available + FPtrMap[Context] = nullptr; + return UR_RESULT_ERROR_INVALID_VALUE; + } + + *Fptr = FuncPtr; + FPtrMap[Context] = FuncPtr; + + return UR_RESULT_SUCCESS; +} +} // namespace cl_ext + +ur_result_t mapCLErrorToUR(cl_int Result); + +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp new file mode 100644 index 0000000000000..ef678d0090fde --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -0,0 +1,135 @@ +//===--------- context.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
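For illustration, a sketch of how an adapter entry point can combine getExtFuncFromContext with the CL_RETURN_ON_FAILURE and UR_RETURN_ON_FAILURE macros declared above. The wrapper name setSpecConstant is hypothetical, the sketch assumes urInit has already allocated cl_ext::ExtFuncPtrCache, and the real program.cpp is not shown in this excerpt, so treat this as a usage sketch rather than the actual implementation.

#include "common.hpp"

static ur_result_t setSpecConstant(cl_context Ctx, cl_program Prog,
                                   cl_uint SpecId, size_t Size,
                                   const void *Value) {
  cl_ext::clSetProgramSpecializationConstant_fn Func = nullptr;
  // Looks the extension up once per context and caches the result (or caches
  // nullptr if the platform does not expose it), returning a UR error code on
  // failure.
  UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext(
      Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache,
      cl_ext::SetProgramSpecializationConstantName, &Func));

  // Any OpenCL error from the extension call is mapped to the matching
  // ur_result_t and returned by the macro.
  CL_RETURN_ON_FAILURE(Func(Prog, SpecId, Size, Value));
  return UR_RESULT_SUCCESS;
}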
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "context.hpp" + +ur_result_t cl_adapter::getDevicesFromContext( + ur_context_handle_t hContext, + std::unique_ptr> &DevicesInCtx) { + + cl_uint DeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), + CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), + &DeviceCount, nullptr)); + + if (DeviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + DevicesInCtx = std::make_unique>(DeviceCount); + + CL_RETURN_ON_FAILURE(clGetContextInfo( + cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( + uint32_t DeviceCount, const ur_device_handle_t *phDevices, + const ur_context_properties_t *, ur_context_handle_t *phContext) { + + cl_int Ret; + *phContext = cl_adapter::cast( + clCreateContext(nullptr, cl_adapter::cast(DeviceCount), + cl_adapter::cast(phDevices), + nullptr, nullptr, cl_adapter::cast(&Ret))); + + return mapCLErrorToUR(Ret); +} + +static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { + + cl_int CLPropName; + switch (URPropName) { + case UR_CONTEXT_INFO_NUM_DEVICES: + CLPropName = CL_CONTEXT_NUM_DEVICES; + break; + case UR_CONTEXT_INFO_DEVICES: + CLPropName = CL_CONTEXT_DEVICES; + break; + case UR_CONTEXT_INFO_REFERENCE_COUNT: + CLPropName = CL_CONTEXT_REFERENCE_COUNT; + break; + default: + CLPropName = -1; + } + + return CLPropName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURContextInfoToCL(propName); + + switch (static_cast(propName)) { + /* 2D USM memops are not supported. */ + case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: + case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: { + return ReturnValue(false); + } + case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* These queries should be dealt with in context_impl.cpp by calling the + * queries of each device separately and building the intersection set. 
*/ + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + case UR_CONTEXT_INFO_NUM_DEVICES: + case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_REFERENCE_COUNT: { + + CL_RETURN_ON_FAILURE( + clGetContextInfo(cl_adapter::cast(hContext), CLPropName, + propSize, pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextRelease(ur_context_handle_t hContext) { + + cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextRetain(ur_context_handle_t hContext) { + + cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( + ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { + + *phNativeContext = reinterpret_cast(hContext); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, + const ur_context_native_properties_t *, ur_context_handle_t *phContext) { + + *phContext = reinterpret_cast(hNativeContext); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_context_extended_deleter_t pfnDeleter, + [[maybe_unused]] void *pUserData) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp new file mode 100644 index 0000000000000..0581cd786539a --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp @@ -0,0 +1,16 @@ +//===--------- context.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl_adapter { +ur_result_t +getDevicesFromContext(ur_context_handle_t hContext, + std::unique_ptr> &DevicesInCtx); +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp new file mode 100644 index 0000000000000..3fc6f5d491466 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -0,0 +1,1112 @@ +//===--------- device.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
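For illustration, a sketch of how another entry point might reuse cl_adapter::getDevicesFromContext declared in context.hpp above. The vector element type is elided in this extracted text and is assumed here to be cl_device_id, matching the CL_CONTEXT_DEVICES query in context.cpp; firstDeviceOfContext is a hypothetical helper, not part of the patch.

#include "context.hpp"

#include <memory>
#include <vector>

static ur_result_t firstDeviceOfContext(ur_context_handle_t hContext,
                                        cl_device_id &FirstDevice) {
  std::unique_ptr<std::vector<cl_device_id>> Devices;
  UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext(hContext, Devices));
  // getDevicesFromContext rejects contexts with zero devices, so the vector
  // is non-empty here.
  FirstDevice = (*Devices)[0];
  return UR_RESULT_SUCCESS;
}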
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "device.hpp" +#include "common.hpp" +#include "platform.hpp" + +#include + +ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, + oclv::OpenCLVersion &Version) { + + size_t DevVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, + DevVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t cl_adapter::checkDeviceExtensions( + cl_device_id Dev, const std::vector &Exts, bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, + uint32_t NumEntries, + ur_device_handle_t *phDevices, + uint32_t *pNumDevices) { + + cl_device_type Type; + switch (DeviceType) { + case UR_DEVICE_TYPE_ALL: + Type = CL_DEVICE_TYPE_ALL; + break; + case UR_DEVICE_TYPE_GPU: + Type = CL_DEVICE_TYPE_GPU; + break; + case UR_DEVICE_TYPE_CPU: + Type = CL_DEVICE_TYPE_CPU; + break; + case UR_DEVICE_TYPE_FPGA: + case UR_DEVICE_TYPE_MCA: + case UR_DEVICE_TYPE_VPU: + Type = CL_DEVICE_TYPE_ACCELERATOR; + break; + case UR_DEVICE_TYPE_DEFAULT: + Type = UR_DEVICE_TYPE_DEFAULT; + break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + cl_int Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), + Type, cl_adapter::cast(NumEntries), + cl_adapter::cast(phDevices), + cl_adapter::cast(pNumDevices)); + + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices + if (Result == CL_DEVICE_NOT_FOUND) { + Result = CL_SUCCESS; + if (pNumDevices) { + *pNumDevices = 0; + } + } + + return mapCLErrorToUR(Result); +} + +static ur_device_fp_capability_flags_t +mapCLDeviceFpConfigToUR(cl_device_fp_config CLValue) { + + ur_device_fp_capability_flags_t URValue = 0; + if (CLValue & CL_FP_DENORM) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; + } + if (CLValue & CL_FP_INF_NAN) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; + } + if (CLValue & CL_FP_ROUND_TO_NEAREST) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + } + if (CLValue & CL_FP_ROUND_TO_ZERO) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + } + if (CLValue & CL_FP_ROUND_TO_INF) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; + } + if (CLValue & CL_FP_FMA) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + } + if (CLValue & CL_FP_SOFT_FLOAT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; + } + if (CLValue & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + } + + return URValue; +} + +static cl_int mapURDeviceInfoToCL(ur_device_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_DEVICE_INFO_TYPE: + return CL_DEVICE_TYPE; + case UR_DEVICE_INFO_PARENT_DEVICE: + return 
CL_DEVICE_PARENT_DEVICE; + case UR_DEVICE_INFO_PLATFORM: + return CL_DEVICE_PLATFORM; + case UR_DEVICE_INFO_VENDOR_ID: + return CL_DEVICE_VENDOR_ID; + case UR_DEVICE_INFO_EXTENSIONS: + return CL_DEVICE_EXTENSIONS; + case UR_DEVICE_INFO_NAME: + return CL_DEVICE_NAME; + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + return CL_DEVICE_COMPILER_AVAILABLE; + case UR_DEVICE_INFO_LINKER_AVAILABLE: + return CL_DEVICE_LINKER_AVAILABLE; + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + return CL_DEVICE_MAX_COMPUTE_UNITS; + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + return CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS; + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + return CL_DEVICE_MAX_WORK_GROUP_SIZE; + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + return CL_DEVICE_MAX_WORK_ITEM_SIZES; + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + return CL_DEVICE_MAX_CLOCK_FREQUENCY; + case UR_DEVICE_INFO_ADDRESS_BITS: + return CL_DEVICE_ADDRESS_BITS; + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + return CL_DEVICE_MAX_MEM_ALLOC_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + return CL_DEVICE_GLOBAL_MEM_SIZE; + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + return CL_DEVICE_LOCAL_MEM_SIZE; + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + return CL_DEVICE_IMAGE_SUPPORT; + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + return CL_DEVICE_HOST_UNIFIED_MEMORY; + case UR_DEVICE_INFO_AVAILABLE: + return CL_DEVICE_AVAILABLE; + case UR_DEVICE_INFO_VENDOR: + return CL_DEVICE_VENDOR; + case UR_DEVICE_INFO_DRIVER_VERSION: + return CL_DRIVER_VERSION; + case UR_DEVICE_INFO_VERSION: + return CL_DEVICE_VERSION; + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + return CL_DEVICE_PARTITION_MAX_SUB_DEVICES; + case UR_DEVICE_INFO_REFERENCE_COUNT: + return CL_DEVICE_REFERENCE_COUNT; + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: + return CL_DEVICE_PARTITION_PROPERTIES; + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + return CL_DEVICE_PARTITION_AFFINITY_DOMAIN; + case UR_DEVICE_INFO_PARTITION_TYPE: + return CL_DEVICE_PARTITION_TYPE; + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + return CL_DEVICE_OPENCL_C_VERSION; + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + return CL_DEVICE_PREFERRED_INTEROP_USER_SYNC; + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + return CL_DEVICE_PRINTF_BUFFER_SIZE; + case UR_DEVICE_INFO_PROFILE: + return CL_DEVICE_PROFILE; + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + return CL_DEVICE_BUILT_IN_KERNELS; + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + return CL_DEVICE_QUEUE_PROPERTIES; + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + return CL_DEVICE_QUEUE_ON_HOST_PROPERTIES; + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + return CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES; + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + return CL_DEVICE_EXECUTION_CAPABILITIES; + case UR_DEVICE_INFO_ENDIAN_LITTLE: + return CL_DEVICE_ENDIAN_LITTLE; + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + return CL_DEVICE_ERROR_CORRECTION_SUPPORT; + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + return CL_DEVICE_PROFILING_TIMER_RESOLUTION; + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + return CL_DEVICE_LOCAL_MEM_TYPE; + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + return CL_DEVICE_MAX_CONSTANT_ARGS; + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + return CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + return CL_DEVICE_GLOBAL_MEM_CACHE_TYPE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + return CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + return CL_DEVICE_GLOBAL_MEM_CACHE_SIZE; + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + return 
CL_DEVICE_MAX_PARAMETER_SIZE; + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + return CL_DEVICE_MEM_BASE_ADDR_ALIGN; + case UR_DEVICE_INFO_MAX_SAMPLERS: + return CL_DEVICE_MAX_SAMPLERS; + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + return CL_DEVICE_MAX_READ_IMAGE_ARGS; + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + return CL_DEVICE_MAX_WRITE_IMAGE_ARGS; + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + return CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS; + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + return CL_DEVICE_SINGLE_FP_CONFIG; + case UR_DEVICE_INFO_HALF_FP_CONFIG: + return CL_DEVICE_HALF_FP_CONFIG; + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: + return CL_DEVICE_DOUBLE_FP_CONFIG; + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + return CL_DEVICE_IMAGE2D_MAX_WIDTH; + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + return CL_DEVICE_IMAGE2D_MAX_HEIGHT; + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + return CL_DEVICE_IMAGE3D_MAX_WIDTH; + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + return CL_DEVICE_IMAGE3D_MAX_HEIGHT; + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + return CL_DEVICE_IMAGE3D_MAX_DEPTH; + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + return CL_DEVICE_IMAGE_MAX_BUFFER_SIZE; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_INT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF; + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: + return CL_DEVICE_MAX_NUM_SUB_GROUPS; + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: + return CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + return CL_DEVICE_SUB_GROUP_SIZES_INTEL; + case UR_DEVICE_INFO_IL_VERSION: + return CL_DEVICE_IL_VERSION; + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + return CL_DEVICE_IMAGE_MAX_ARRAY_SIZE; + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + return CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + return CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + return CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + return CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: + return CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_IP_VERSION: + return CL_DEVICE_IP_VERSION_INTEL; + default: + return 
-1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_device_info CLPropName = mapURDeviceInfoToCL(propName); + + /* TODO UR: Casting to uint32_t to silence warnings due to some values not + * being part of the enum. Can be removed once all UR_EXT enums are promoted + * to UR */ + switch (static_cast(propName)) { + case UR_DEVICE_INFO_TYPE: { + cl_device_type CLType; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_type), &CLType, nullptr)); + + /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not + * enough information in the OpenCL runtime to know exactly which type it + * is. Assuming FPGA for now */ + /* TODO UR: In OpenCL, a device can have multiple types (e.g. CPU and GPU). + * We are potentially losing information by returning only one type */ + ur_device_type_t URDeviceType = UR_DEVICE_TYPE_DEFAULT; + if (CLType & CL_DEVICE_TYPE_CPU) { + URDeviceType = UR_DEVICE_TYPE_CPU; + } else if (CLType & CL_DEVICE_TYPE_GPU) { + URDeviceType = UR_DEVICE_TYPE_GPU; + } else if (CLType & CL_DEVICE_TYPE_ACCELERATOR) { + URDeviceType = UR_DEVICE_TYPE_FPGA; + } + + return ReturnValue(URDeviceType); + } + case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { + oclv::OpenCLVersion Version; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), Version)); + + const std::string Results = std::to_string(Version.getMajor()) + "." + + std::to_string(Version.getMinor()); + return ReturnValue(Results.c_str(), Results.size() + 1); + } + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + size_t CLSize; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); + + std::vector CLValue(NProperties); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue.data(), nullptr)); + + /* The OpenCL implementation returns a value of 0 if no properties are + * supported. UR will return a size of 0 for now. + */ + if (pPropSizeRet && CLValue[0] == 0) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; + } + + std::vector URValue{}; + for (size_t i = 0; i < NProperties; ++i) { + if (CLValue[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL && CLValue[i] != 0) { + URValue.push_back(static_cast(CLValue[i])); + } + } + return ReturnValue(URValue.data(), URValue.size()); + } + case UR_DEVICE_INFO_PARTITION_TYPE: { + + size_t CLSize; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); + + /* The OpenCL implementation returns either a size of 0 or a value of 0 if + * the device is not a sub-device. UR will return a size of 0 for now. + * TODO Ideally, this could become an error once PI is removed from SYCL RT + */ + if (pPropSizeRet && (CLSize == 0 || NProperties == 1)) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; + } + + auto CLValue = + reinterpret_cast(alloca(CLSize)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue, nullptr)); + + std::vector URValue(NProperties - 1); + + /* OpenCL will always return exactly one partition type followed by one or + * more values. 
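+     * For example, a sub-device created with the property list
+     * {CL_DEVICE_PARTITION_EQUALLY, 4, 0} reports that list back, and the
+     * loop below copies the leading type plus each subsequent value into
+     * ur_device_partition_property_t entries.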
*/ + for (uint32_t i = 0; i < URValue.size(); ++i) { + URValue[i].type = static_cast(CLValue[0]); + switch (URValue[i].type) { + case UR_DEVICE_PARTITION_EQUALLY: { + URValue[i].value.equally = CLValue[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + URValue[i].value.count = CLValue[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { + URValue[i].value.affinity_domain = CLValue[i + 1]; + break; + } + default: { + return UR_RESULT_ERROR_UNKNOWN; + } + } + } + + return ReturnValue(URValue.data(), URValue.size()); + } + case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { + /* Returns the maximum sizes of a work group for each dimension one could + * use to submit a kernel. There is no such query defined in OpenCL. So + * we'll return the maximum value. */ + static constexpr uint32_t MaxWorkItemDimensions = 3u; + static constexpr size_t Max = (std::numeric_limits::max)(); + + struct { + size_t sizes[MaxWorkItemDimensions]; + } ReturnSizes; + + ReturnSizes.sizes[0] = Max; + ReturnSizes.sizes[1] = Max; + ReturnSizes.sizes[2] = Max; + return ReturnValue(ReturnSizes); + } + case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + return ReturnValue(static_cast(1u)); + } + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { + /* Corresponding OpenCL query is only available starting with OpenCL 2.1 + * and we have to emulate it on older OpenCL runtimes. */ + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + if (DevVer >= oclv::V2_1) { + cl_uint CLValue; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &CLValue, nullptr)); + + if (CLValue == 0u) { + /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 + * spec says that minimum possible value is 1. */ + return ReturnValue(1u); + } else { + return ReturnValue(static_cast(CLValue)); + } + } else { + /* Otherwise, we can't query anything, because even cl_khr_subgroups + * does not provide similar query. Therefore, simply return minimum + * possible value 1 here. */ + return ReturnValue(1u); + } + } + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + case UR_DEVICE_INFO_HALF_FP_CONFIG: + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { + /* CL type: cl_device_fp_config + * UR type: ur_device_fp_capability_flags_t */ + if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { + bool Supported; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); + + if (!Supported) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + cl_device_fp_config CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_fp_config), &CLValue, nullptr)); + + return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); + } + + case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + /* This query is missing before OpenCL 3.0. Check version and handle + * appropriately */ + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + /* Minimum required capability to be returned. 
For OpenCL 1.2, this is all + * that is required */ + ur_memory_order_capability_flags_t URCapabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; + + if (DevVer >= oclv::V3_0) { + /* For OpenCL >=3.0, the query should be implemented */ + cl_device_atomic_capabilities CLCapabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + /* Mask operation to only consider atomic_memory_order* capabilities */ + const cl_int Mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + CLCapabilities &= Mask; + + /* The memory order capabilities are hierarchical, if one is implied, all + * preceding capabilities are implied as well. Especially in the case of + * ACQ_REL. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; + } + } else if (DevVer >= oclv::V2_0) { + /* For OpenCL 2.x, return all capabilities. + * (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + */ + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. + * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t URCapabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. 
OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. + */ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2 */ + ur_memory_order_capability_flags_t URCapabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && + "Violates minimum mandated guarantee"); + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && + "Violates minimum mandated guarantee"); + + /* We already initialized to minimum mandated capabilities. Just check + * stronger orders. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are RELAXED | ACQ_REL, we already initialized + * using these. */ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | + * SEQ_CST */ + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. + * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t URCapabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). 
We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. + */ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + + case UR_DEVICE_INFO_IMAGE_SRGB: { + return ReturnValue(true); + } + + case UR_DEVICE_INFO_BFLOAT16: { + return ReturnValue(false); + } + case UR_DEVICE_INFO_ATOMIC_64: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, + Supported)); + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { + + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, nullptr)); + + return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); + } + case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_mem_channel_property"}, Supported)); + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_ESIMD_SUPPORT: { + bool Supported = false; + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, nullptr)); + + cl_uint VendorID = 0; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); + + /* ESIMD is only supported by Intel GPUs. */ + Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + /* CL type: cl_bitfield / enum + * UR type: ur_flags_t (uint32_t) */ + + cl_bitfield CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bitfield), &CLValue, nullptr)); + + /* We can just static_cast the output because OpenCL and UR bitfields + * map 1 to 1 for these properties. 
cl_bitfield is uint64_t and ur_flags_t + * types are uint32_t */ + return ReturnValue(static_cast(CLValue)); + } + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_ENDIAN_LITTLE: + case UR_DEVICE_INFO_AVAILABLE: + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + case UR_DEVICE_INFO_LINKER_AVAILABLE: + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { + /* CL type: cl_bool + * UR type: ur_bool_t */ + + cl_bool CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); + + /* cl_bool is uint32_t and ur_bool_t is bool */ + return ReturnValue(static_cast(CLValue)); + } + case UR_DEVICE_INFO_VENDOR_ID: + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + case UR_DEVICE_INFO_ADDRESS_BITS: + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + case UR_DEVICE_INFO_MAX_SAMPLERS: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + case UR_DEVICE_INFO_REFERENCE_COUNT: + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + case UR_DEVICE_INFO_PLATFORM: + case UR_DEVICE_INFO_PARENT_DEVICE: + case UR_DEVICE_INFO_IL_VERSION: + case UR_DEVICE_INFO_NAME: + case UR_DEVICE_INFO_VENDOR: + case UR_DEVICE_INFO_DRIVER_VERSION: + case UR_DEVICE_INFO_PROFILE: + case UR_DEVICE_INFO_VERSION: + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + case UR_DEVICE_INFO_EXTENSIONS: + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + case UR_DEVICE_INFO_IP_VERSION: { + /* We can just use the OpenCL outputs because the sizes of OpenCL types + * are the same as UR. 
+ * | CL | UR | Size | + * | char[] | char[] | 8 | + * | cl_uint | uint32_t | 4 | + * | cl_ulong | uint64_t | 8 | + * | size_t | size_t | 8 | + * | cl_platform_id | ur_platform_handle_t | 8 | + * | ur_device_handle_t | cl_device_id | 8 | + */ + + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; + } + /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU + * EU device-specific information extensions. Some of the queries are + * enabled by cl_intel_device_attribute_query extension, but it's not yet in + * the Registry. */ + case UR_DEVICE_INFO_PCI_ADDRESS: + case UR_DEVICE_INFO_GPU_EU_COUNT: + case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: + case UR_DEVICE_INFO_GPU_EU_SLICES: + case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: + case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: + /* TODO: Check if device UUID extension is enabled in OpenCL. For details + * about Intel UUID extension, see + * sycl/doc/extensions/supported/sycl_ext_intel_device_info.md */ + case UR_DEVICE_INFO_UUID: + /* This enums have no equivalent in OpenCL */ + case UR_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_GLOBAL_MEM_FREE: + case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: + case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: + case UR_DEVICE_INFO_ASYNC_BARRIER: + case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( + ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, uint32_t NumDevices, + ur_device_handle_t *phSubDevices, uint32_t *pNumDevicesRet) { + + std::vector CLProperties( + pProperties->PropCount + 2); + + /* The type must be the same for all properties since OpenCL doesn't support + * property lists with multiple types */ + CLProperties[0] = + static_cast(pProperties->pProperties->type); + + for (uint32_t i = 0; i < pProperties->PropCount; ++i) { + cl_device_partition_property CLProperty; + switch (pProperties->pProperties->type) { + case UR_DEVICE_PARTITION_EQUALLY: { + CLProperty = static_cast( + pProperties->pProperties->value.equally); + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + CLProperty = static_cast( + pProperties->pProperties->value.count); + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { + CLProperty = static_cast( + pProperties->pProperties->value.affinity_domain); + break; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + CLProperties[i + 1] = CLProperty; + } + + /* Terminate the list with 0 */ + CLProperties[CLProperties.size() - 1] = 0; + + cl_uint CLNumDevicesRet; + CL_RETURN_ON_FAILURE( + clCreateSubDevices(cl_adapter::cast(hDevice), + CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); + + if (pNumDevicesRet) { + *pNumDevicesRet = CLNumDevicesRet; + } + + /*If NumDevices is less than the number of sub-devices available, then the + * function shall only retrieve that number of sub-devices. 
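+   * The code below therefore queries all CLNumDevicesRet handles from
+   * clCreateSubDevices and copies only the first NumDevices of them into
+   * phSubDevices.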
*/ + if (phSubDevices) { + std::vector CLSubDevices(CLNumDevicesRet); + CL_RETURN_ON_FAILURE(clCreateSubDevices( + cl_adapter::cast(hDevice), CLProperties.data(), + CLNumDevicesRet, CLSubDevices.data(), nullptr)); + + std::memcpy(phSubDevices, CLSubDevices.data(), + sizeof(cl_device_id) * NumDevices); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + + cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urDeviceRelease(ur_device_handle_t hDevice) { + + cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( + ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { + + *phNativeDevice = reinterpret_cast(hDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( + ur_native_handle_t hNativeDevice, ur_platform_handle_t, + const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { + + *phDevice = reinterpret_cast(hNativeDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( + ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp) { + oclv::OpenCLVersion DevVer, PlatVer; + cl_platform_id Platform; + cl_device_id DeviceId = cl_adapter::cast(hDevice); + + // TODO: Cache OpenCL version for each device and platform + auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &Platform, nullptr); + CL_RETURN_ON_FAILURE(RetErr); + + RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + CL_RETURN_ON_FAILURE(RetErr); + + RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); + + if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (pDeviceTimestamp) { + uint64_t Dummy; + clGetDeviceAndHostTimer(DeviceId, pDeviceTimestamp, + pHostTimestamp == nullptr ? &Dummy + : pHostTimestamp); + + } else if (pHostTimestamp) { + clGetHostTimer(DeviceId, pHostTimestamp); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( + ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, uint32_t *pSelectedBinary) { + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. 
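+
+  // Illustrative example of the selection below (hypothetical binary list):
+  // for a GPU device and binaries targeting spirv64_x86_64, spirv64_gen and
+  // plain spirv64, the loop picks the spirv64_gen image; if no AOT image
+  // matches the device type, the device-independent spirv64 entry is used
+  // as a fallback.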
+ + // Choose the binary target for the provided device + const char *ImageTarget = nullptr; + // Get the type of the device + cl_device_type DeviceType; + constexpr uint32_t InvalidInd = std::numeric_limits::max(); + cl_int RetErr = + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DeviceType, nullptr); + if (RetErr != CL_SUCCESS) { + *pSelectedBinary = InvalidInd; + CL_RETURN_ON_FAILURE(RetErr); + } + + switch (DeviceType) { + // TODO: Factor out vendor specifics into a separate source + // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? + + // We'll attempt to find an image that was AOT-compiled + // from a SPIR-V image into an image specific for: + + case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + break; + case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + break; + case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + break; + default: + // Otherwise, we'll attempt to find and JIT-compile + // a device-independent SPIR-V image + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64; + break; + } + + // Find the appropriate device image, fallback to spirv if not found + uint32_t Fallback = InvalidInd; + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(pBinaries[i].pDeviceTargetSpec, ImageTarget) == 0) { + *pSelectedBinary = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(pBinaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Fallback = i; + } + // Points to a spirv image, if such indeed was found + if ((*pSelectedBinary = Fallback) != InvalidInd) + return UR_RESULT_SUCCESS; + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp new file mode 100644 index 0000000000000..ad5a1fa3b207d --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -0,0 +1,18 @@ +//===--------- device.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl_adapter { +ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); + +ur_result_t checkDeviceExtensions(cl_device_id Dev, + const std::vector &Exts, + bool &Supported); +} // namespace cl_adapter diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp new file mode 100644 index 0000000000000..01c353236a78a --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp @@ -0,0 +1,399 @@ +//===--------- enqueue.cpp - OpenCL Adapter --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MAP_FLAG_READ) { + CLFlags |= CL_MAP_READ; + } + if (URFlags & UR_MAP_FLAG_WRITE) { + CLFlags |= CL_MAP_WRITE; + } + if (URFlags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { + CLFlags |= CL_MAP_WRITE_INVALIDATE_REGION; + } + + return CLFlags; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( + cl_adapter::cast(hQueue), + cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( + 
cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingRead, + cl_adapter::cast(&bufferOrigin), + cl_adapter::cast(&hostOrigin), + cl_adapter::cast(®ion), bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingWrite, + cl_adapter::cast(&bufferOrigin), + cl_adapter::cast(&hostOrigin), + cl_adapter::cast(®ion), bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBufferSrc), + cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBufferSrc), + cl_adapter::cast(hBufferDst), + cl_adapter::cast(&srcOrigin), + cl_adapter::cast(&dstOrigin), + cl_adapter::cast(®ion), srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, + size_t patternSize, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + 
CL_RETURN_ON_FAILURE(clEnqueueReadImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImage), blockingRead, + cl_adapter::cast(&origin), + cl_adapter::cast(®ion), rowPitch, slicePitch, pDst, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImage), blockingWrite, + cl_adapter::cast(&origin), + cl_adapter::cast(®ion), rowPitch, slicePitch, pSrc, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), + cl_adapter::cast(&srcOrigin), + cl_adapter::cast(&dstOrigin), + cl_adapter::cast(®ion), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { + + cl_int Err; + *ppRetMap = clEnqueueMapBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingMap, + convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent), &Err); + + CL_RETURN_ON_FAILURE(Err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( + ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( + cl_adapter::cast(hQueue), + cl_adapter::cast(hMem), pMappedPtr, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context Ctx = nullptr; + cl_int Res = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); + + if (Res != CL_SUCCESS) + return mapCLErrorToUR(Res); + + cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; + Res = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, + cl_ext::EnqueueWriteGlobalVariableName, &F); + + if (!F || Res != CL_SUCCESS) + return 
UR_RESULT_ERROR_INVALID_OPERATION; + + Res = F(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), name, blockingWrite, count, + offset, pSrc, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent)); + + return mapCLErrorToUR(Res); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context Ctx = nullptr; + cl_int Res = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); + + if (Res != CL_SUCCESS) + return mapCLErrorToUR(Res); + + cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; + Res = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, + cl_ext::EnqueueReadGlobalVariableName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + Res = F(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), name, blockingRead, count, + offset, pDst, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent)); + + return mapCLErrorToUR(Res); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pDst, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, + cl_ext::EnqueueReadHostPipeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), pipe_symbol, blocking, + pDst, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pSrc, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, + cl_ext::EnqueueWriteHostPipeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), pipe_symbol, blocking, + pSrc, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp new file mode 100644 index 0000000000000..744a6da54bad5 --- /dev/null +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -0,0 +1,134 @@ +//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { + switch (PropName) { + case UR_EVENT_INFO_COMMAND_QUEUE: + return CL_EVENT_COMMAND_QUEUE; + break; + case UR_EVENT_INFO_CONTEXT: + return CL_EVENT_CONTEXT; + break; + case UR_EVENT_INFO_COMMAND_TYPE: + return CL_EVENT_COMMAND_TYPE; + break; + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: + return CL_EVENT_COMMAND_EXECUTION_STATUS; + break; + case UR_EVENT_INFO_REFERENCE_COUNT: + return CL_EVENT_REFERENCE_COUNT; + break; + default: + return -1; + break; + } +} + +cl_profiling_info +convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_QUEUED: + return CL_PROFILING_COMMAND_QUEUED; + case UR_PROFILING_INFO_COMMAND_SUBMIT: + return CL_PROFILING_COMMAND_SUBMIT; + case UR_PROFILING_INFO_COMMAND_START: + return CL_PROFILING_COMMAND_START; + // TODO(ur) add UR_PROFILING_INFO_COMMAND_COMPLETE once spec has been updated + case UR_PROFILING_INFO_COMMAND_END: + return CL_PROFILING_COMMAND_END; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( + ur_native_handle_t hNativeEvent, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { + *phEvent = reinterpret_cast(hNativeEvent); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( + ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { + return getNativeHandle(hEvent, phNativeEvent); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { + cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { + cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { + cl_int RetErr = clWaitForEvents( + numEvents, cl_adapter::cast(phEventWaitList)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, + ur_event_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + cl_int RetErr = + clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, + pPropValue, pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + + if (RetErr == CL_SUCCESS && + propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change + * it to CL_SUBMITTED. sycl::info::event::event_command_status has no + * equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since PI_QUEUED + * exists, SYCL RT should be changed to handle this situation. 
In addition, + * SYCL RT is relying on PI_QUEUED status to make sure that the queues are + * flushed. */ + const auto param_value_int = static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( + ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); + cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), + CLProfilingInfo, propSize, pPropValue, + pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData) { + std::ignore = hEvent; + std::ignore = execStatus; + std::ignore = pfnNotify; + std::ignore = pUserData; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp new file mode 100644 index 0000000000000..5e7c2405a31cc --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp @@ -0,0 +1,174 @@ +//===---------- image.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_usm_desc_t *pUSMDesc, + [[maybe_unused]] ur_usm_pool_handle_t pool, + [[maybe_unused]] size_t widthInBytes, [[maybe_unused]] size_t height, + [[maybe_unused]] size_t elementSizeBytes, [[maybe_unused]] void **ppMem, + [[maybe_unused]] size_t *pResultPitch) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesUnsampledImageHandleDestroyExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_handle_t hImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesSampledImageHandleDestroyExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_handle_t hImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( + 
[[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_mem_handle_t *phMem, + [[maybe_unused]] ur_exp_image_handle_t *phImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_sampler_handle_t hSampler, + [[maybe_unused]] ur_mem_handle_t *phMem, + [[maybe_unused]] ur_exp_image_handle_t *phImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pDst, + [[maybe_unused]] void *pSrc, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + [[maybe_unused]] ur_rect_offset_t srcOffset, + [[maybe_unused]] ur_rect_offset_t dstOffset, + [[maybe_unused]] ur_rect_region_t copyExtent, + [[maybe_unused]] ur_rect_region_t hostExtent, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] ur_image_info_t propName, + [[maybe_unused]] void *pPropValue, [[maybe_unused]] size_t *pPropSizeRet) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] uint32_t mipmapLevel, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesMipmapFreeExp([[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, [[maybe_unused]] size_t size, + [[maybe_unused]] ur_exp_interop_mem_desc_t *pInteropMemDesc, + [[maybe_unused]] ur_exp_interop_mem_handle_t *phInteropMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( + [[maybe_unused]] 
ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesImportExternalSemaphoreOpaqueFDExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t + *phInteropSemaphoreHandle) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hInteropSemaphore) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp new file mode 100644 index 0000000000000..8867be5f76727 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -0,0 +1,362 @@ +//===----------- kernel.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------===// +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, + ur_kernel_handle_t *phKernel) { + + cl_int CLResult; + *phKernel = cl_adapter::cast(clCreateKernel( + cl_adapter::cast(hProgram), pKernelName, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *, const void *pArgValue) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, pArgValue)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, const ur_kernel_arg_local_properties_t *) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, nullptr)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_INFO_FUNCTION_NAME: + return CL_KERNEL_FUNCTION_NAME; + case UR_KERNEL_INFO_NUM_ARGS: + return CL_KERNEL_NUM_ARGS; + case UR_KERNEL_INFO_REFERENCE_COUNT: + return CL_KERNEL_REFERENCE_COUNT; + case UR_KERNEL_INFO_CONTEXT: + return CL_KERNEL_CONTEXT; + case UR_KERNEL_INFO_PROGRAM: + return CL_KERNEL_PROGRAM; + case UR_KERNEL_INFO_ATTRIBUTES: + return CL_KERNEL_ATTRIBUTES; + case UR_KERNEL_INFO_NUM_REGS: + return CL_KERNEL_NUM_ARGS; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURKernelGroupInfoToCL(ur_kernel_group_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: + return CL_KERNEL_GLOBAL_WORK_SIZE; + case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: + return CL_KERNEL_WORK_GROUP_SIZE; + case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: + return CL_KERNEL_COMPILE_WORK_GROUP_SIZE; + case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: + return CL_KERNEL_LOCAL_MEM_SIZE; + case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + return CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; + case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: + return CL_KERNEL_PRIVATE_MEM_SIZE; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( + cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int +mapURKernelSubGroupInfoToCL(ur_kernel_sub_group_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: + return CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE; + case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: + return CL_KERNEL_MAX_NUM_SUB_GROUPS; + 
case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: + return CL_KERNEL_COMPILE_NUM_SUB_GROUPS; + case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: + return CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, size_t, + void *pPropValue, size_t *pPropSizeRet) { + + std::shared_ptr InputValue; + size_t InputValueSize = 0; + size_t RetVal; + + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no + // value is given we use the max work item size of the device in the first + // dimension to avoid truncation of max sub-group size. + uint32_t MaxDims = 0; + ur_result_t URRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, + sizeof(uint32_t), &MaxDims, nullptr); + if (URRet != UR_RESULT_SUCCESS) + return URRet; + std::shared_ptr WgSizes{new size_t[MaxDims]}; + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + MaxDims * sizeof(size_t), WgSizes.get(), nullptr); + if (URRet != UR_RESULT_SUCCESS) + return URRet; + for (size_t i = 1; i < MaxDims; ++i) + WgSizes.get()[i] = 1; + InputValue = std::move(WgSizes); + InputValueSize = MaxDims * sizeof(size_t); + } + + cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); + + if (Ret == CL_INVALID_OPERATION) { + // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does + // not support subgroups. + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) { + RetVal = 1; // Minimum required by SYCL 2020 spec + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // Return the maximum sub group size for the device + size_t ResultSize = 0; + // Two calls to urDeviceGetInfo are needed: the first determines the size + // required to store the result, and the second returns the actual size + // values. 
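+      // The first query passes a zero propSize so only the required byte
+      // count is written to ResultSize; ResultSize / sizeof(size_t) is then
+      // the number of supported sub-group sizes to read in the second call.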
+ ur_result_t URRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, + nullptr, &ResultSize); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; + } + assert(ResultSize % sizeof(size_t) == 0); + std::vector Result(ResultSize / sizeof(size_t)); + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, + ResultSize, Result.data(), nullptr); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; + } + RetVal = *std::max_element(Result.begin(), Result.end()); + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } + } + + *(static_cast(pPropValue)) = static_cast(RetVal); + if (pPropSizeRet) + *pPropSizeRet = sizeof(uint32_t); + + CL_RETURN_ON_FAILURE(Ret); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { + CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelRelease(ur_kernel_handle_t hKernel) { + CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + return UR_RESULT_SUCCESS; +} + +/** + * Enables indirect access of pointers in kernels. Necessary to avoid telling CL + * about every pointer that might be used. + */ +static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { + + cl_bool TrueVal = CL_TRUE; + clHostMemAllocINTEL_fn HFunc = nullptr; + clSharedMemAllocINTEL_fn SFunc = nullptr; + clDeviceMemAllocINTEL_fn DFunc = nullptr; + cl_context CLContext; + + /* We test that each alloc type is supported before we actually try to set + * KernelExecInfo. */ + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &HFunc)); + + if (HFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &DFunc)); + + if (DFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &SFunc)); + + if (SFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( + ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, + const ur_kernel_exec_info_properties_t *, const void *pPropValue) { + + switch (propName) { + case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { + if (*(static_cast(pPropValue)) == true) { + CL_RETURN_ON_FAILURE(usmSetIndirectAccess(hKernel)); + } + return UR_RESULT_SUCCESS; + } + case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { + /* Setting the cache config is unsupported in OpenCL */ + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + case UR_KERNEL_EXEC_INFO_USM_PTRS: { + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + cl_adapter::cast(hKernel), 
propName, propSize, pPropValue)); + return UR_RESULT_SUCCESS; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { + + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, + cl_ext::SetKernelArgMemPointerName, &FuncPtr)); + + if (FuncPtr) { + /* OpenCL passes pointers by value not by reference. This means we need to + * deref the arg to get the pointer value */ + auto PtrToPtr = reinterpret_cast(pArgValue); + auto DerefPtr = reinterpret_cast(*PtrToPtr); + CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + DerefPtr)); + } + + return UR_RESULT_SUCCESS; +} +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( + ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { + + *phNativeKernel = reinterpret_cast(hKernel); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( + ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, + const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { + + *phKernel = reinterpret_cast(hNativeKernel); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { + + cl_int RetErr = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { + + cl_int RetErr = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp new file mode 100644 index 0000000000000..31484930f965d --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -0,0 +1,389 @@ +//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { + cl_image_format CLImageFormat; + switch (PImageFormat->channelOrder) { + case UR_IMAGE_CHANNEL_ORDER_A: + CLImageFormat.image_channel_order = CL_A; + break; + case UR_IMAGE_CHANNEL_ORDER_R: + CLImageFormat.image_channel_order = CL_R; + break; + case UR_IMAGE_CHANNEL_ORDER_RG: + CLImageFormat.image_channel_order = CL_RG; + break; + case UR_IMAGE_CHANNEL_ORDER_RA: + CLImageFormat.image_channel_order = CL_RA; + break; + case UR_IMAGE_CHANNEL_ORDER_RGB: + CLImageFormat.image_channel_order = CL_RGB; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBA: + CLImageFormat.image_channel_order = CL_RGBA; + break; + case UR_IMAGE_CHANNEL_ORDER_BGRA: + CLImageFormat.image_channel_order = CL_BGRA; + break; + case UR_IMAGE_CHANNEL_ORDER_ARGB: + CLImageFormat.image_channel_order = CL_ARGB; + break; + case UR_IMAGE_CHANNEL_ORDER_ABGR: + CLImageFormat.image_channel_order = CL_ABGR; + break; + case UR_IMAGE_CHANNEL_ORDER_INTENSITY: + CLImageFormat.image_channel_order = CL_INTENSITY; + break; + case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: + CLImageFormat.image_channel_order = CL_LUMINANCE; + break; + case UR_IMAGE_CHANNEL_ORDER_RX: + CLImageFormat.image_channel_order = CL_Rx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGX: + CLImageFormat.image_channel_order = CL_RGx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBX: + CLImageFormat.image_channel_order = CL_RGBx; + break; + case UR_IMAGE_CHANNEL_ORDER_SRGBA: + CLImageFormat.image_channel_order = CL_sRGBA; + break; + default: + CLImageFormat.image_channel_order = -1; + break; + } + + switch (PImageFormat->channelType) { + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: + CLImageFormat.image_channel_data_type = CL_SNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: + CLImageFormat.image_channel_data_type = CL_SNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: + CLImageFormat.image_channel_data_type = CL_UNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: + CLImageFormat.image_channel_data_type = CL_UNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_565; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_555; + break; + case UR_IMAGE_CHANNEL_TYPE_INT_101010: + CLImageFormat.image_channel_data_type = CL_UNORM_INT_101010; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT32; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT32; + break; + case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + CLImageFormat.image_channel_data_type = CL_HALF_FLOAT; + break; + case UR_IMAGE_CHANNEL_TYPE_FLOAT: + CLImageFormat.image_channel_data_type = CL_FLOAT; + break; + default: + CLImageFormat.image_channel_data_type = -1; + break; + } + + return CLImageFormat; 
+} + +cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { + cl_image_desc CLImageDesc; + CLImageDesc.image_type = + cl_adapter::cast(PImageDesc->type); + + switch (PImageDesc->type) { + case UR_MEM_TYPE_BUFFER: + CLImageDesc.image_type = CL_MEM_OBJECT_BUFFER; + break; + case UR_MEM_TYPE_IMAGE2D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + break; + case UR_MEM_TYPE_IMAGE3D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; + break; + case UR_MEM_TYPE_IMAGE2D_ARRAY: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; + break; + case UR_MEM_TYPE_IMAGE1D_ARRAY: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D_BUFFER: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + break; + default: + CLImageDesc.image_type = -1; + break; + } + + CLImageDesc.image_width = PImageDesc->width; + CLImageDesc.image_height = PImageDesc->height; + CLImageDesc.image_depth = PImageDesc->depth; + CLImageDesc.image_array_size = PImageDesc->arraySize; + CLImageDesc.image_row_pitch = PImageDesc->rowPitch; + CLImageDesc.image_slice_pitch = PImageDesc->slicePitch; + CLImageDesc.num_mip_levels = PImageDesc->numMipLevel; + CLImageDesc.num_samples = PImageDesc->numSamples; + CLImageDesc.buffer = nullptr; + CLImageDesc.mem_object = nullptr; + + return CLImageDesc; +} + +cl_int mapURMemImageInfoToCL(ur_image_info_t URPropName) { + + switch (URPropName) { + case UR_IMAGE_INFO_FORMAT: + return CL_IMAGE_FORMAT; + case UR_IMAGE_INFO_ELEMENT_SIZE: + return CL_IMAGE_ELEMENT_SIZE; + case UR_IMAGE_INFO_ROW_PITCH: + return CL_IMAGE_ROW_PITCH; + case UR_IMAGE_INFO_SLICE_PITCH: + return CL_IMAGE_SLICE_PITCH; + case UR_IMAGE_INFO_WIDTH: + return CL_IMAGE_WIDTH; + case UR_IMAGE_INFO_HEIGHT: + return CL_IMAGE_HEIGHT; + case UR_IMAGE_INFO_DEPTH: + return CL_IMAGE_DEPTH; + default: + return -1; + } +} + +cl_int mapURMemInfoToCL(ur_mem_info_t URPropName) { + + switch (URPropName) { + case UR_MEM_INFO_SIZE: + return CL_MEM_SIZE; + case UR_MEM_INFO_CONTEXT: + return CL_MEM_CONTEXT; + default: + return -1; + } +} + +cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MEM_FLAG_READ_WRITE) { + CLFlags |= CL_MEM_READ_WRITE; + } + if (URFlags & UR_MEM_FLAG_WRITE_ONLY) { + CLFlags |= CL_MEM_WRITE_ONLY; + } + if (URFlags & UR_MEM_FLAG_READ_ONLY) { + CLFlags |= CL_MEM_READ_ONLY; + } + if (URFlags & UR_MEM_FLAG_USE_HOST_POINTER) { + CLFlags |= CL_MEM_USE_HOST_PTR; + } + if (URFlags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { + CLFlags |= CL_MEM_ALLOC_HOST_PTR; + } + if (URFlags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + CLFlags |= CL_MEM_COPY_HOST_PTR; + } + + return CLFlags; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( + ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { + + cl_int RetErr = CL_INVALID_OPERATION; + if (pProperties) { + // TODO: need to check if all properties are supported by OpenCL RT and + // ignore unsupported + clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + // First we need to look up the function pointer + RetErr = + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, + cl_ext::CreateBufferWithPropertiesName, &FuncPtr); + if (FuncPtr) { + std::vector PropertiesIntel; + auto Prop = 
static_cast<ur_base_properties_t *>(pProperties->pNext);
+      while (Prop) {
+        switch (Prop->stype) {
+        case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: {
+          auto BufferChannelProperty =
+              reinterpret_cast<ur_buffer_channel_properties_t *>(Prop);
+          PropertiesIntel.push_back(CL_MEM_CHANNEL_INTEL);
+          PropertiesIntel.push_back(BufferChannelProperty->channel);
+        } break;
+        case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: {
+          auto BufferLocationProperty =
+              reinterpret_cast<ur_buffer_alloc_location_properties_t *>(Prop);
+          PropertiesIntel.push_back(CL_MEM_ALLOC_FLAGS_INTEL);
+          PropertiesIntel.push_back(BufferLocationProperty->location);
+        } break;
+        default:
+          break;
+        }
+        Prop = static_cast<ur_base_properties_t *>(Prop->pNext);
+      }
+      PropertiesIntel.push_back(0);
+
+      *phBuffer = reinterpret_cast<ur_mem_handle_t>(FuncPtr(
+          CLContext, PropertiesIntel.data(), static_cast<cl_mem_flags>(flags),
+          size, pProperties->pHost, cl_adapter::cast<cl_int *>(&RetErr)));
+      CL_RETURN_ON_FAILURE(RetErr);
+      // Return early so the buffer is not created a second time through the
+      // plain clCreateBuffer path below.
+      return UR_RESULT_SUCCESS;
+    }
+  }
+
+  *phBuffer = reinterpret_cast<ur_mem_handle_t>(clCreateBuffer(
+      cl_adapter::cast<cl_context>(hContext), static_cast<cl_mem_flags>(flags),
+      size, pProperties ? pProperties->pHost : nullptr,
+      cl_adapter::cast<cl_int *>(&RetErr)));
+  CL_RETURN_ON_FAILURE(RetErr);
+
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
+    ur_context_handle_t hContext, ur_mem_flags_t flags,
+    const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+    void *pHost, ur_mem_handle_t *phMem) {
+
+  cl_int RetErr = CL_INVALID_OPERATION;
+
+  cl_image_format ImageFormat = mapURImageFormatToCL(pImageFormat);
+  cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc);
+  cl_map_flags MapFlags = convertURMemFlagsToCL(flags);
+
+  *phMem = reinterpret_cast<ur_mem_handle_t>(clCreateImage(
+      cl_adapter::cast<cl_context>(hContext), MapFlags, &ImageFormat,
+      &ImageDesc, pHost, cl_adapter::cast<cl_int *>(&RetErr)));
+  CL_RETURN_ON_FAILURE(RetErr);
+
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
+    ur_mem_handle_t hBuffer, ur_mem_flags_t flags,
+    ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion,
+    ur_mem_handle_t *phMem) {
+
+  cl_int RetErr = CL_INVALID_OPERATION;
+
+  cl_buffer_create_type BufferCreateType;
+  switch (bufferCreateType) {
+  case UR_BUFFER_CREATE_TYPE_REGION:
+    BufferCreateType = CL_BUFFER_CREATE_TYPE_REGION;
+    break;
+  default:
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+
+  _cl_buffer_region BufferRegion;
+  BufferRegion.origin = pRegion->origin;
+  BufferRegion.size = pRegion->size;
+
+  *phMem = reinterpret_cast<ur_mem_handle_t>(clCreateSubBuffer(
+      cl_adapter::cast<cl_mem>(hBuffer), static_cast<cl_mem_flags>(flags),
+      BufferCreateType, &BufferRegion, cl_adapter::cast<cl_int *>(&RetErr)));
+  CL_RETURN_ON_FAILURE(RetErr);
+
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) {
+  return getNativeHandle(hMem, phNativeMem);
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
+    ur_native_handle_t hNativeMem,
+    [[maybe_unused]] ur_context_handle_t hContext,
+    [[maybe_unused]] const ur_mem_native_properties_t *pProperties,
+    ur_mem_handle_t *phMem) {
+
+  *phMem = reinterpret_cast<ur_mem_handle_t>(hNativeMem);
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
+    ur_native_handle_t hNativeMem,
+    [[maybe_unused]] ur_context_handle_t hContext,
+    [[maybe_unused]] const ur_image_format_t *pImageFormat,
+    [[maybe_unused]] const ur_image_desc_t *pImageDesc,
+    [[maybe_unused]] const ur_mem_native_properties_t *pProperties,
+    ur_mem_handle_t *phMem) {
+
+  *phMem = reinterpret_cast<ur_mem_handle_t>(hNativeMem);
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
urMemGetInfo(ur_mem_handle_t hMemory, + ur_mem_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURMemInfoToCL(propName); + + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), + CLPropName, propSize, pPropValue, + pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, + ur_image_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURMemImageInfoToCL(propName); + + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), + CLPropName, propSize, pPropValue, + pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp new file mode 100644 index 0000000000000..ebe7d32e4db40 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -0,0 +1,141 @@ +//===--------- platform.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "platform.hpp" + +ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, + oclv::OpenCLVersion &Version) { + + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, + PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { + + switch (URPropName) { + case UR_PLATFORM_INFO_NAME: + return CL_PLATFORM_NAME; + case UR_PLATFORM_INFO_VENDOR_NAME: + return CL_PLATFORM_VENDOR; + case UR_PLATFORM_INFO_VERSION: + return CL_PLATFORM_VERSION; + case UR_PLATFORM_INFO_EXTENSIONS: + return CL_PLATFORM_EXTENSIONS; + case UR_PLATFORM_INFO_PROFILE: + return CL_PLATFORM_PROFILE; + default: + return -1; + } +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, + size_t propSize, void *pPropValue, size_t *pSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); + const cl_int CLPropName = mapURPlatformInfoToCL(propName); + + switch (static_cast(propName)) { + case UR_PLATFORM_INFO_BACKEND: + return ReturnValue(UR_PLATFORM_BACKEND_OPENCL); + case UR_PLATFORM_INFO_NAME: + case UR_PLATFORM_INFO_VENDOR_NAME: + case UR_PLATFORM_INFO_VERSION: + case UR_PLATFORM_INFO_EXTENSIONS: + case UR_PLATFORM_INFO_PROFILE: { + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(cl_adapter::cast(hPlatform), + 
CLPropName, propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGetApiVersion([[maybe_unused]] ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion) { + *pVersion = UR_API_VERSION_CURRENT; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + + cl_int Result = + clGetPlatformIDs(cl_adapter::cast(NumEntries), + cl_adapter::cast(phPlatforms), + cl_adapter::cast(pNumPlatforms)); + + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ + if (Result == CL_PLATFORM_NOT_FOUND_KHR) { + Result = CL_SUCCESS; + if (pNumPlatforms) { + *pNumPlatforms = 0; + } + } + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( + ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { + *phNativePlatform = reinterpret_cast(hPlatform); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, + ur_platform_handle_t *phPlatform) { + *phPlatform = reinterpret_cast(hNativePlatform); + return UR_RESULT_SUCCESS; +} + +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-cl-opt-disable' for pFrontendOption = -O0 and '' for others. +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, + const char **ppPlatformOption) { + using namespace std::literals; + if (pFrontendOption == nullptr) + return UR_RESULT_SUCCESS; + if (pFrontendOption == ""sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. + if (!strcmp(pFrontendOption, "-O0")) { + *ppPlatformOption = "-cl-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (pFrontendOption == "-O1"sv || pFrontendOption == "-O2"sv || + pFrontendOption == "-O3"sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (pFrontendOption == "-ftarget-compile-fast"sv) { + *ppPlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp new file mode 100644 index 0000000000000..340955fe7c39c --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp @@ -0,0 +1,15 @@ +//===--------- platform.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+
+namespace cl_adapter {
+ur_result_t getPlatformVersion(cl_platform_id Plat,
+                               oclv::OpenCLVersion &Version);
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp
new file mode 100644
index 0000000000000..e219edb83f559
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp
@@ -0,0 +1,419 @@
+//===--------- program.cpp - OpenCL Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------------===//
+
+#include "common.hpp"
+#include "context.hpp"
+#include "device.hpp"
+#include "platform.hpp"
+
+static ur_result_t getDevicesFromProgram(
+    ur_program_handle_t hProgram,
+    std::unique_ptr<std::vector<cl_device_id>> &DevicesInProgram) {
+
+  cl_uint DeviceCount;
+  CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast<cl_program>(hProgram),
+                                        CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint),
+                                        &DeviceCount, nullptr));
+
+  if (DeviceCount < 1) {
+    return UR_RESULT_ERROR_INVALID_CONTEXT;
+  }
+
+  DevicesInProgram = std::make_unique<std::vector<cl_device_id>>(DeviceCount);
+
+  CL_RETURN_ON_FAILURE(clGetProgramInfo(
+      cl_adapter::cast<cl_program>(hProgram), CL_PROGRAM_DEVICES,
+      DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr));
+
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
+    ur_context_handle_t hContext, const void *pIL, size_t length,
+    const ur_program_properties_t *, ur_program_handle_t *phProgram) {
+
+  std::unique_ptr<std::vector<cl_device_id>> DevicesInCtx;
+  CL_RETURN_ON_FAILURE_AND_SET_NULL(
+      cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram);
+
+  cl_platform_id CurPlatform;
+  CL_RETURN_ON_FAILURE_AND_SET_NULL(
+      clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM,
+                      sizeof(cl_platform_id), &CurPlatform, nullptr),
+      phProgram);
+
+  oclv::OpenCLVersion PlatVer;
+  CL_RETURN_ON_FAILURE_AND_SET_NULL(
+      cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram);
+
+  cl_int Err = CL_SUCCESS;
+  if (PlatVer >= oclv::V2_1) {
+
+    /* Make sure all devices support CL 2.1 or newer as well. */
+    for (cl_device_id Dev : *DevicesInCtx) {
+      oclv::OpenCLVersion DevVer;
+
+      CL_RETURN_ON_FAILURE_AND_SET_NULL(
+          cl_adapter::getDeviceVersion(Dev, DevVer), phProgram);
+
+      /* If the device does not support CL 2.1 or greater, we need to make sure
+       * it supports the cl_khr_il_program extension.
+       */
+      if (DevVer < oclv::V2_1) {
+        bool Supported = false;
+        CL_RETURN_ON_FAILURE_AND_SET_NULL(
+            cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"},
+                                              Supported),
+            phProgram);
+
+        if (!Supported) {
+          return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE;
+        }
+      }
+    }
+
+    *phProgram = cl_adapter::cast<ur_program_handle_t>(clCreateProgramWithIL(
+        cl_adapter::cast<cl_context>(hContext), pIL, length, &Err));
+    CL_RETURN_ON_FAILURE(Err);
+  } else {
+
+    /* If none of the devices conform with CL 2.1 or newer make sure they all
+     * support the cl_khr_il_program extension.
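+     * In that case the program is created through the clCreateProgramWithILKHR
+     * entry point, which is looked up for the current platform below.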
+ */ + for (cl_device_id Dev : *DevicesInCtx) { + bool Supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, + Supported), + phProgram); + + if (!Supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } + } + + using ApiFuncT = + cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + ApiFuncT FuncPtr = + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( + CurPlatform, "clCreateProgramWithILKHR")); + + assert(FuncPtr != nullptr); + + *phProgram = cl_adapter::cast( + FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + CL_RETURN_ON_FAILURE(Err); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *, + ur_program_handle_t *phProgram) { + + cl_int BinaryStatus; + cl_int CLResult; + *phProgram = cl_adapter::cast(clCreateProgramWithBinary( + cl_adapter::cast(hContext), cl_adapter::cast(1u), + cl_adapter::cast(&hDevice), &size, &pBinary, + &BinaryStatus, &CLResult)); + CL_RETURN_ON_FAILURE(BinaryStatus); + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { + + std::unique_ptr> DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + + CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), + DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, 0, + nullptr, nullptr, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_PROGRAM_INFO_REFERENCE_COUNT: + return CL_PROGRAM_REFERENCE_COUNT; + case UR_PROGRAM_INFO_CONTEXT: + return CL_PROGRAM_CONTEXT; + case UR_PROGRAM_INFO_NUM_DEVICES: + return CL_PROGRAM_NUM_DEVICES; + case UR_PROGRAM_INFO_DEVICES: + return CL_PROGRAM_DEVICES; + case UR_PROGRAM_INFO_SOURCE: + return CL_PROGRAM_SOURCE; + case UR_PROGRAM_INFO_BINARY_SIZES: + return CL_PROGRAM_BINARY_SIZES; + case UR_PROGRAM_INFO_BINARIES: + return CL_PROGRAM_BINARIES; + case UR_PROGRAM_INFO_NUM_KERNELS: + return CL_PROGRAM_NUM_KERNELS; + case UR_PROGRAM_INFO_KERNEL_NAMES: + return CL_PROGRAM_KERNEL_NAMES; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + mapURProgramInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { + + std::unique_ptr> DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + + CL_RETURN_ON_FAILURE(clBuildProgram( + cl_adapter::cast(hProgram), DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, nullptr, nullptr)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, const char *pOptions, + ur_program_handle_t *phProgram) { + + cl_int CLResult; + *phProgram = 
cl_adapter::cast( + clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, + pOptions, cl_adapter::cast(count), + cl_adapter::cast(phPrograms), nullptr, + nullptr, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURProgramBuildInfoToCL(ur_program_build_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_PROGRAM_BUILD_INFO_STATUS: + return CL_PROGRAM_BUILD_STATUS; + case UR_PROGRAM_BUILD_INFO_OPTIONS: + return CL_PROGRAM_BUILD_OPTIONS; + case UR_PROGRAM_BUILD_INFO_LOG: + return CL_PROGRAM_BUILD_LOG; + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + return CL_PROGRAM_BINARY_TYPE; + default: + return -1; + } +} + +static ur_program_binary_type_t +mapCLBinaryTypeToUR(cl_program_binary_type binaryType) { + switch (binaryType) { + case CL_PROGRAM_BINARY_TYPE_NONE: + return UR_PROGRAM_BINARY_TYPE_NONE; + case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: + return UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; + case CL_PROGRAM_BINARY_TYPE_LIBRARY: + return UR_PROGRAM_BINARY_TYPE_LIBRARY; + case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: + return UR_PROGRAM_BINARY_TYPE_EXECUTABLE; + default: + return UR_PROGRAM_BINARY_TYPE_FORCE_UINT32; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, + ur_program_build_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + cl_program_binary_type cl_value; + CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( + cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), + &cl_value, nullptr)); + return ReturnValue(mapCLBinaryTypeToUR(cl_value)); + case UR_PROGRAM_BUILD_INFO_LOG: + case UR_PROGRAM_BUILD_INFO_OPTIONS: + case UR_PROGRAM_BUILD_INFO_STATUS: + CL_RETURN_ON_FAILURE( + clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRetain(ur_program_handle_t hProgram) { + + CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRelease(ur_program_handle_t hProgram) { + + CL_RETURN_ON_FAILURE( + clReleaseProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( + ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { + + *phNativeProgram = reinterpret_cast(hProgram); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t, + const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { + + *phProgram = reinterpret_cast(hNativeProgram); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants) { + + cl_program CLProg = cl_adapter::cast(hProgram); + cl_context Ctx = nullptr; + size_t RetSize = 0; + + CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), + &Ctx, &RetSize)); + + cl_ext::clSetProgramSpecializationConstant_fn F = 
nullptr;
+  const ur_result_t URResult = cl_ext::getExtFuncFromContext<
+      cl_ext::clSetProgramSpecializationConstant_fn>(
+      Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache,
+      cl_ext::SetProgramSpecializationConstantName, &F);
+
+  if (URResult != UR_RESULT_SUCCESS) {
+    return URResult;
+  }
+
+  for (uint32_t i = 0; i < count; ++i) {
+    CL_RETURN_ON_FAILURE(F(CLProg, pSpecConstants[i].id, pSpecConstants[i].size,
+                           pSpecConstants[i].pValue));
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Walks the substrings of Str that are separated by Delimiter and checks
+// whether any of them equals SubStr. Returns true if there is at least one
+// instance, false if there are none.
+static bool isInSeparatedString(const std::string &Str, char Delimiter,
+                                const std::string &SubStr) {
+  size_t Beg = 0;
+  size_t Length = 0;
+  for (const auto &x : Str) {
+    if (x == Delimiter) {
+      if (Str.substr(Beg, Length) == SubStr)
+        return true;
+
+      Beg += Length + 1;
+      Length = 0;
+      continue;
+    }
+    Length++;
+  }
+  if (Length != 0)
+    if (Str.substr(Beg, Length) == SubStr)
+      return true;
+
+  return false;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer(
+    ur_device_handle_t hDevice, ur_program_handle_t hProgram,
+    const char *pFunctionName, void **ppFunctionPointer) {
+
+  cl_context CLContext = nullptr;
+  CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast<cl_program>(hProgram),
+                                        CL_PROGRAM_CONTEXT, sizeof(CLContext),
+                                        &CLContext, nullptr));
+
+  cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr;
+
+  UR_RETURN_ON_FAILURE(
+      cl_ext::getExtFuncFromContext<cl_ext::clGetDeviceFunctionPointer_fn>(
+          CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache,
+          cl_ext::GetDeviceFunctionPointerName, &FuncT));
+
+  if (!FuncT) {
+    return UR_RESULT_ERROR_INVALID_FUNCTION_NAME;
+  }
+
+  // Check that the kernel name exists, to prevent the OpenCL CPU runtime from
+  // throwing an exception.
+  // TODO: Use fallback search method if the clGetDeviceFunctionPointerINTEL
+  // extension does not exist. Can only be done once the CPU runtime no longer
+  // throws exceptions.
+  *ppFunctionPointer = 0;
+  size_t Size;
+  CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast<cl_program>(hProgram),
+                                        CL_PROGRAM_KERNEL_NAMES, 0, nullptr,
+                                        &Size));
+
+  std::string KernelNames(Size, ' ');
+
+  CL_RETURN_ON_FAILURE(clGetProgramInfo(
+      cl_adapter::cast<cl_program>(hProgram), CL_PROGRAM_KERNEL_NAMES,
+      KernelNames.size(), &KernelNames[0], nullptr));
+
+  // Get rid of the null terminator and search for the kernel name. If the
+  // function cannot be found, return an error code to indicate it does not
+  // exist.
+  KernelNames.pop_back();
+  if (!isInSeparatedString(KernelNames, ';', pFunctionName)) {
+    return UR_RESULT_ERROR_INVALID_KERNEL_NAME;
+  }
+
+  const cl_int CLResult =
+      FuncT(cl_adapter::cast<cl_device_id>(hDevice),
+            cl_adapter::cast<cl_program>(hProgram), pFunctionName,
+            reinterpret_cast<cl_ulong *>(ppFunctionPointer));
+  // The GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function
+  // address cannot be found even though the kernel exists. As the kernel does
+  // exist, return that the function name is invalid.
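+  // Returning UR_RESULT_ERROR_INVALID_FUNCTION_NAME here, rather than
+  // UR_RESULT_ERROR_INVALID_KERNEL_NAME as above, lets the caller tell a
+  // missing function address apart from a missing kernel.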
+  if (CLResult == CL_INVALID_ARG_VALUE) {
+    *ppFunctionPointer = 0;
+    return UR_RESULT_ERROR_INVALID_FUNCTION_NAME;
+  }
+
+  CL_RETURN_ON_FAILURE(CLResult);
+
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp
new file mode 100644
index 0000000000000..8b5496e619768
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp
@@ -0,0 +1,161 @@
+//===--------- queue.cpp - OpenCL Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+#include "platform.hpp"
+
+cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) {
+
+  switch (PropName) {
+  case UR_QUEUE_INFO_CONTEXT:
+    return CL_QUEUE_CONTEXT;
+  case UR_QUEUE_INFO_DEVICE:
+    return CL_QUEUE_DEVICE;
+  case UR_QUEUE_INFO_DEVICE_DEFAULT:
+    return CL_QUEUE_DEVICE_DEFAULT;
+  case UR_QUEUE_INFO_FLAGS:
+    return CL_QUEUE_PROPERTIES_ARRAY;
+  case UR_QUEUE_INFO_REFERENCE_COUNT:
+    return CL_QUEUE_REFERENCE_COUNT;
+  case UR_QUEUE_INFO_SIZE:
+    return CL_QUEUE_SIZE;
+  default:
+    return -1;
+  }
+}
+
+cl_command_queue_properties
+convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) {
+  cl_command_queue_properties CLCommandQueueProperties = 0;
+
+  if (URQueueProperties->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
+    CLCommandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+  }
+  if (URQueueProperties->flags & UR_QUEUE_FLAG_PROFILING_ENABLE) {
+    CLCommandQueueProperties |= CL_QUEUE_PROFILING_ENABLE;
+  }
+  if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE) {
+    CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE;
+  }
+  if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) {
+    CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT;
+  }
+
+  return CLCommandQueueProperties;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
+    ur_context_handle_t hContext, ur_device_handle_t hDevice,
+    const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) {
+
+  cl_platform_id CurPlatform;
+  CL_RETURN_ON_FAILURE_AND_SET_NULL(
+      clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice),
+                      CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform,
+                      nullptr),
+      phQueue);
+
+  cl_command_queue_properties CLProperties =
+      pProperties ? convertURQueuePropertiesToCL(pProperties) : 0;
+
+  // Properties supported by OpenCL backend.
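+  // Any other queue flags requested through UR (for example priority hints)
+  // are masked off below rather than reported as an error.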
+ const cl_command_queue_properties SupportByOpenCL = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + + oclv::OpenCLVersion Version; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); + + cl_int RetErr = CL_INVALID_OPERATION; + + if (Version < oclv::V2_0) { + *phQueue = cl_adapter::cast( + clCreateCommandQueue(cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), + CLProperties & SupportByOpenCL, &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; + } + + /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ + cl_queue_properties CreationFlagProperties[] = { + CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; + *phQueue = + cl_adapter::cast(clCreateCommandQueueWithProperties( + cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), CreationFlagProperties, + &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, + ur_queue_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + if (propName == UR_QUEUE_INFO_EMPTY) { + // OpenCL doesn't provide API to check the status of the queue. + return UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); + + cl_int RetErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, propSize, + pPropValue, pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, + ur_native_handle_t *phNativeQueue) { + return getNativeHandle(hQueue, phNativeQueue); +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue) { + + *phQueue = reinterpret_cast(hNativeQueue); + cl_int RetErr = + clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { + cl_int RetErr = + clRetainCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { + cl_int RetErr = + clReleaseCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp new file mode 100644 index 0000000000000..1201974f88f25 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -0,0 +1,196 @@ +//===--------- sampler.cpp - OpenCL Adapter --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +namespace { + +cl_sampler_info ur2CLSamplerInfo(ur_sampler_info_t URInfo) { + switch (URInfo) { +#define CASE(UR_INFO, CL_INFO) \ + case UR_INFO: \ + return CL_INFO; + + CASE(UR_SAMPLER_INFO_REFERENCE_COUNT, CL_SAMPLER_REFERENCE_COUNT) + CASE(UR_SAMPLER_INFO_CONTEXT, CL_SAMPLER_CONTEXT) + CASE(UR_SAMPLER_INFO_NORMALIZED_COORDS, CL_SAMPLER_NORMALIZED_COORDS) + CASE(UR_SAMPLER_INFO_ADDRESSING_MODE, CL_SAMPLER_ADDRESSING_MODE) + CASE(UR_SAMPLER_INFO_FILTER_MODE, CL_SAMPLER_FILTER_MODE) + +#undef CASE + + default: + cl_adapter::die("Unhandled: ur_sampler_info_t"); + } +} + +cl_addressing_mode ur2CLAddressingMode(ur_sampler_addressing_mode_t Mode) { + switch (Mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_ADDRESSING_MODE_NONE, CL_ADDRESS_NONE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP_TO_EDGE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP, CL_ADDRESS_CLAMP); + CASE(UR_SAMPLER_ADDRESSING_MODE_REPEAT, CL_ADDRESS_REPEAT); + CASE(UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, + CL_ADDRESS_MIRRORED_REPEAT); + +#undef CASE + + default: + cl_adapter::die("Unhandled: ur_sampler_addressing_mode_t"); + } +} + +cl_filter_mode ur2CLFilterMode(ur_sampler_filter_mode_t Mode) { + switch (Mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_FILTER_MODE_NEAREST, CL_FILTER_NEAREST) + CASE(UR_SAMPLER_FILTER_MODE_LINEAR, CL_FILTER_LINEAR) + +#undef CASE + + default: + cl_adapter::die("Unhandled: ur_sampler_filter_mode_t"); + } +} + +ur_sampler_addressing_mode_t cl2URAddressingMode(cl_addressing_mode Mode) { + switch (Mode) { + +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_ADDRESS_NONE, UR_SAMPLER_ADDRESSING_MODE_NONE); + CASE(CL_ADDRESS_CLAMP_TO_EDGE, UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE); + CASE(CL_ADDRESS_CLAMP, UR_SAMPLER_ADDRESSING_MODE_CLAMP); + CASE(CL_ADDRESS_REPEAT, UR_SAMPLER_ADDRESSING_MODE_REPEAT); + CASE(CL_ADDRESS_MIRRORED_REPEAT, + UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT); + +#undef CASE + + default: + cl_adapter::die("Unhandled: cl_addressing_mode"); + } +} + +ur_sampler_filter_mode_t cl2URFilterMode(cl_filter_mode Mode) { + switch (Mode) { +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_FILTER_NEAREST, UR_SAMPLER_FILTER_MODE_NEAREST) + CASE(CL_FILTER_LINEAR, UR_SAMPLER_FILTER_MODE_LINEAR); + +#undef CASE + + default: + cl_adapter::die("Unhandled: cl_filter_mode"); + } +} + +void cl2URSamplerInfoValue(cl_sampler_info Info, void *InfoValue) { + if (!InfoValue) { + return; + } + switch (Info) { + case CL_SAMPLER_ADDRESSING_MODE: { + cl_addressing_mode CLValue = + *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URAddressingMode(CLValue); + break; + } + case CL_SAMPLER_FILTER_MODE: { + cl_filter_mode CLMode = *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URFilterMode(CLMode); + break; + } + + default: + break; + } +} + +} // namespace + +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { + + // Initialize properties according to OpenCL 2.1 spec. 
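+  // Note: the descriptor fields are translated with the ur2CL* helpers above
+  // and passed straight to the classic clCreateSampler entry point, so no
+  // cl_sampler_properties list needs to be built here.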
+ ur_result_t ErrorCode; + cl_addressing_mode AddressingMode = + ur2CLAddressingMode(pDesc->addressingMode); + cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); + + // Always call OpenCL 1.0 API + *phSampler = cl_adapter::cast(clCreateSampler( + cl_adapter::cast(hContext), + static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, + cl_adapter::cast(&ErrorCode))); + + return mapCLErrorToUR(ErrorCode); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); + static_assert(sizeof(cl_addressing_mode) == + sizeof(ur_sampler_addressing_mode_t)); + + if (ur_result_t Err = mapCLErrorToUR( + clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, + propSize, pPropValue, pPropSizeRet))) { + return Err; + } + // Convert OpenCL returns to UR + cl2URSamplerInfoValue(SamplerInfo, pPropValue); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRetain(ur_sampler_handle_t hSampler) { + return mapCLErrorToUR( + clRetainSampler(cl_adapter::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRelease(ur_sampler_handle_t hSampler) { + return mapCLErrorToUR( + clReleaseSampler(cl_adapter::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( + ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { + *phNativeSampler = reinterpret_cast( + cl_adapter::cast(hSampler)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t, + const ur_sampler_native_properties_t *, ur_sampler_handle_t *phSampler) { + *phSampler = reinterpret_cast( + cl_adapter::cast(hNativeSampler)); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp new file mode 100644 index 0000000000000..37f7b37121966 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -0,0 +1,350 @@ +//===--------- ur_interface_loader.cpp - Unified Runtime ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include +#include + +namespace { + +// TODO - this is a duplicate of what is in the L0 plugin +// We should move this to somewhere common +ur_result_t validateProcInputs(ur_api_version_t Version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. 
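+  // UR_API_VERSION_CURRENT is still a pre-1.0 version, so for now the check
+  // below requires an exact match between the loader and this adapter.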
+ if (Version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} +} // namespace + +#if defined(__cplusplus) +extern "C" { +#endif + +UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t Version, ur_platform_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; + pDdiTable->pfnGet = urPlatformGet; + pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; + pDdiTable->pfnGetInfo = urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; + pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t Version, ur_context_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urContextCreate; + pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urContextGetInfo; + pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; + pDdiTable->pfnRelease = urContextRelease; + pDdiTable->pfnRetain = urContextRetain; + pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t Version, ur_event_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urEventGetInfo; + pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; + pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; + pDdiTable->pfnRelease = urEventRelease; + pDdiTable->pfnRetain = urEventRetain; + pDdiTable->pfnSetCallback = urEventSetCallback; + pDdiTable->pfnWait = urEventWait; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t Version, ur_program_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnBuild = urProgramBuild; + pDdiTable->pfnCompile = urProgramCompile; + pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; + pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; + pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; + pDdiTable->pfnGetInfo = urProgramGetInfo; + pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; + pDdiTable->pfnLink = urProgramLink; + pDdiTable->pfnRelease = urProgramRelease; + pDdiTable->pfnRetain = urProgramRetain; + pDdiTable->pfnSetSpecializationConstants = + urProgramSetSpecializationConstants; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t Version, ur_kernel_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urKernelCreate; + pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; + pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; + pDdiTable->pfnGetInfo = 
urKernelGetInfo; + pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; + pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; + pDdiTable->pfnRelease = urKernelRelease; + pDdiTable->pfnRetain = urKernelRetain; + pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; + pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; + pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; + pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; + pDdiTable->pfnSetArgValue = urKernelSetArgValue; + pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; + pDdiTable->pfnSetSpecializationConstants = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t Version, ur_sampler_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urSamplerCreate; + pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; + pDdiTable->pfnRelease = urSamplerRelease; + pDdiTable->pfnRetain = urSamplerRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t Version, ur_mem_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnBufferCreate = urMemBufferCreate; + pDdiTable->pfnBufferPartition = urMemBufferPartition; + pDdiTable->pfnBufferCreateWithNativeHandle = + urMemBufferCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urMemGetInfo; + pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; + pDdiTable->pfnImageCreate = urMemImageCreate; + pDdiTable->pfnImageGetInfo = urMemImageGetInfo; + pDdiTable->pfnRelease = urMemRelease; + pDdiTable->pfnRetain = urMemRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t Version, ur_enqueue_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; + pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnEventsWait = urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; + pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; + pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; + pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; + pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; + pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; + pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; + pDdiTable->pfnUSMFill = urEnqueueUSMFill; + pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; + pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; + pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL 
urGetGlobalProcAddrTable( + ur_api_version_t Version, ur_global_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnInit = urInit; + pDdiTable->pfnTearDown = urTearDown; + pDdiTable->pfnAdapterGet = urAdapterGet; + pDdiTable->pfnAdapterRelease = urAdapterRelease; + pDdiTable->pfnAdapterRetain = urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t Version, ur_queue_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urQueueCreate; + pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = urQueueFinish; + pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnGetInfo = urQueueGetInfo; + pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; + pDdiTable->pfnRelease = urQueueRelease; + pDdiTable->pfnRetain = urQueueRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t Version, ur_usm_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; + pDdiTable->pfnFree = urUSMFree; + pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; + pDdiTable->pfnHostAlloc = urUSMHostAlloc; + pDdiTable->pfnPoolCreate = nullptr; + pDdiTable->pfnPoolRetain = nullptr; + pDdiTable->pfnPoolRelease = nullptr; + pDdiTable->pfnPoolGetInfo = nullptr; + pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t Version, ur_usm_exp_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + + pDdiTable->pfnImportExp = urUSMImportExp; + pDdiTable->pfnReleaseExp = urUSMReleaseExp; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t Version, ur_device_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; + pDdiTable->pfnGet = urDeviceGet; + pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; + pDdiTable->pfnGetInfo = urDeviceGetInfo; + pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; + pDdiTable->pfnPartition = urDevicePartition; + pDdiTable->pfnRelease = urDeviceRelease; + pDdiTable->pfnRetain = urDeviceRetain; + pDdiTable->pfnSelectBinary = urDeviceSelectBinary; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnCreateExp = urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendMemcpyUSMExp = 
urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendMembufferCopyRectExp = + urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; + pDdiTable->pfnAppendMembufferReadRectExp = + urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMembufferWriteExp = + urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMembufferWriteRectExp = + urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + + return retVal; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; + + return retVal; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnUnsampledImageHandleDestroyExp = + urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; + pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = + urBindlessImagesImportExternalSemaphoreOpaqueFDExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + urBindlessImagesSignalExternalSemaphoreExp; + return UR_RESULT_SUCCESS; +} + +#if defined(__cplusplus) +} // extern "C" +#endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp new file mode 100644 index 0000000000000..59fd53b6831d0 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp @@ -0,0 +1,429 @@ +//===--------- usm.cpp - OpenCL Adapter -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t, size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clHostMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, Properties, size, Alignment, + cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, + size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? 
pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clDeviceMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, + size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + const auto *NextStruct = + (pUSMDesc ? static_cast(pUSMDesc->pNext) + : nullptr); + while (NextStruct) { + if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + reinterpret_cast(NextStruct); + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *DevDesc = + reinterpret_cast(NextStruct); + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + } + NextStruct = static_cast(NextStruct->pNext); + } + + cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; + + // Passing a flags value of 0 doesn't work, so truncate the properties + if (Flags == 0) { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clSharedMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + assert(Alignment == 0 || + (RetVal == UR_RESULT_SUCCESS && + reinterpret_cast(*ppMem) % Alignment == 0)); + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, + void *pMem) { + + // Use a blocking free to avoid issues with indirect access from kernels that + // might be still running. 
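+  // clMemBlockingFreeINTEL waits for any enqueued commands that still use the
+  // allocation to complete before freeing it, unlike the non-blocking
+  // clMemFreeINTEL.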
+ clMemBlockingFreeINTEL_fn FuncPtr = nullptr; + + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + cl_ext::MemBlockingFreeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( + ur_queue_handle_t hQueue, void *ptr, size_t patternSize, + const void *pPattern, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueMemFillINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + cl_ext::EnqueueMemFillName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, + patternSize, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( + ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, + pSrc, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + // flags is currently unused so fail if set + if (flags != 0) + return UR_RESULT_ERROR_INVALID_VALUE; + + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + /* + // Use this once impls support it. 
+ // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMigrateMemINTEL_fn FuncPtr; + ur_result_t Err = cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMigrateMemINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = map_cl_error_to_ur( + FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + numEventsInWaitList, + reinterpret_cast(phEventWaitList), + reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { + + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), 0, nullptr, + reinterpret_cast(phEvent))); + + /* + // Change to use this once drivers support it. + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, + sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMemAdviseINTEL_fn FuncPtr; + ur_result_t Err = + cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMemAdviseINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = + map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + size, advice, 0, nullptr, reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pMem, + [[maybe_unused]] size_t pitch, [[maybe_unused]] size_t patternSize, + [[maybe_unused]] const void *pPattern, [[maybe_unused]] size_t width, + [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] bool blocking, + [[maybe_unused]] void *pDst, [[maybe_unused]] size_t dstPitch, + [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t srcPitch, + [[maybe_unused]] size_t width, [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, + cl_ext::GetMemAllocInfoName, &FuncPtr); + + cl_mem_info_intel PropNameCL; + switch (propName) { + case UR_USM_ALLOC_INFO_TYPE: + PropNameCL = CL_MEM_ALLOC_TYPE_INTEL; + break; + case UR_USM_ALLOC_INFO_BASE_PTR: + PropNameCL = CL_MEM_ALLOC_BASE_PTR_INTEL; + break; + case UR_USM_ALLOC_INFO_SIZE: + PropNameCL = 
CL_MEM_ALLOC_SIZE_INTEL; + break; + case UR_USM_ALLOC_INFO_DEVICE: + PropNameCL = CL_MEM_ALLOC_DEVICE_INTEL; + break; + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } + + if (FuncPtr) { + RetVal = + mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), pMem, + PropNameCL, propSize, pPropValue, pPropSizeRet)); + if (RetVal == UR_RESULT_SUCCESS && pPropValue && + propName == UR_USM_ALLOC_INFO_TYPE) { + auto *AllocTypeCL = + static_cast(pPropValue); + ur_usm_type_t AllocTypeUR; + switch (*AllocTypeCL) { + case CL_MEM_TYPE_HOST_INTEL: + AllocTypeUR = UR_USM_TYPE_HOST; + break; + case CL_MEM_TYPE_DEVICE_INTEL: + AllocTypeUR = UR_USM_TYPE_DEVICE; + break; + case CL_MEM_TYPE_SHARED_INTEL: + AllocTypeUR = UR_USM_TYPE_SHARED; + break; + case CL_MEM_TYPE_UNKNOWN_INTEL: + default: + AllocTypeUR = UR_USM_TYPE_UNKNOWN; + break; + } + auto *AllocTypeOut = static_cast(pPropValue); + *AllocTypeOut = AllocTypeUR; + } + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMImportExp([[maybe_unused]] ur_context_handle_t Context, + [[maybe_unused]] void *HostPtr, [[maybe_unused]] size_t Size) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMReleaseExp([[maybe_unused]] ur_context_handle_t Context, + [[maybe_unused]] void *HostPtr) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp new file mode 100644 index 0000000000000..e7dd9083342a0 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp @@ -0,0 +1,39 @@ +//===--------- usm_p2p.cpp - OpenCL Adapter-------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( + [[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice, + [[maybe_unused]] ur_exp_peer_info_t propName, + [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, + [[maybe_unused]] size_t *pPropSizeRet) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp index 37a6958b17612..d2846dfb71315 100644 --- a/sycl/plugins/unified_runtime/ur/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -271,7 +271,7 @@ class UrReturnHelper { param_value_size_ret, t); } - // Array return value where element type is differrent from T + // Array return value where element type is different from T template ur_result_t operator()(const T *t, size_t s) { return ur::getInfoArray(s, param_value_size, param_value, diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index fc8355fd7bacd..11ee74902849b 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -9,31 +9,79 @@ piContextCreate piContextGetInfo +piContextRelease +piContextRetain piDeviceGetInfo +piDevicePartition +piDeviceRelease +piDeviceRetain piDevicesGet +piEnqueueEventsWait +piEnqueueEventsWaitWithBarrier +piEnqueueKernelLaunch +piEnqueueMemBufferCopy +piEnqueueMemBufferCopyRect +piEnqueueMemBufferFill piEnqueueMemBufferMap +piEnqueueMemBufferRead +piEnqueueMemBufferReadRect +piEnqueueMemBufferWrite +piEnqueueMemBufferWriteRect +piEnqueueMemImageCopy +piEnqueueMemImageFill +piEnqueueMemImageRead +piEnqueueMemImageWrite +piEnqueueMemUnmap piEventCreate piEventGetInfo +piEventGetProfilingInfo +piEventRelease +piEventRetain +piEventSetCallback +piEventSetStatus +piEventsWait piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo +piKernelGetInfo piKernelGetSubGroupInfo +piKernelRelease +piKernelRetain +piKernelSetArg piKernelSetExecInfo piMemBufferCreate piMemBufferPartition +piMemGetInfo piMemImageCreate +piMemImageGetInfo +piMemRelease +piMemRetain piPlatformGetInfo piPlatformsGet piPluginGetBackendOption piPluginGetLastError piPluginInit +piProgramBuild +piProgramCompile piProgramCreate piProgramCreateWithBinary +piProgramGetBuildInfo +piProgramGetInfo piProgramLink +piProgramRelease +piProgramRetain piQueueCreate +piQueueFinish +piQueueFlush piQueueGetInfo +piQueueRelease +piQueueRetain piSamplerCreate +piSamplerGetInfo +piSamplerRelease +piSamplerRetain piTearDown +piextBindlessImageSamplerCreate piextCommandBufferCreate piextCommandBufferFinalize 
piextCommandBufferMemBufferCopy @@ -48,14 +96,20 @@ piextCommandBufferRelease piextCommandBufferRetain piextContextCreateWithNativeHandle piextContextGetNativeHandle +piextContextSetExtendedDeleter +piextDestroyExternalSemaphore piextDeviceCreateWithNativeHandle piextDeviceGetNativeHandle piextDeviceSelectBinary +piextDisablePeerAccess +piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle +piextEventGetNativeHandle piextGetDeviceFunctionPointer +piextImportExternalSemaphoreOpaqueFD piextKernelCreateWithNativeHandle piextKernelGetNativeHandle piextKernelSetArgMemObj @@ -63,15 +117,31 @@ piextKernelSetArgPointer piextKernelSetArgSampler piextMemCreateWithNativeHandle piextMemGetNativeHandle +piextMemImageAllocate +piextMemImageCopy piextMemImageCreateWithNativeHandle +piextMemImageFree +piextMemImageGetInfo +piextMemImportOpaqueFD +piextMemMapExternalArray +piextMemMipmapFree +piextMemMipmapGetLevel +piextMemReleaseInterop +piextMemSampledImageCreate +piextMemSampledImageHandleDestroy +piextMemUnsampledImageCreate +piextMemUnsampledImageHandleDestroy +piextPeerAccessGetInfo piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle +piextPluginGetOpaqueData piextProgramCreateWithNativeHandle piextProgramGetNativeHandle piextProgramSetSpecializationConstant piextQueueCreate piextQueueCreateWithNativeHandle piextQueueGetNativeHandle +piextSignalExternalSemaphore piextUSMDeviceAlloc piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise @@ -84,5 +154,7 @@ piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc piextUSMImport +piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextWaitExternalSemaphore