From 5211257d4d7942acca9ba5678b9a51496ffe7ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 17 May 2023 17:43:11 +0000 Subject: [PATCH 01/36] [SYCL][OpenCL] Create OpenCL adapter. Port Platform, Context and Device API's from PI to UR. --- sycl/plugins/CMakeLists.txt | 4 +- sycl/plugins/opencl/CMakeLists.txt | 18 + sycl/plugins/opencl/pi_opencl.cpp | 613 +--------- sycl/plugins/unified_runtime/pi2ur.hpp | 8 +- .../ur/adapters/opencl/common.cpp | 44 + .../ur/adapters/opencl/common.hpp | 133 +++ .../ur/adapters/opencl/context.cpp | 121 ++ .../ur/adapters/opencl/device.cpp | 1018 +++++++++++++++++ .../ur/adapters/opencl/device.hpp | 19 + .../ur/adapters/opencl/platform.cpp | 122 ++ .../adapters/opencl/ur_interface_loader.cpp | 258 +++++ 11 files changed, 1767 insertions(+), 591 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt index 85fe986249cb3..395c9bb732773 100755 --- a/sycl/plugins/CMakeLists.txt +++ b/sycl/plugins/CMakeLists.txt @@ -8,8 +8,8 @@ foreach(plugin ${SYCL_ENABLE_PLUGINS}) add_subdirectory(${plugin}) endforeach() -# level_zero plugin depends today on unified_runtime plugin +# level_zero and opencl plugins depend today on unified_runtime plugin # and unified_runtime plugin is not an independent plugin, adding it explicitly -if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS OR "opencl" IN_LIST 
SYCL_ENABLE_PLUGINS) add_subdirectory(unified_runtime) endif() diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 9943827e48788..65c535354f8ee 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -12,9 +12,27 @@ add_sycl_plugin(opencl SOURCES + # Some code is shared with the UR adapter + "../unified_runtime/pi2ur.hpp" + "../unified_runtime/pi2ur.cpp" + "../unified_runtime/ur/ur.hpp" + "../unified_runtime/ur/ur.cpp" + "../unified_runtime/ur/usm_allocator.cpp" + "../unified_runtime/ur/usm_allocator.hpp" + "../unified_runtime/ur/adapters/opencl/common.cpp" + "../unified_runtime/ur/adapters/opencl/common.hpp" + "../unified_runtime/ur/adapters/opencl/context.cpp" + "../unified_runtime/ur/adapters/opencl/device.cpp" + "../unified_runtime/ur/adapters/opencl/device.hpp" + "../unified_runtime/ur/adapters/opencl/platform.cpp" + # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime LIBRARIES + UnifiedRuntime-Headers OpenCL-ICD ) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index a463b1281c228..8e261a324bc30 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -33,6 +33,8 @@ #include #include +#include "../unified_runtime/ur/adapters/opencl/common.hpp" + #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \ if (err != CL_SUCCESS) { \ if (ptr != nullptr) \ @@ -79,25 +81,6 @@ CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; #undef CONSTFIX -// Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR -constexpr size_t MaxMessageSize = 256; -thread_local pi_result ErrorMessageCode = PI_SUCCESS; -thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] static void setErrorMessage(const char *message, - pi_result error_code) { - assert(strlen(message) <= 
MaxMessageSize); - strcpy(ErrorMessage, message); - ErrorMessageCode = error_code; -} - -// Returns plugin specific error and warning messages -pi_result piPluginGetLastError(char **message) { - *message = &ErrorMessage[0]; - return ErrorMessageCode; -} - // Returns plugin specific backend option. pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, const char **backend_option) { @@ -352,467 +335,6 @@ static pi_result USMSetIndirectAccess(pi_kernel kernel) { extern "C" { -pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - switch (paramName) { - // TODO: Check regularly to see if support in enabled in OpenCL. - // Intel GPU EU device-specific information extensions. - // Some of the queries are enabled by cl_intel_device_attribute_query - // extension, but it's not yet in the Registry. - case PI_DEVICE_INFO_PCI_ADDRESS: - case PI_DEVICE_INFO_GPU_EU_COUNT: - case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - case PI_DEVICE_INFO_GPU_SLICES: - case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: - // TODO: Check if device UUID extension is enabled in OpenCL. 
- // For details about Intel UUID extension, see - // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md - case PI_DEVICE_INFO_UUID: - return PI_ERROR_INVALID_VALUE; - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - // This query is missing before OpenCL 3.0 - // Check version and handle appropriately - OCLV::OpenCLVersion devVer; - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - // Minimum required capability to be returned - // For OpenCL 1.2, this is all that is required - pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED; - - if (devVer >= OCLV::V3_0) { - // For OpenCL >=3.0, the query should be implemented - cl_device_atomic_capabilities cl_capabilities = 0; - cl_int ret_err = clGetDeviceInfo( - deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr); - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - // Mask operation to only consider atomic_memory_order* capabilities - cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | - CL_DEVICE_ATOMIC_ORDER_ACQ_REL | - CL_DEVICE_ATOMIC_ORDER_SEQ_CST; - cl_capabilities &= mask; - - // The memory order capabilities are hierarchical, if one is implied, all - // preceding capbilities are implied as well. Especially in the case of - // ACQ_REL. 
- if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - capabilities |= PI_MEMORY_ORDER_SEQ_CST; - } - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { - capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE; - } - } else if (devVer >= OCLV::V2_0) { - // For OpenCL 2.x, return all capabilities - // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) - capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | - PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; - } - - if (paramValue) { - if (paramValueSize < sizeof(pi_memory_order_capabilities)) - return static_cast(CL_INVALID_VALUE); - - std::memcpy(paramValue, &capabilities, sizeof(capabilities)); - } - - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(capabilities); - - return static_cast(CL_SUCCESS); - } - case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2 - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. 
(https://github.com/KhronosGroup/SYCL-Docs/pull/382) - pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | - PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && - "Violates minimum mandated guarantee"); - - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - // We already initialized to these minimum mandated capabilities. Just - // check wider scopes. - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - result |= PI_MEMORY_SCOPE_DEVICE; - } - - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - result |= PI_MEMORY_SCOPE_SYSTEM; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we - // already initialized using it. 
- if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | - // ALL_DEVICES - result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2 - pi_memory_order_capabilities result = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && - "Violates minimum mandated guarantee"); - assert((devCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && - "Violates minimum mandated guarantee"); - - // We already initialized to minimum mandated capabilities. Just - // check stronger orders. - if (devCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - result |= PI_MEMORY_ORDER_SEQ_CST; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are RELAXED | ACQ_REL, we - // already initialized using these. 
- if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | - // SEQ_CST - result |= PI_MEMORY_ORDER_SEQ_CST; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // Initialize result to minimum mandated capabilities according to - // SYCL2020 4.6.3.2. - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | - PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP; - - OCLV::OpenCLVersion devVer; - - cl_device_id deviceID = cast(device); - cl_int ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - cl_device_atomic_capabilities devCapabilities = 0; - if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), - &devCapabilities, nullptr); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && - "Violates minimum mandated guarantee"); - - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - // We already initialized to these minimum mandated capabilities. Just - // check wider scopes. 
- if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - result |= PI_MEMORY_SCOPE_DEVICE; - } - - if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - result |= PI_MEMORY_SCOPE_SYSTEM; - } - - } else { - // This info is only available in OpenCL version >= 3.0 - // Just return minimum mandated capabilities for older versions. - // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we - // already initialized using it. - if (devVer >= OCLV::V2_0) { - // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | - // ALL_DEVICES - result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; - } - } - if (paramValue) { - if (paramValueSize < sizeof(cl_device_atomic_capabilities)) - return PI_ERROR_INVALID_VALUE; - - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_ATOMIC_64: { - cl_int ret_err = CL_SUCCESS; - cl_bool result = CL_FALSE; - bool supported = false; - - ret_err = checkDeviceExtensions( - cast(device), - {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, - supported); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - result = supported; - std::memcpy(paramValue, &result, sizeof(cl_bool)); - return PI_SUCCESS; - } - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { - // bfloat16 math functions are not yet supported on Intel GPUs. 
- bool result = false; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_IMAGE_SRGB: { - bool result = true; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: { - cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - cl_int res = clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, - sizeof(cl_device_type), &devType, nullptr); - - // FIXME: here we assume that program built for a root GPU device can be - // used on its sub-devices without re-building - bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU); - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: - // Returns the maximum sizes of a work group for each dimension one - // could use to submit a kernel. There is no such query defined in OpenCL - // so we'll return the maximum value. - { - if (paramValueSizeRet) - *paramValueSizeRet = paramValueSize; - static constexpr size_t Max = (std::numeric_limits::max)(); - size_t *out = cast(paramValue); - if (paramValueSize >= sizeof(size_t)) - out[0] = Max; - if (paramValueSize >= 2 * sizeof(size_t)) - out[1] = Max; - if (paramValueSize >= 3 * sizeof(size_t)) - out[2] = Max; - return PI_SUCCESS; - } - case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { - pi_int32 result = 1; - std::memcpy(paramValue, &result, sizeof(pi_int32)); - return PI_SUCCESS; - } - case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { - // Corresponding OpenCL query is only available starting with OpenCL 2.1 and - // we have to emulate it on older OpenCL runtimes. 
- OCLV::OpenCLVersion version; - cl_int err = getDeviceVersion(cast(device), version); - if (err != CL_SUCCESS) - return static_cast(err); - - if (version >= OCLV::V2_1) { - err = clGetDeviceInfo(cast(device), - cast(paramName), paramValueSize, - paramValue, paramValueSizeRet); - if (err != CL_SUCCESS) - return static_cast(err); - - if (paramValue && *static_cast(paramValue) == 0u) { - // OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 spec - // says that minimum possible value is 1. - cl_uint value = 1u; - std::memcpy(paramValue, &value, sizeof(cl_uint)); - } - - return static_cast(err); - } - - // Otherwise, we can't query anything, because even cl_khr_subgroups does - // not provide similar query. Therefore, simply return minimum possible - // value 1 here. - if (paramValue && paramValueSize < sizeof(cl_uint)) - return static_cast(CL_INVALID_VALUE); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(cl_uint); - - if (paramValue) { - cl_uint value = 1u; - std::memcpy(paramValue, &value, sizeof(cl_uint)); - } - - return static_cast(CL_SUCCESS); - } - case PI_DEVICE_INFO_BACKEND_VERSION: { - // TODO: return some meaningful for backend_version below - const char *value = ""; - size_t valueSize = (strlen(value) + 1) * sizeof(char); - if (paramValue) - std::memcpy(paramValue, value, valueSize); - if (paramValueSizeRet != nullptr) - *paramValueSizeRet = valueSize; - return PI_SUCCESS; - } - case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { - cl_int ret_err = CL_SUCCESS; - bool result = false; - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - bool supported = false; - - ret_err = - checkDeviceExtensions(cast(device), - {"cl_intel_mem_channel_property"}, supported); - if (ret_err != CL_SUCCESS) - return static_cast(ret_err); - - result = supported; - std::memcpy(paramValue, &result, sizeof(result)); - return PI_SUCCESS; - } - default: - cl_int result = clGetDeviceInfo( - cast(device), cast(paramName), - paramValueSize, 
paramValue, paramValueSizeRet); - return static_cast(result); - } -} - -pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, - pi_uint32 *num_platforms) { - cl_int result = clGetPlatformIDs(cast(num_entries), - cast(platforms), - cast(num_platforms)); - - // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms - if (result == CL_PLATFORM_NOT_FOUND_KHR) { - assert(num_platforms != 0); - *num_platforms = 0; - result = PI_SUCCESS; - } - return static_cast(result); -} - -pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - - switch (paramName) { - case PI_EXT_PLATFORM_INFO_BACKEND: { - pi_platform_backend result = PI_EXT_PLATFORM_BACKEND_OPENCL; - if (paramValue) { - if (paramValueSize < sizeof(result)) - return PI_ERROR_INVALID_VALUE; - std::memcpy(paramValue, &result, sizeof(result)); - } - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); - return PI_SUCCESS; - } - default: { - cl_int result = clGetPlatformInfo( - cast(platform), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); - } - } - return PI_SUCCESS; -} - -pi_result piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform *platform) { - assert(platform); - assert(nativeHandle); - *platform = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { - cl_int result = clGetDeviceIDs( - cast(platform), cast(device_type), - cast(num_entries), cast(devices), - cast(num_devices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (result == CL_DEVICE_NOT_FOUND) { - assert(num_devices != 0); - *num_devices = 0; - result = PI_SUCCESS; - } - return cast(result); -} - pi_result piextDeviceSelectBinary(pi_device device, 
pi_device_binary *images, pi_uint32 num_images, pi_uint32 *selected_image_ind) { @@ -884,13 +406,6 @@ pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, return PI_ERROR_INVALID_BINARY; } -pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform, pi_device *piDevice) { - assert(piDevice != nullptr); - *piDevice = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - pi_result piextQueueCreate(pi_context Context, pi_device Device, pi_queue_properties *Properties, pi_queue *Queue) { assert(Properties); @@ -1231,65 +746,6 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, return pi_ret_err; } -pi_result piContextCreate(const pi_context_properties *properties, - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, - const void *private_info, - size_t cb, void *user_data1), - void *user_data, pi_context *retcontext) { - pi_result ret = PI_ERROR_INVALID_OPERATION; - *retcontext = cast( - clCreateContext(properties, cast(num_devices), - cast(devices), pfn_notify, - user_data, cast(&ret))); - - return ret; -} - -pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_uint32 num_devices, - const pi_device *devices, - bool ownNativeHandle, - pi_context *piContext) { - (void)num_devices; - (void)devices; - (void)ownNativeHandle; - assert(piContext != nullptr); - assert(ownNativeHandle == false); - *piContext = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piContextGetInfo(pi_context context, pi_context_info paramName, - size_t paramValueSize, void *paramValue, - size_t *paramValueSizeRet) { - switch (paramName) { - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: - case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: { - // 2D USM memops are not supported. 
- cl_bool result = false; - std::memcpy(paramValue, &result, sizeof(cl_bool)); - return PI_SUCCESS; - } - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // These queries should be dealt with in context_impl.cpp by calling the - // queries of each device separately and building the intersection set. - setErrorMessage("These queries should have never come here.", - PI_ERROR_INVALID_ARG_VALUE); - return PI_ERROR_PLUGIN_SPECIFIC_ERROR; - } - default: - cl_int result = clGetContextInfo( - cast(context), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); - } -} - pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem, const pi_mem_properties *properties) { @@ -1446,14 +902,14 @@ pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, // dimention to avoid truncation of max sub-group size. 
pi_uint32 max_dims = 0; pi_result pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - sizeof(pi_uint32), &max_dims, nullptr); + pi2ur::piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, + sizeof(pi_uint32), &max_dims, nullptr); if (pi_ret_err != PI_SUCCESS) return pi_ret_err; std::shared_ptr WGSizes{new size_t[max_dims]}; - pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - max_dims * sizeof(size_t), WGSizes.get(), nullptr); + pi_ret_err = pi2ur::piDeviceGetInfo( + device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, max_dims * sizeof(size_t), + WGSizes.get(), nullptr); if (pi_ret_err != PI_SUCCESS) return pi_ret_err; for (size_t i = 1; i < max_dims; ++i) @@ -2249,21 +1705,6 @@ static pi_result piextGetNativeHandle(void *piObj, return PI_SUCCESS; } -pi_result piextPlatformGetNativeHandle(pi_platform platform, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(platform, nativeHandle); -} - -pi_result piextDeviceGetNativeHandle(pi_device device, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(device, nativeHandle); -} - -pi_result piextContextGetNativeHandle(pi_context context, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(context, nativeHandle); -} - pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc) { @@ -2544,7 +1985,7 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { setErrorMessage( "OpenCL version for device and/or platform is less than 2.1", - PI_ERROR_INVALID_OPERATION); + UR_RESULT_ERROR_INVALID_OPERATION); return PI_ERROR_INVALID_OPERATION; } @@ -2594,28 +2035,30 @@ pi_result piPluginInit(pi_plugin *PluginInit) { (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); // Platform - _PI_CL(piPlatformsGet, piPlatformsGet) - _PI_CL(piPlatformGetInfo, piPlatformGetInfo) - 
_PI_CL(piextPlatformGetNativeHandle, piextPlatformGetNativeHandle) + _PI_CL(piPlatformsGet, pi2ur::piPlatformsGet) + _PI_CL(piPlatformGetInfo, pi2ur::piPlatformGetInfo) + _PI_CL(piextPlatformGetNativeHandle, pi2ur::piextPlatformGetNativeHandle) _PI_CL(piextPlatformCreateWithNativeHandle, - piextPlatformCreateWithNativeHandle) + pi2ur::piextPlatformCreateWithNativeHandle) // Device - _PI_CL(piDevicesGet, piDevicesGet) - _PI_CL(piDeviceGetInfo, piDeviceGetInfo) - _PI_CL(piDevicePartition, clCreateSubDevices) - _PI_CL(piDeviceRetain, clRetainDevice) - _PI_CL(piDeviceRelease, clReleaseDevice) + _PI_CL(piDevicesGet, pi2ur::piDevicesGet) + _PI_CL(piDeviceGetInfo, pi2ur::piDeviceGetInfo) + _PI_CL(piDevicePartition, pi2ur::piDevicePartition) + _PI_CL(piDeviceRetain, pi2ur::piDeviceRetain) + _PI_CL(piDeviceRelease, pi2ur::piDeviceRelease) _PI_CL(piextDeviceSelectBinary, piextDeviceSelectBinary) _PI_CL(piextGetDeviceFunctionPointer, piextGetDeviceFunctionPointer) - _PI_CL(piextDeviceGetNativeHandle, piextDeviceGetNativeHandle) - _PI_CL(piextDeviceCreateWithNativeHandle, piextDeviceCreateWithNativeHandle) + _PI_CL(piextDeviceGetNativeHandle, pi2ur::piextDeviceGetNativeHandle) + _PI_CL(piextDeviceCreateWithNativeHandle, + pi2ur::piextDeviceCreateWithNativeHandle) // Context - _PI_CL(piContextCreate, piContextCreate) - _PI_CL(piContextGetInfo, piContextGetInfo) - _PI_CL(piContextRetain, clRetainContext) - _PI_CL(piContextRelease, clReleaseContext) - _PI_CL(piextContextGetNativeHandle, piextContextGetNativeHandle) - _PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) + _PI_CL(piContextCreate, pi2ur::piContextCreate) + _PI_CL(piContextGetInfo, pi2ur::piContextGetInfo) + _PI_CL(piContextRetain, pi2ur::piContextRetain) + _PI_CL(piContextRelease, pi2ur::piContextRelease) + _PI_CL(piextContextGetNativeHandle, pi2ur::piextContextGetNativeHandle) + _PI_CL(piextContextCreateWithNativeHandle, + pi2ur::piextContextCreateWithNativeHandle) // Queue 
_PI_CL(piQueueCreate, piQueueCreate) _PI_CL(piextQueueCreate, piextQueueCreate) @@ -2738,7 +2181,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj) _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) - _PI_CL(piPluginGetLastError, piPluginGetLastError) + _PI_CL(piPluginGetLastError, pi2ur::piPluginGetLastError) _PI_CL(piTearDown, piTearDown) _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) _PI_CL(piPluginGetBackendOption, piPluginGetBackendOption) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 9642105dc5e61..4c418127c8312 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1393,9 +1393,7 @@ inline pi_result piextContextCreateWithNativeHandle( pi_native_handle NativeHandle, pi_uint32 NumDevices, const pi_device *Devices, bool OwnNativeHandle, pi_context *RetContext) { PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); - PI_ASSERT(Devices, PI_ERROR_INVALID_DEVICE); PI_ASSERT(RetContext, PI_ERROR_INVALID_VALUE); - PI_ASSERT(NumDevices, PI_ERROR_INVALID_VALUE); ur_native_handle_t NativeContext = reinterpret_cast(NativeHandle); @@ -1404,8 +1402,10 @@ inline pi_result piextContextCreateWithNativeHandle( ur_context_handle_t *UrContext = reinterpret_cast(RetContext); - ur_context_native_properties_t Properties{}; - Properties.isNativeHandleOwned = OwnNativeHandle; + ur_context_native_properties_t Properties{ + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES, nullptr, + OwnNativeHandle}; + HANDLE_ERRORS(urContextCreateWithNativeHandle( NativeContext, NumDevices, UrDevices, &Properties, UrContext)); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp new file mode 100644 index 0000000000000..5c0c8f680cc4a --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -0,0 +1,44 @@ +//===--------- common.hpp - 
OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR +thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; +thread_local char ErrorMessage[MaxMessageSize]; + +// Utility function for setting a message and warning +[[maybe_unused]] void setErrorMessage(const char *message, + ur_result_t error_code) { + assert(strlen(message) <= MaxMessageSize); + strcpy(ErrorMessage, message); + ErrorMessageCode = error_code; +} + +// Returns plugin specific error and warning messages; common implementation +// that can be shared between adapters +ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) { + *ppMessage = &ErrorMessage[0]; + return ErrorMessageCode; +} + +ur_result_t map_cl_error_to_ur(cl_int result) { + + switch (result) { + case CL_SUCCESS: + return UR_RESULT_SUCCESS; + case CL_OUT_OF_HOST_MEMORY: + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + case CL_INVALID_VALUE: + return UR_RESULT_ERROR_INVALID_VALUE; + case CL_INVALID_PLATFORM: + return UR_RESULT_ERROR_INVALID_PLATFORM; + default: + return UR_RESULT_ERROR_UNKNOWN; + } +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp new file mode 100644 index 0000000000000..397237eb20d10 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -0,0 +1,133 @@ +//===--------- common.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include +#include + +#define CL_RETURN_ON_FAILURE(clCall) \ + if (const cl_int cl_result = clCall != CL_SUCCESS) { \ + return map_cl_error_to_ur(cl_result); \ + } + +constexpr size_t MaxMessageSize = 256; +extern thread_local ur_result_t ErrorMessageCode; +extern thread_local char ErrorMessage[MaxMessageSize]; + +// Utility function for setting a message and warning +[[maybe_unused]] void setErrorMessage(const char *message, + ur_result_t error_code); + +namespace OCLV { +class OpenCLVersion { +protected: + unsigned int ocl_major; + unsigned int ocl_minor; + +public: + OpenCLVersion() : ocl_major(0), ocl_minor(0) {} + + OpenCLVersion(unsigned int ocl_major, unsigned int ocl_minor) + : ocl_major(ocl_major), ocl_minor(ocl_minor) { + if (!isValid()) { + ocl_major = ocl_minor = 0; + } + } + + OpenCLVersion(const char *version) : OpenCLVersion(std::string(version)) {} + + OpenCLVersion(const std::string &version) : ocl_major(0), ocl_minor(0) { + /* The OpenCL specification defines the full version string as + * 'OpenCL' for platforms and as + * 'OpenCL' for devices. 
+ */ + std::regex rx("OpenCL ([0-9]+)\\.([0-9]+)"); + std::smatch match; + + if (std::regex_search(version, match, rx) && (match.size() == 3)) { + ocl_major = strtoul(match[1].str().c_str(), nullptr, 10); + ocl_minor = strtoul(match[2].str().c_str(), nullptr, 10); + + if (!isValid()) { + ocl_major = ocl_minor = 0; + } + } + } + + bool operator==(const OpenCLVersion &v) const { + return ocl_major == v.ocl_major && ocl_minor == v.ocl_minor; + } + + bool operator!=(const OpenCLVersion &v) const { return !(*this == v); } + /* Orders by major version first and by minor version on a tie. */ + bool operator<(const OpenCLVersion &v) const { + if (ocl_major == v.ocl_major) + return ocl_minor < v.ocl_minor; + + return ocl_major < v.ocl_major; + } + + bool operator>(const OpenCLVersion &v) const { return v < *this; } + + bool operator<=(const OpenCLVersion &v) const { + return (*this < v) || (*this == v); + } + + bool operator>=(const OpenCLVersion &v) const { + return (*this > v) || (*this == v); + } + /* Major 0 and UINT_MAX are invalid; for major 1 and 2 the minor must be at most 2; for any other major only a minor of UINT_MAX is rejected. */ + bool isValid() const { + switch (ocl_major) { + case 0: + return false; + case 1: + case 2: + return ocl_minor <= 2; + case UINT_MAX: + return false; + default: + return ocl_minor != UINT_MAX; + } + } + + int getMajor() const { return ocl_major; } + int getMinor() const { return ocl_minor; } +}; + +inline const OpenCLVersion V1_0(1, 0); +inline const OpenCLVersion V1_1(1, 1); +inline const OpenCLVersion V1_2(1, 2); +inline const OpenCLVersion V2_0(2, 0); +inline const OpenCLVersion V2_1(2, 1); +inline const OpenCLVersion V2_2(2, 2); +inline const OpenCLVersion V3_0(3, 0); + +} // namespace OCLV + +// Checked cast used between UR and OpenCL types: for a pointer target both +// sides must be pointers and the value is reinterpret_cast; otherwise size and +// signedness must match and the value is static_cast. +namespace cl { +template To cast(From value) { + + if constexpr (std::is_pointer_v) { + static_assert(std::is_pointer_v == std::is_pointer_v, + "Cast failed pointer check"); + return reinterpret_cast(value); + } else { + static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); + static_assert(std::is_signed_v == std::is_signed_v, + "Cast failed sign check"); + return static_cast(value); + } +} +} // namespace cl + +ur_result_t
map_cl_error_to_ur(cl_int result); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp new file mode 100644 index 0000000000000..e45d1991bbd66 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -0,0 +1,121 @@ +//===--------- context.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +#include + +// Creates an OpenCL context for the given devices with clCreateContext and +// returns the resulting CL status mapped to a UR result. +UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( + uint32_t DeviceCount, const ur_device_handle_t *phDevices, + const ur_context_properties_t *, ur_context_handle_t *phContext) { + + UR_ASSERT(phDevices, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + cl_int ret; + *phContext = cl::cast( + clCreateContext(nullptr, cl::cast(DeviceCount), + cl::cast(phDevices), nullptr, + nullptr, cl::cast(&ret))); + + return map_cl_error_to_ur(ret); +} + +// Translates a UR context-info enumerator into the matching CL_CONTEXT_* query +// id; enumerators without a case here yield -1. +cl_int map_ur_context_info_to_cl(ur_context_info_t urPropName) { + + cl_int cl_propName; + switch (urPropName) { + case UR_CONTEXT_INFO_NUM_DEVICES: + cl_propName = CL_CONTEXT_NUM_DEVICES; + break; + case UR_CONTEXT_INFO_DEVICES: + cl_propName = CL_CONTEXT_DEVICES; + break; + case UR_CONTEXT_INFO_REFERENCE_COUNT: + cl_propName = CL_CONTEXT_REFERENCE_COUNT; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +// Answers a context-info query; the switch below decides per property whether +// the value is fixed, rejected, or forwarded to clGetContextInfo. +UR_APIEXPORT ur_result_t UR_APICALL +urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int cl_propName =
map_ur_context_info_to_cl(propName); + + switch (static_cast(propName)) { + /* 2D USM memops are not supported. */ + case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: + case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: { + return ReturnValue(false); + } + case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* These queries should be dealt with in context_impl.cpp by calling the + * queries of each device separately and building the intersection set. + * NOTE(review): the code stored with the message is + * UR_RESULT_ERROR_INVALID_ARGUMENT while the value returned below is + * UR_RESULT_ERROR_INVALID_ENUMERATION; confirm this mismatch is intended. */ + setErrorMessage("These queries should have never come here.", + UR_RESULT_ERROR_INVALID_ARGUMENT); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + case UR_CONTEXT_INFO_NUM_DEVICES: + case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_REFERENCE_COUNT: { + /* Forwarded unchanged to clGetContextInfo using the mapped CL query id. */ + CL_RETURN_ON_FAILURE(clGetContextInfo(cl::cast(hContext), + cl_propName, propSize, pPropValue, + pPropSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} +/* Calls clReleaseContext on the handle and maps the CL status to a UR result. */ +UR_APIEXPORT ur_result_t UR_APICALL +urContextRelease(ur_context_handle_t hContext) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret = clReleaseContext(cl::cast(hContext)); + return map_cl_error_to_ur(ret); +} +/* Calls clRetainContext on the handle and maps the CL status to a UR result. */ +UR_APIEXPORT ur_result_t UR_APICALL +urContextRetain(ur_context_handle_t hContext) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret = clRetainContext(cl::cast(hContext)); + return map_cl_error_to_ur(ret); +} +/* Writes the UR context handle, reinterpreted as a native handle, to *phNativeContext; no OpenCL call is made here. */ +UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( + ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativeContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativeContext = reinterpret_cast(hContext); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( +
ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, + const ur_context_native_properties_t *, ur_context_handle_t *phContext) { + + UR_ASSERT(hNativeContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + /* Reject a null output pointer before it is dereferenced below. */ + UR_ASSERT(phContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phContext = reinterpret_cast(hNativeContext); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp new file mode 100644 index 0000000000000..775a023bded54 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -0,0 +1,1018 @@ +//===--------- device.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "device.hpp" +#include "common.hpp" + +#include +#include + +// Queries CL_DEVICE_VERSION (size first, then the string), parses it into +// version, and returns CL_INVALID_DEVICE when the parsed version is not valid. +cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { + cl_int ret_err = CL_INVALID_VALUE; + + size_t devVerSize = 0; + ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize); + + if (ret_err != CL_SUCCESS) { + return ret_err; + } + + std::string devVer(devVerSize, '\0'); + ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(), + nullptr); + + if (ret_err != CL_SUCCESS) { + return ret_err; + } + + version = OCLV::OpenCLVersion(devVer); + if (!version.isValid()) { + return CL_INVALID_DEVICE; + } + + return ret_err; +} + +// Sets supported to true only if every name in exts occurs in the device's +// CL_DEVICE_EXTENSIONS string (plain substring search). +cl_int checkDeviceExtensions(cl_device_id dev, + const std::vector &exts, + bool &supported) { + cl_int ret_err = CL_INVALID_VALUE; + + size_t extSize = 0; + ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize); + + if (ret_err != CL_SUCCESS) { + return ret_err; + } + + std::string extStr(extSize, '\0'); + ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS,
extSize, extStr.data(), + nullptr); + + if (ret_err != CL_SUCCESS) { + return ret_err; + } + + supported = true; + for (const std::string &ext : exts) { + if (!(supported = (extStr.find(ext) != std::string::npos))) { + break; + } + } + + return ret_err; +} + +/* Enumerates the OpenCL devices of hPlatform that match DeviceType through + * clGetDeviceIDs. CL_DEVICE_NOT_FOUND is reported as success with a device + * count of zero; an unrecognised DeviceType yields + * UR_RESULT_ERROR_INVALID_ENUMERATION. */ +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, + uint32_t NumEntries, + ur_device_handle_t *phDevices, + uint32_t *pNumDevices) { + + cl_device_type type; + switch (DeviceType) { + case UR_DEVICE_TYPE_ALL: + type = CL_DEVICE_TYPE_ALL; + break; + case UR_DEVICE_TYPE_GPU: + type = CL_DEVICE_TYPE_GPU; + break; + case UR_DEVICE_TYPE_CPU: + type = CL_DEVICE_TYPE_CPU; + break; + case UR_DEVICE_TYPE_FPGA: + case UR_DEVICE_TYPE_MCA: + case UR_DEVICE_TYPE_VPU: + type = CL_DEVICE_TYPE_ACCELERATOR; + break; + case UR_DEVICE_TYPE_DEFAULT: + /* cl_device_type takes the OpenCL constant, not the UR enumerator. */ + type = CL_DEVICE_TYPE_DEFAULT; + break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + cl_int result = clGetDeviceIDs( + cl::cast(hPlatform), type, cl::cast(NumEntries), + cl::cast(phDevices), cl::cast(pNumDevices)); + + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices + if (result == CL_DEVICE_NOT_FOUND) { + result = CL_SUCCESS; + if (pNumDevices) { + *pNumDevices = 0; + } + } + + return map_cl_error_to_ur(result); +} + +// Converts a cl_device_fp_config bitfield into the corresponding UR +// floating-point capability flags, one bit at a time. +ur_device_fp_capability_flags_t +map_ur_cl_device_fp_config_to_ur(cl_device_fp_config cl_value) { + + ur_device_fp_capability_flags_t ur_value = 0; + if (cl_value & CL_FP_DENORM) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; + } + if (cl_value & CL_FP_INF_NAN) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; + } + if (cl_value & CL_FP_ROUND_TO_NEAREST) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + } + if (cl_value & CL_FP_ROUND_TO_ZERO) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + } + if (cl_value & CL_FP_ROUND_TO_INF) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; + } + if (cl_value & CL_FP_FMA) { + ur_value
|= UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + } + if (cl_value & CL_FP_SOFT_FLOAT) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; + } + if (cl_value & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { + ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + } + + return ur_value; +} + +cl_int map_ur_device_info_to_cl(ur_device_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_DEVICE_INFO_TYPE: + cl_propName = CL_DEVICE_TYPE; + break; + case UR_DEVICE_INFO_PARENT_DEVICE: + cl_propName = CL_DEVICE_PARENT_DEVICE; + break; + case UR_DEVICE_INFO_PLATFORM: + cl_propName = CL_DEVICE_PLATFORM; + break; + case UR_DEVICE_INFO_VENDOR_ID: + cl_propName = CL_DEVICE_VENDOR_ID; + break; + case UR_DEVICE_INFO_EXTENSIONS: + cl_propName = CL_DEVICE_EXTENSIONS; + break; + case UR_DEVICE_INFO_NAME: + cl_propName = CL_DEVICE_NAME; + break; + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + cl_propName = CL_DEVICE_COMPILER_AVAILABLE; + break; + case UR_DEVICE_INFO_LINKER_AVAILABLE: + cl_propName = CL_DEVICE_LINKER_AVAILABLE; + break; + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + cl_propName = CL_DEVICE_MAX_COMPUTE_UNITS; + break; + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + cl_propName = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS; + break; + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + cl_propName = CL_DEVICE_MAX_WORK_GROUP_SIZE; + break; + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + cl_propName = CL_DEVICE_MAX_WORK_ITEM_SIZES; + break; + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + cl_propName = CL_DEVICE_MAX_CLOCK_FREQUENCY; + break; + case UR_DEVICE_INFO_ADDRESS_BITS: + cl_propName = CL_DEVICE_ADDRESS_BITS; + break; + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + cl_propName = CL_DEVICE_MAX_MEM_ALLOC_SIZE; + break; + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + cl_propName = CL_DEVICE_GLOBAL_MEM_SIZE; + break; + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + cl_propName = CL_DEVICE_LOCAL_MEM_SIZE; + break; + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + cl_propName = 
CL_DEVICE_IMAGE_SUPPORT; + break; + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + cl_propName = CL_DEVICE_HOST_UNIFIED_MEMORY; + break; + case UR_DEVICE_INFO_AVAILABLE: + cl_propName = CL_DEVICE_AVAILABLE; + break; + case UR_DEVICE_INFO_VENDOR: + cl_propName = CL_DEVICE_VENDOR; + break; + case UR_DEVICE_INFO_DRIVER_VERSION: + cl_propName = CL_DRIVER_VERSION; + break; + case UR_DEVICE_INFO_VERSION: + cl_propName = CL_DEVICE_VERSION; + break; + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + cl_propName = CL_DEVICE_PARTITION_MAX_SUB_DEVICES; + break; + case UR_DEVICE_INFO_REFERENCE_COUNT: + cl_propName = CL_DEVICE_REFERENCE_COUNT; + break; + case UR_DEVICE_INFO_PARTITION_PROPERTIES: + cl_propName = CL_DEVICE_PARTITION_PROPERTIES; + break; + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + cl_propName = CL_DEVICE_PARTITION_AFFINITY_DOMAIN; + break; + case UR_DEVICE_INFO_PARTITION_TYPE: + cl_propName = CL_DEVICE_PARTITION_TYPE; + break; + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + cl_propName = CL_DEVICE_OPENCL_C_VERSION; + break; + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + cl_propName = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC; + break; + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + cl_propName = CL_DEVICE_PRINTF_BUFFER_SIZE; + break; + case UR_DEVICE_INFO_PROFILE: + cl_propName = CL_DEVICE_PROFILE; + break; + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + cl_propName = CL_DEVICE_BUILT_IN_KERNELS; + break; + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + cl_propName = CL_DEVICE_QUEUE_PROPERTIES; + break; + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + cl_propName = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES; + break; + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + cl_propName = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES; + break; + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + cl_propName = CL_DEVICE_EXECUTION_CAPABILITIES; + break; + case UR_DEVICE_INFO_ENDIAN_LITTLE: + cl_propName = CL_DEVICE_ENDIAN_LITTLE; + break; + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + cl_propName = 
CL_DEVICE_ERROR_CORRECTION_SUPPORT; + break; + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + cl_propName = CL_DEVICE_PROFILING_TIMER_RESOLUTION; + break; + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + cl_propName = CL_DEVICE_LOCAL_MEM_TYPE; + break; + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + cl_propName = CL_DEVICE_MAX_CONSTANT_ARGS; + break; + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + cl_propName = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; + break; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + cl_propName = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE; + break; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + cl_propName = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; + break; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + cl_propName = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE; + break; + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + cl_propName = CL_DEVICE_MAX_PARAMETER_SIZE; + break; + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + cl_propName = CL_DEVICE_MEM_BASE_ADDR_ALIGN; + break; + case UR_DEVICE_INFO_MAX_SAMPLERS: + cl_propName = CL_DEVICE_MAX_SAMPLERS; + break; + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + cl_propName = CL_DEVICE_MAX_READ_IMAGE_ARGS; + break; + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + cl_propName = CL_DEVICE_MAX_WRITE_IMAGE_ARGS; + break; + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + cl_propName = CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS; + break; + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + cl_propName = CL_DEVICE_SINGLE_FP_CONFIG; + break; + case UR_DEVICE_INFO_HALF_FP_CONFIG: + cl_propName = CL_DEVICE_HALF_FP_CONFIG; + break; + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: + cl_propName = CL_DEVICE_DOUBLE_FP_CONFIG; + break; + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + cl_propName = CL_DEVICE_IMAGE2D_MAX_WIDTH; + break; + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + cl_propName = CL_DEVICE_IMAGE2D_MAX_HEIGHT; + break; + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + cl_propName = CL_DEVICE_IMAGE3D_MAX_WIDTH; + break; + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + cl_propName = 
CL_DEVICE_IMAGE3D_MAX_HEIGHT; + break; + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + cl_propName = CL_DEVICE_IMAGE3D_MAX_DEPTH; + break; + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + cl_propName = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE; + break; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF; + break; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF; + break; + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: + cl_propName = CL_DEVICE_MAX_NUM_SUB_GROUPS; + break; + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: + cl_propName = 
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; + break; + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + cl_propName = CL_DEVICE_SUB_GROUP_SIZES_INTEL; + break; + case UR_DEVICE_INFO_IL_VERSION: + cl_propName = CL_DEVICE_IL_VERSION; + break; + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + cl_propName = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE; + break; + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + cl_propName = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL; + break; + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + cl_propName = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL; + break; + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + cl_propName = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + break; + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + cl_propName = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + break; + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: + cl_propName = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_device_info cl_propName = map_ur_device_info_to_cl(propName); + + /* TODO UR: Casting to uint32_t to silence warnings due to some values not + * being part of the enum. Can be removed once all UR_EXT enums are promoted + * to UR */ + switch (static_cast(propName)) { + case UR_DEVICE_INFO_TYPE: { + cl_device_type cl_type; + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, sizeof(cl_device_type), + &cl_type, nullptr)); + + /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not + * enough information in the OpenCL runtime to know exactly which type it + * is. 
Assuming FPGA for now */ + /* TODO UR: In OpenCL, a device can have multiple types (e.g. CPU and GPU). + * We are potentially losing information by returning only one type */ + ur_device_type_t ur_device_type = UR_DEVICE_TYPE_DEFAULT; + if (cl_type & CL_DEVICE_TYPE_CPU) { + ur_device_type = UR_DEVICE_TYPE_CPU; + } else if (cl_type & CL_DEVICE_TYPE_GPU) { + ur_device_type = UR_DEVICE_TYPE_GPU; + } else if (cl_type & CL_DEVICE_TYPE_ACCELERATOR) { + ur_device_type = UR_DEVICE_TYPE_FPGA; + } + + return ReturnValue(ur_device_type); + } + case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { + OCLV::OpenCLVersion version; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), version)); + + const std::string results = std::to_string(version.getMajor()) + "." + + std::to_string(version.getMinor()); + return ReturnValue(results.c_str(), results.size() + 1); + } + case UR_DEVICE_INFO_PARTITION_PROPERTIES: + case UR_DEVICE_INFO_PARTITION_TYPE: { + size_t cl_size; + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, 0, nullptr, &cl_size)); + const size_t n_properties = cl_size / sizeof(cl_device_partition_property); + + /* Special case for UR_DEVICE_INFO_PARTITION_TYPE because OpenCL + * implementation returns a size of 0 if the device is not a sub-device. + * But UR implementation expects a size of 1 element with a value of 0. 
*/ + if (propName == UR_DEVICE_INFO_PARTITION_TYPE && cl_size == 0) { + return ReturnValue(static_cast(0)); + } + + auto cl_value = + reinterpret_cast(alloca(cl_size)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, cl_size, cl_value, + nullptr)); + + std::vector ur_value{}; + for (size_t i = 0; i < n_properties; ++i) { + if (cl_value[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL) { + ur_value.push_back( + static_cast(cl_value[i])); + } + } + + return ReturnValue(ur_value.data(), ur_value.size()); + } + case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { + /* Returns the maximum sizes of a work group for each dimension one could + * use to submit a kernel. There is no such query defined in OpenCL. So + * we'll return the maximum value. */ + static constexpr uint32_t max_work_item_dimensions = 3u; + static constexpr size_t Max = (std::numeric_limits::max)(); + + struct { + size_t sizes[max_work_item_dimensions]; + } return_sizes; + + return_sizes.sizes[0] = Max; + return_sizes.sizes[1] = Max; + return_sizes.sizes[2] = Max; + return ReturnValue(return_sizes); + } + case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + return ReturnValue(static_cast(1u)); + } + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { + /* Corresponding OpenCL query is only available starting with OpenCL 2.1 + * and we have to emulate it on older OpenCL runtimes. */ + OCLV::OpenCLVersion devVer; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), devVer)); + + if (devVer >= OCLV::V2_1) { + cl_uint cl_value; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &cl_value, nullptr)); + + if (cl_value == 0u) { + /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 + * spec says that minimum possible value is 1. */ + return ReturnValue(1u); + } else { + return ReturnValue(static_cast(cl_value)); + } + } else { + /* Otherwise, we can't query anything, because even cl_khr_subgroups + * does not provide similar query. 
Therefore, simply return minimum + * possible value 1 here. */ + return ReturnValue(1u); + } + } + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + case UR_DEVICE_INFO_HALF_FP_CONFIG: + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { + /* CL type: cl_device_fp_config + * UR type: ur_device_fp_capability_flags_t */ + if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { + bool supported; + CL_RETURN_ON_FAILURE(checkDeviceExtensions( + cl::cast(hDevice), {"cl_khr_fp16"}, supported)); + + if (!supported) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + cl_device_fp_config cl_value; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl::cast(hDevice), cl_propName, + sizeof(cl_device_fp_config), &cl_value, nullptr)); + + return ReturnValue(map_ur_cl_device_fp_config_to_ur(cl_value)); + } + + case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + /* This query is missing before OpenCL 3.0. Check version and handle + * appropriately */ + OCLV::OpenCLVersion devVer; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), devVer)); + + /* Minimum required capability to be returned. For OpenCL 1.2, this is all + * that is required */ + ur_memory_order_capability_flags_t ur_capabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; + + if (devVer >= OCLV::V3_0) { + /* For OpenCL >=3.0, the query should be implemented */ + cl_device_atomic_capabilities cl_capabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + + /* Mask operation to only consider atomic_memory_order* capabilities */ + const cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + cl_capabilities &= mask; + + /* The memory order capabilities are hierarchical, if one is implied, all + * preceding capabilities are implied as well. Especially in the case of + * ACQ_REL. 
*/ + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; + } + } else if (devVer >= OCLV::V2_0) { + /* For OpenCL 2.x, return all capabilities. + * (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + */ + ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(ur_capabilities)); + } + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. 
+ * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t ur_capabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + OCLV::OpenCLVersion devVer; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), devVer)); + + cl_device_atomic_capabilities cl_capabilities; + if (devVer >= OCLV::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + + assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. */ + if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. 
+ */ + if (devVer >= OCLV::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(ur_capabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2 */ + ur_memory_order_capability_flags_t ur_capabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; + + OCLV::OpenCLVersion devVer; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), devVer)); + + cl_device_atomic_capabilities cl_capabilities; + if (devVer >= OCLV::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + + assert((cl_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && + "Violates minimum mandated guarantee"); + assert((cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && + "Violates minimum mandated guarantee"); + + /* We already initialized to minimum mandated capabilities. Just check + * stronger orders. */ + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are RELAXED | ACQ_REL, we already initialized + * using these. 
*/ + if (devVer >= OCLV::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | + * SEQ_CST */ + ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(ur_capabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. + * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t ur_capabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + OCLV::OpenCLVersion devVer; + CL_RETURN_ON_FAILURE( + getDeviceVersion(cl::cast(hDevice), devVer)); + + cl_device_atomic_capabilities cl_capabilities; + if (devVer >= OCLV::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + + assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. 
*/ + if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. + */ + if (devVer >= OCLV::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(ur_capabilities)); + } + + case UR_DEVICE_INFO_IMAGE_SRGB: { + return ReturnValue(true); + } + + case UR_DEVICE_INFO_BFLOAT16: { + return ReturnValue(false); + } + case UR_DEVICE_INFO_ATOMIC_64: { + bool supported = false; + CL_RETURN_ON_FAILURE(checkDeviceExtensions( + cl::cast(hDevice), + {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, + supported)); + + return ReturnValue(supported); + } + case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { + + cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + CL_DEVICE_TYPE, sizeof(cl_device_type), + &devType, nullptr)); + + return ReturnValue(devType == CL_DEVICE_TYPE_GPU); + } + case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { + bool supported = false; + CL_RETURN_ON_FAILURE( + checkDeviceExtensions(cl::cast(hDevice), + {"cl_intel_mem_channel_property"}, supported)); + + return ReturnValue(supported); + } + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + case 
UR_DEVICE_INFO_LOCAL_MEM_TYPE: + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + /* CL type: cl_bitfield + * UR type: ur_flags_t (uint32_t) */ + + cl_bitfield cl_value; + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, sizeof(cl_bitfield), + &cl_value, nullptr)); + + /* We can just static_cast the output because OpenCL and UR bitfields + * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t + * types are uint32_t */ + return ReturnValue(static_cast(cl_value)); + } + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_ENDIAN_LITTLE: + case UR_DEVICE_INFO_AVAILABLE: + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + case UR_DEVICE_INFO_LINKER_AVAILABLE: + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { + /* CL type: cl_bool + * UR type: ur_bool_t */ + + cl_bool cl_value; + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, sizeof(cl_bool), + &cl_value, nullptr)); + + /* cl_bool is uint32_t and ur_bool_t is bool */ + return ReturnValue(static_cast(cl_value)); + } + case UR_DEVICE_INFO_VENDOR_ID: + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + case 
UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + case UR_DEVICE_INFO_ADDRESS_BITS: + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + case UR_DEVICE_INFO_MAX_SAMPLERS: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + case UR_DEVICE_INFO_REFERENCE_COUNT: + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + case UR_DEVICE_INFO_PLATFORM: + case UR_DEVICE_INFO_PARENT_DEVICE: + case UR_DEVICE_INFO_IL_VERSION: + case UR_DEVICE_INFO_NAME: + case UR_DEVICE_INFO_VENDOR: + case UR_DEVICE_INFO_DRIVER_VERSION: + case UR_DEVICE_INFO_PROFILE: + case UR_DEVICE_INFO_VERSION: + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + case UR_DEVICE_INFO_EXTENSIONS: + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + case 
UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + /* We can just use the OpenCL outputs because the sizes of OpenCL types + * are the same as UR. + * | CL | UR | Size | + * | char[] | char[] | 8 | + * | cl_uint | uint32_t | 4 | + * | cl_ulong | uint64_t | 8 | + * | size_t | size_t | 8 | + * | cl_platform_id | ur_platform_handle_t | 8 | + * | ur_device_handle_t | cl_device_id | 8 | + */ + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), + cl_propName, propSize, pPropValue, + pPropSizeRet)); + + return UR_RESULT_SUCCESS; + } + /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU + * EU device-specific information extensions. Some of the queries are + * enabled by cl_intel_device_attribute_query extension, but it's not yet in + * the Registry. */ + case UR_DEVICE_INFO_PCI_ADDRESS: + case UR_DEVICE_INFO_GPU_EU_COUNT: + case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: + case UR_DEVICE_INFO_GPU_EU_SLICES: + case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: + case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: + /* TODO: Check if device UUID extension is enabled in OpenCL. 
For details + * about Intel UUID extension, see + * sycl/doc/extensions/supported/sycl_ext_intel_device_info.md */ + case UR_DEVICE_INFO_UUID: + /* This enums have no equivalent in OpenCL */ + case UR_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_GLOBAL_MEM_FREE: + case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: + case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: + case UR_DEVICE_INFO_ASYNC_BARRIER: + case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( + ur_device_handle_t hDevice, + const ur_device_partition_property_t *pProperties, uint32_t NumDevices, + ur_device_handle_t *phSubDevices, uint32_t *pNumDevicesRet) { + + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pProperties, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + cl_uint cl_num_devices_ret; + CL_RETURN_ON_FAILURE(clCreateSubDevices( + cl::cast(hDevice), + cl::cast(pProperties), 0, nullptr, + &cl_num_devices_ret)); + + if (pNumDevicesRet) { + *pNumDevicesRet = cl_num_devices_ret; + } + + /*If NumDevices is less than the number of sub-devices available, then the + * function shall only retrieve that number of sub-devices. 
*/ + if (phSubDevices) { + std::vector cl_sub_devices(cl_num_devices_ret); + CL_RETURN_ON_FAILURE(clCreateSubDevices( + cl::cast(hDevice), + cl::cast(pProperties), + cl_num_devices_ret, cl_sub_devices.data(), nullptr)); + + std::memcpy(phSubDevices, cl_sub_devices.data(), + sizeof(cl_device_id) * (NumDevices < cl_num_devices_ret ? NumDevices : cl_num_devices_ret)); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + cl_int result = clRetainDevice(cl::cast(hDevice)); + + return map_cl_error_to_ur(result); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urDeviceRelease(ur_device_handle_t hDevice) { + + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + cl_int result = clReleaseDevice(cl::cast(hDevice)); + + return map_cl_error_to_ur(result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( + ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { + + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativeDevice, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativeDevice = reinterpret_cast(hDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( + ur_native_handle_t hNativeDevice, ur_platform_handle_t, + ur_device_handle_t *phDevice) { + + UR_ASSERT(hNativeDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + *phDevice = reinterpret_cast(hNativeDevice); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp new file mode 100644 index 0000000000000..596a608546429 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -0,0 +1,19 @@ +//===--------- device.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include +#include + +cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version); + +cl_int checkDeviceExtensions(cl_device_id dev, + const std::vector &exts, + bool &supported); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp new file mode 100644 index 0000000000000..a9d1c1e5d4294 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -0,0 +1,122 @@ +//===--------- platform.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +#include + +cl_int map_ur_platform_info_to_cl(ur_platform_info_t urPropName) { + + cl_int cl_propName; + switch (urPropName) { + case UR_PLATFORM_INFO_NAME: + cl_propName = CL_PLATFORM_NAME; + break; + case UR_PLATFORM_INFO_VENDOR_NAME: + cl_propName = CL_PLATFORM_VENDOR; + break; + case UR_PLATFORM_INFO_VERSION: + cl_propName = CL_PLATFORM_VERSION; + break; + case UR_PLATFORM_INFO_EXTENSIONS: + cl_propName = CL_PLATFORM_EXTENSIONS; + break; + case UR_PLATFORM_INFO_PROFILE: + cl_propName = CL_PLATFORM_PROFILE; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, + size_t propSize, void *pPropValue, size_t *pSizeRet) { + + UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); + const cl_int cl_propName =
map_ur_platform_info_to_cl(propName); + + switch (static_cast(propName)) { + case UR_PLATFORM_INFO_BACKEND: + return ReturnValue(UR_PLATFORM_BACKEND_OPENCL); + case UR_PLATFORM_INFO_NAME: + case UR_PLATFORM_INFO_VENDOR_NAME: + case UR_PLATFORM_INFO_VERSION: + case UR_PLATFORM_INFO_EXTENSIONS: + case UR_PLATFORM_INFO_PROFILE: { + CL_RETURN_ON_FAILURE(clGetPlatformInfo(cl::cast(hPlatform), + cl_propName, propSize, pPropValue, + pSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_DLLEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( + ur_platform_handle_t hPlatform, ur_api_version_t *pVersion) { + UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pVersion, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *pVersion = UR_API_VERSION_CURRENT; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms, + uint32_t *pNumPlatforms) { + + UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_SIZE); + + cl_int result = clGetPlatformIDs(cl::cast(NumEntries), + cl::cast(phPlatforms), + cl::cast(pNumPlatforms)); + + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ + if (result == CL_PLATFORM_NOT_FOUND_KHR) { + result = CL_SUCCESS; + if (pNumPlatforms) { + *pNumPlatforms = 0; + } + } + + return map_cl_error_to_ur(result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( + ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { + + UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativePlatform, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativePlatform = reinterpret_cast(hPlatform); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, ur_platform_handle_t 
*phPlatform) { + + UR_ASSERT(hNativePlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + *phPlatform = reinterpret_cast(hNativePlatform); + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) { + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { + UR_ASSERT(pParams, UR_RESULT_ERROR_INVALID_NULL_POINTER); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp new file mode 100644 index 0000000000000..ef3f31e3a010e --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -0,0 +1,258 @@ +//===--------- ur_interface_loader.cpp - Unified Runtime ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include +#include + +namespace { + +// TODO - this is a duplicate of what is in the L0 plugin +// We should move this to somewhere common +ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. 
+ if (version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} +} // namespace + +#if defined(__cplusplus) +extern "C" { +#endif + +UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; + pDdiTable->pfnGet = urPlatformGet; + pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; + pDdiTable->pfnGetInfo = urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t version, ur_context_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnCreate = urContextCreate; + pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urContextGetInfo; + pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; + pDdiTable->pfnRelease = urContextRelease; + pDdiTable->pfnRetain = urContextRetain; +// pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t version, ur_event_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; +// pDdiTable->pfnGetInfo = urEventGetInfo; +// pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; +// pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; +// pDdiTable->pfnRelease = urEventRelease; +// pDdiTable->pfnRetain = urEventRetain; +// pDdiTable->pfnSetCallback = 
urEventSetCallback; +// pDdiTable->pfnWait = urEventWait; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t version, ur_program_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnBuild = urProgramBuild; +// pDdiTable->pfnCompile = urProgramCompile; +// pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; +// pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; +// pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; +// pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; +// pDdiTable->pfnGetFunctionPointer = nullptr; +// pDdiTable->pfnGetInfo = urProgramGetInfo; +// pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; +// pDdiTable->pfnLink = urProgramLink; +// pDdiTable->pfnRelease = urProgramRelease; +// pDdiTable->pfnRetain = urProgramRetain; +// pDdiTable->pfnSetSpecializationConstants = +// urProgramSetSpecializationConstants; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnCreate = urKernelCreate; +// pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; +// pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; +// pDdiTable->pfnGetInfo = urKernelGetInfo; +// pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; +// pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; +// pDdiTable->pfnRelease = urKernelRelease; +// pDdiTable->pfnRetain = urKernelRetain; +// pDdiTable->pfnSetArgLocal = nullptr; +// pDdiTable->pfnSetArgMemObj = nullptr; +// pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; +// pDdiTable->pfnSetArgSampler = nullptr; +// pDdiTable->pfnSetArgValue = urKernelSetArgValue; +// 
pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; +// pDdiTable->pfnSetSpecializationConstants = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnCreate = urSamplerCreate; +// pDdiTable->pfnCreateWithNativeHandle = nullptr; +// pDdiTable->pfnGetInfo = urSamplerGetInfo; +// pDdiTable->pfnGetNativeHandle = nullptr; +// pDdiTable->pfnRelease = urSamplerRelease; +// pDdiTable->pfnRetain = urSamplerRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnBufferCreate = nullptr; +// pDdiTable->pfnBufferPartition = nullptr; +// pDdiTable->pfnCreateWithNativeHandle = nullptr; +// pDdiTable->pfnGetInfo = nullptr; +// pDdiTable->pfnGetNativeHandle = nullptr; +// pDdiTable->pfnImageCreate = nullptr; +// pDdiTable->pfnImageGetInfo = nullptr; +// pDdiTable->pfnRelease = nullptr; +// pDdiTable->pfnRetain = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnDeviceGlobalVariableRead = nullptr; +// pDdiTable->pfnDeviceGlobalVariableWrite = nullptr; +// pDdiTable->pfnEventsWait = urEnqueueEventsWait; +// pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; +// pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; +// pDdiTable->pfnMemBufferCopy = nullptr; +// pDdiTable->pfnMemBufferCopyRect = nullptr; +// pDdiTable->pfnMemBufferFill = nullptr; 
+// pDdiTable->pfnMemBufferMap = nullptr; +// pDdiTable->pfnMemBufferRead = nullptr; +// pDdiTable->pfnMemBufferReadRect = nullptr; +// pDdiTable->pfnMemBufferWrite = nullptr; +// pDdiTable->pfnMemBufferWriteRect = nullptr; +// pDdiTable->pfnMemImageCopy = nullptr; +// pDdiTable->pfnMemImageRead = nullptr; +// pDdiTable->pfnMemImageWrite = nullptr; +// pDdiTable->pfnMemUnmap = nullptr; +// pDdiTable->pfnUSMFill2D = nullptr; +// pDdiTable->pfnUSMFill = nullptr; +// pDdiTable->pfnUSMAdvise = nullptr; +// pDdiTable->pfnUSMMemcpy2D = nullptr; +// pDdiTable->pfnUSMMemcpy = nullptr; +// pDdiTable->pfnUSMPrefetch = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t version, ur_global_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnGetLastResult = urGetLastResult; + pDdiTable->pfnInit = urInit; + pDdiTable->pfnTearDown = urTearDown; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnCreate = urQueueCreate; +// pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; +// pDdiTable->pfnFinish = urQueueFinish; +// pDdiTable->pfnFlush = urQueueFlush; +// pDdiTable->pfnGetInfo = urQueueGetInfo; +// pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; +// pDdiTable->pfnRelease = urQueueRelease; +// pDdiTable->pfnRetain = urQueueRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } +// pDdiTable->pfnDeviceAlloc = nullptr; +// 
pDdiTable->pfnFree = nullptr; +// pDdiTable->pfnGetMemAllocInfo = nullptr; +// pDdiTable->pfnHostAlloc = nullptr; +// pDdiTable->pfnPoolCreate = nullptr; +// pDdiTable->pfnPoolDestroy = nullptr; +// pDdiTable->pfnPoolDestroy = nullptr; +// pDdiTable->pfnSharedAlloc = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t version, ur_device_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; + pDdiTable->pfnGet = urDeviceGet; +// pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; + pDdiTable->pfnGetInfo = urDeviceGetInfo; + pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; + pDdiTable->pfnPartition = urDevicePartition; + pDdiTable->pfnRelease = urDeviceRelease; + pDdiTable->pfnRetain = urDeviceRetain; +// pDdiTable->pfnSelectBinary = nullptr; + return UR_RESULT_SUCCESS; +} + +#if defined(__cplusplus) +} // extern "C" +#endif From 2843f0a7f14107fe3a04e7af66ef9fb6fba33be4 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Tue, 23 May 2023 13:44:34 +0000 Subject: [PATCH 02/36] [SYCL][OpenCL] Port Memory endpoints from PI to UR. 
--- sycl/plugins/opencl/CMakeLists.txt | 1 + sycl/plugins/unified_runtime/pi2ur.hpp | 5 +- .../ur/adapters/opencl/common.cpp | 29 ++- .../ur/adapters/opencl/common.hpp | 179 +++++++++++++++++- .../ur/adapters/opencl/context.cpp | 2 +- .../ur/adapters/opencl/memory.cpp | 152 +++++++++++++++ .../ur/adapters/opencl/platform.cpp | 11 ++ .../adapters/opencl/ur_interface_loader.cpp | 18 +- 8 files changed, 371 insertions(+), 26 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 65c535354f8ee..926014699c9ce 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -25,6 +25,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/device.cpp" "../unified_runtime/ur/adapters/opencl/device.hpp" "../unified_runtime/ur/adapters/opencl/platform.cpp" + "../unified_runtime/ur/adapters/opencl/memory.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 4c418127c8312..5f737b6ed6d5e 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -11,8 +11,10 @@ #include #include #include +#include #include + // Map of UR error codes to PI error codes static pi_result ur2piResult(ur_result_t urResult) { if (urResult == UR_RESULT_SUCCESS) @@ -1403,8 +1405,7 @@ inline pi_result piextContextCreateWithNativeHandle( reinterpret_cast(RetContext); ur_context_native_properties_t Properties{ - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES, nullptr, - OwnNativeHandle}; + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES, nullptr, OwnNativeHandle}; HANDLE_ERRORS(urContextCreateWithNativeHandle( NativeContext, NumDevices, UrDevices, &Properties, UrContext)); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp 
index 5c0c8f680cc4a..99c6a7d2c7322 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -8,27 +8,30 @@ #include "common.hpp" +#include + +namespace cl { // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; -thread_local char ErrorMessage[MaxMessageSize]; +thread_local char ErrorMessage[cl::MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code) { - assert(strlen(message) <= MaxMessageSize); - strcpy(ErrorMessage, message); + assert(strlen(message) < cl::MaxMessageSize); + strcpy(cl::ErrorMessage, message); ErrorMessageCode = error_code; } +} // namespace cl // Returns plugin specific error and warning messages; common implementation // that can be shared between adapters ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) { - *ppMessage = &ErrorMessage[0]; - return ErrorMessageCode; + *ppMessage = &cl::ErrorMessage[0]; + return cl::ErrorMessageCode; } ur_result_t map_cl_error_to_ur(cl_int result) { - switch (result) { case CL_SUCCESS: return UR_RESULT_SUCCESS; @@ -42,3 +45,17 @@ ur_result_t map_cl_error_to_ur(cl_int result) { return UR_RESULT_ERROR_UNKNOWN; } } + +/// Common API for getting the native handle of a UR object +/// +/// \param urObj is the UR object to get the native handle of +/// \param nativeHandle is a pointer to be set to the native handle +/// +/// \return UR_RESULT_SUCCESS +ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle) { + UR_ASSERT(nativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER); + *nativeHandle = reinterpret_cast(urObj); + return UR_RESULT_SUCCESS; +} + +cl_ext::ExtFuncPtrCacheT *ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp
b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 397237eb20d10..b4eed72f928f9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -12,19 +12,13 @@ #include #include +#include + #define CL_RETURN_ON_FAILURE(clCall) \ if (const cl_int cl_result = clCall != CL_SUCCESS) { \ return map_cl_error_to_ur(cl_result); \ } -constexpr size_t MaxMessageSize = 256; -extern thread_local ur_result_t ErrorMessageCode; -extern thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *message, - ur_result_t error_code); - namespace OCLV { class OpenCLVersion { protected: @@ -115,6 +109,14 @@ inline const OpenCLVersion V3_0(3, 0); } // namespace OCLV namespace cl { +constexpr size_t MaxMessageSize = 256; +extern thread_local ur_result_t ErrorMessageCode; +extern thread_local char ErrorMessage[MaxMessageSize]; + +// Utility function for setting a message and warning +[[maybe_unused]] void setErrorMessage(const char *message, + ur_result_t error_code); + template To cast(From value) { if constexpr (std::is_pointer_v) { @@ -131,3 +133,164 @@ template To cast(From value) { } // namespace cl ur_result_t map_cl_error_to_ur(cl_int result); + +ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle); + +namespace cl_ext { +// Older versions of GCC don't like "const" here +#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2)) +#define CONSTFIX constexpr +#else +#define CONSTFIX const +#endif + +// Names of USM functions that are queried from OpenCL +CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL"; +CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL"; +CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL"; +CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char
clCreateBufferWithPropertiesName[] = + "clCreateBufferWithPropertiesINTEL"; +CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; +CONSTFIX char clEnqueueMemFillName[] = "clEnqueueMemFillINTEL"; +CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; +CONSTFIX char clSetProgramSpecializationConstantName[] = + "clSetProgramSpecializationConstant"; +CONSTFIX char clGetDeviceFunctionPointerName[] = + "clGetDeviceFunctionPointerINTEL"; +CONSTFIX char clEnqueueWriteGlobalVariableName[] = + "clEnqueueWriteGlobalVariableINTEL"; +CONSTFIX char clEnqueueReadGlobalVariableName[] = + "clEnqueueReadGlobalVariableINTEL"; +// Names of host pipe functions queried from OpenCL +CONSTFIX char clEnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; +CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; + +#undef CONSTFIX + +using clGetDeviceFunctionPointer_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, + const char *FuncName, cl_ulong *ret_ptr); + +using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, const void *, cl_uint, const cl_event *, + cl_event *); + +using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, void *, cl_uint, const cl_event *, + cl_event *); + +using clSetProgramSpecializationConstant_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, void *ptr, + size_t size, cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +using clEnqueueWriteHostPipeINTEL_fn = 
CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, void *ptr, + size_t size, cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +template struct FuncPtrCache { + std::map Map; + std::mutex Mutex; +}; + +// FIXME: There's currently no mechanism for cleaning up this cache, meaning +// that it is invalidated whenever a context is destroyed. This could lead to +// reusing an invalid function pointer if another context happens to have the +// same native handle. +struct ExtFuncPtrCacheT { + FuncPtrCache clHostMemAllocINTELCache; + FuncPtrCache clDeviceMemAllocINTELCache; + FuncPtrCache clSharedMemAllocINTELCache; + FuncPtrCache clGetDeviceFunctionPointerCache; + FuncPtrCache + clCreateBufferWithPropertiesINTELCache; + FuncPtrCache clMemBlockingFreeINTELCache; + FuncPtrCache + clSetKernelArgMemPointerINTELCache; + FuncPtrCache clEnqueueMemFillINTELCache; + FuncPtrCache clEnqueueMemcpyINTELCache; + FuncPtrCache clGetMemAllocInfoINTELCache; + FuncPtrCache + clEnqueueWriteGlobalVariableCache; + FuncPtrCache clEnqueueReadGlobalVariableCache; + FuncPtrCache clEnqueueReadHostPipeINTELCache; + FuncPtrCache clEnqueueWriteHostPipeINTELCache; + FuncPtrCache + clSetProgramSpecializationConstantCache; +}; +// A raw pointer is used here since the lifetime of this map has to be tied to +// piTeardown to avoid issues with static destruction order (a user application +// might have static objects that indirectly access this cache in their +// destructor). +inline ExtFuncPtrCacheT *ExtFuncPtrCache; + +// USM helper function to get an extension function pointer +template +static ur_result_t getExtFuncFromContext(cl_context context, + FuncPtrCache &FPtrCache, + const char *FuncName, T *fptr) { + // TODO + // Potentially redo caching as UR interface changes.
+ // if cached, return cached FuncPtr + std::lock_guard CacheLock{FPtrCache.Mutex}; + std::map &FPtrMap = FPtrCache.Map; + auto It = FPtrMap.find(context); + if (It != FPtrMap.end()) { + auto F = It->second; + // if cached that extension is not available return nullptr and + // UR_RESULT_ERROR_INVALID_VALUE + *fptr = F; + return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_uint deviceCount; + cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &deviceCount, nullptr); + + if (ret_err != CL_SUCCESS || deviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + std::vector devicesInCtx(deviceCount); + ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES, + deviceCount * sizeof(cl_device_id), + devicesInCtx.data(), nullptr); + + if (ret_err != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + cl_platform_id curPlatform; + ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &curPlatform, nullptr); + + if (ret_err != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + T FuncPtr = + (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); + + if (!FuncPtr) { + // Cache that the extension is not available + FPtrMap[context] = nullptr; + return UR_RESULT_ERROR_INVALID_VALUE; + } + + *fptr = FuncPtr; + FPtrMap[context] = FuncPtr; + + return UR_RESULT_SUCCESS; +} +} // namespace cl_ext diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index e45d1991bbd66..27dcd4cb821b0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -66,7 +66,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* These queries should be dealt with in context_impl.cpp by calling the * queries of each device 
separately and building the intersection set. */ - setErrorMessage("These queries should have never come here.", + cl::setErrorMessage("These queries should have never come here.", UR_RESULT_ERROR_INVALID_ARGUMENT); return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp new file mode 100644 index 0000000000000..2b85a77f76e2b --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -0,0 +1,152 @@ +//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +#include + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( + ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { + cl_int ret_err = CL_INVALID_OPERATION; + if (pProperties) { + // TODO: need to check if all properties are supported by OpenCL RT and + // ignore unsupported + clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl::cast(hContext); + // First we need to look up the function pointer + ret_err = + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, + cl_ext::clCreateBufferWithPropertiesName, &FuncPtr); + if (FuncPtr) { + std::vector propertiesIntel; + auto prop = static_cast(pProperties->pNext); + while (prop) { + switch (prop->stype) { + case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: { + auto bufferChannelProperty = + reinterpret_cast(prop); + propertiesIntel.push_back(CL_MEM_CHANNEL_INTEL); + 
propertiesIntel.push_back(bufferChannelProperty->channel); + } break; + case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: { + auto bufferLocationProperty = + reinterpret_cast(prop); + propertiesIntel.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + propertiesIntel.push_back(bufferLocationProperty->location); + } break; + default: + break; + } + prop = static_cast(prop->pNext); + } + propertiesIntel.push_back(0); + + *phBuffer = reinterpret_cast(FuncPtr( + CLContext, propertiesIntel.data(), static_cast(flags), + size, pProperties->pHost, cl::cast(&ret_err))); + CL_RETURN_ON_FAILURE(ret_err); + } + } + + *phBuffer = reinterpret_cast(clCreateBuffer( + cl::cast(hContext), static_cast(flags), size, + pProperties->pHost, cl::cast(&ret_err))); + CL_RETURN_ON_FAILURE(ret_err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( + ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + void *pHost, ur_mem_handle_t *phMem) { + cl_int ret_err = CL_INVALID_OPERATION; + *phMem = reinterpret_cast(clCreateImage( + cl::cast(hContext), static_cast(flags), + cl::cast(pImageFormat), + cl::cast(pImageDesc), pHost, + cl::cast(&ret_err))); + CL_RETURN_ON_FAILURE(ret_err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( + ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem) { + cl_int ret_err = CL_INVALID_OPERATION; + *phMem = reinterpret_cast(clCreateSubBuffer( + cl::cast(hBuffer), static_cast(flags), + cl::cast(bufferCreateType), pRegion, + cl::cast(&ret_err))); + CL_RETURN_ON_FAILURE(ret_err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { + return urGetNativeHandle(hMem, phNativeMem); +} + +UR_APIEXPORT ur_result_t UR_APICALL 
urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { + (void)hContext; + (void)pProperties; + UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + *phMem = reinterpret_cast(hNativeMem); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { + (void)hContext; + (void)pImageFormat; + (void)pImageDesc; + (void)pProperties; + UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + *phMem = reinterpret_cast(hNativeMem); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, + ur_mem_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl::cast(hMemory), propName, propSize, + pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, + ur_image_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + CL_RETURN_ON_FAILURE(clGetImageInfo(cl::cast(hMemory), propName, propSize, + pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clRetainMemObject(cl::cast(hMem))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clReleaseMemObject(cl::cast(hMem))); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index a9d1c1e5d4294..1d740ea3d88a4 100644 --- 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -116,7 +116,18 @@ UR_DLLEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) { return UR_RESULT_SUCCESS; } +// This API is called by Sycl RT to notify the end of the plugin lifetime. +// Windows: dynamically loaded plugins might have been unloaded already +// when this is called. Sycl RT holds onto the PI plugin so it can be +// called safely. But this is not transitive. If the PI plugin in turn +// dynamically loaded a different DLL, that may have been unloaded. +// TODO: add a global variable lifetime management code here (see +// pi_level_zero.cpp for reference). UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { UR_ASSERT(pParams, UR_RESULT_ERROR_INVALID_NULL_POINTER); + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index ef3f31e3a010e..8ce33cb4146cc 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -145,15 +145,15 @@ urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnBufferCreate = nullptr; -// pDdiTable->pfnBufferPartition = nullptr; -// pDdiTable->pfnCreateWithNativeHandle = nullptr; -// pDdiTable->pfnGetInfo = nullptr; -// pDdiTable->pfnGetNativeHandle = nullptr; -// pDdiTable->pfnImageCreate = nullptr; -// pDdiTable->pfnImageGetInfo = nullptr; -// pDdiTable->pfnRelease = nullptr; -// pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnBufferCreate = urMemBufferCreate; + pDdiTable->pfnBufferPartition = urMemBufferPartition; + 
pDdiTable->pfnBufferCreateWithNativeHandle = urMemBufferCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urMemGetInfo; + pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; + pDdiTable->pfnImageCreate = urMemImageCreate; + pDdiTable->pfnImageGetInfo = urMemImageGetInfo; + pDdiTable->pfnRelease = urMemRelease; + pDdiTable->pfnRetain = urMemRetain; return UR_RESULT_SUCCESS; } From 6c4a6efb55e3c24147d7809767c0ff83c08a37d3 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Tue, 30 May 2023 09:05:40 +0000 Subject: [PATCH 03/36] [SYCL][OpenCL] Fix failing e2e tests from memory endpoints. --- .../ur/adapters/opencl/common.cpp | 4 +- .../ur/adapters/opencl/memory.cpp | 286 +++++++++++++++++- .../ur/adapters/opencl/platform.cpp | 1 + .../adapters/opencl/ur_interface_loader.cpp | 1 + 4 files changed, 280 insertions(+), 12 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 99c6a7d2c7322..bf4be5c5c336c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -53,9 +53,9 @@ ur_result_t map_cl_error_to_ur(cl_int result) { /// /// PI_SUCCESS ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle) { - UR_ASSERT(!nativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) + UR_ASSERT(nativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) *nativeHandle = reinterpret_cast(urObj); return UR_RESULT_SUCCESS; } -cl_ext::ExtFuncPtrCacheT *ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); +cl_ext::ExtFuncPtrCacheT *ExtFuncPtrCache; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index 2b85a77f76e2b..e056e2b8a8529 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -10,9 +10,235 @@ #include +cl_image_format 
map_ur_image_format_to_cl(const ur_image_format_t *pImageFormat) { + cl_image_format clImageFormat; + switch (pImageFormat->channelOrder) { + case UR_IMAGE_CHANNEL_ORDER_A: + clImageFormat.image_channel_order = CL_A; + break; + case UR_IMAGE_CHANNEL_ORDER_R: + clImageFormat.image_channel_order = CL_R; + break; + case UR_IMAGE_CHANNEL_ORDER_RG: + clImageFormat.image_channel_order = CL_RG; + break; + case UR_IMAGE_CHANNEL_ORDER_RA: + clImageFormat.image_channel_order = CL_RA; + break; + case UR_IMAGE_CHANNEL_ORDER_RGB: + clImageFormat.image_channel_order = CL_RGB; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBA: + clImageFormat.image_channel_order = CL_RGBA; + break; + case UR_IMAGE_CHANNEL_ORDER_BGRA: + clImageFormat.image_channel_order = CL_BGRA; + break; + case UR_IMAGE_CHANNEL_ORDER_ARGB: + clImageFormat.image_channel_order = CL_ARGB; + break; + case UR_IMAGE_CHANNEL_ORDER_ABGR: + clImageFormat.image_channel_order = CL_ABGR; + break; + case UR_IMAGE_CHANNEL_ORDER_INTENSITY: + clImageFormat.image_channel_order = CL_INTENSITY; + break; + case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: + clImageFormat.image_channel_order = CL_LUMINANCE; + break; + case UR_IMAGE_CHANNEL_ORDER_RX: + clImageFormat.image_channel_order = CL_Rx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGX: + clImageFormat.image_channel_order = CL_RGx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBX: + clImageFormat.image_channel_order = CL_RGBx; + break; + case UR_IMAGE_CHANNEL_ORDER_SRGBA: + clImageFormat.image_channel_order = CL_sRGBA; + break; + default: + clImageFormat.image_channel_order = -1; + break; + } + + switch (pImageFormat->channelType) { + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: + clImageFormat.image_channel_data_type = CL_SNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: + clImageFormat.image_channel_data_type = CL_SNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: + clImageFormat.image_channel_data_type = CL_UNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: + 
clImageFormat.image_channel_data_type = CL_UNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: + clImageFormat.image_channel_data_type = CL_UNORM_SHORT_565; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: + clImageFormat.image_channel_data_type = CL_UNORM_SHORT_555; + break; + case UR_IMAGE_CHANNEL_TYPE_INT_101010: + clImageFormat.image_channel_data_type = CL_UNORM_INT_101010; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + clImageFormat.image_channel_data_type = CL_SIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + clImageFormat.image_channel_data_type = CL_SIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + clImageFormat.image_channel_data_type = CL_SIGNED_INT32; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + clImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + clImageFormat.image_channel_data_type = CL_UNSIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + clImageFormat.image_channel_data_type = CL_UNSIGNED_INT32; + break; + case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + clImageFormat.image_channel_data_type = CL_HALF_FLOAT; + break; + case UR_IMAGE_CHANNEL_TYPE_FLOAT: + clImageFormat.image_channel_data_type = CL_FLOAT; + break; + default: + clImageFormat.image_channel_data_type = -1; + break; + } + + return clImageFormat; +} + +cl_image_desc map_ur_image_desc_to_cl(const ur_image_desc_t *pImageDesc) { + cl_image_desc clImageDesc; + clImageDesc.image_type = cl::cast(pImageDesc->type); + + switch (pImageDesc->type) { + case UR_MEM_TYPE_BUFFER: + clImageDesc.image_type = CL_MEM_OBJECT_BUFFER; + break; + case UR_MEM_TYPE_IMAGE2D: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + break; + case UR_MEM_TYPE_IMAGE3D: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; + break; + case UR_MEM_TYPE_IMAGE2D_ARRAY: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D: + clImageDesc.image_type = 
CL_MEM_OBJECT_IMAGE1D; + break; + case UR_MEM_TYPE_IMAGE1D_ARRAY: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D_BUFFER: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + break; + default: + clImageDesc.image_type = -1; + break; + } + + clImageDesc.image_width = pImageDesc->width; + clImageDesc.image_height = pImageDesc->height; + clImageDesc.image_depth = pImageDesc->depth; + clImageDesc.image_array_size = pImageDesc->arraySize; + clImageDesc.image_row_pitch = pImageDesc->rowPitch; + clImageDesc.image_slice_pitch = pImageDesc->slicePitch; + clImageDesc.num_mip_levels = pImageDesc->numMipLevel; + clImageDesc.num_samples = pImageDesc->numSamples; + clImageDesc.buffer = nullptr; + clImageDesc.mem_object = nullptr; + + return clImageDesc; +} + +cl_int map_ur_mem_image_info_to_cl(ur_image_info_t urPropName) { + cl_int clPropName; + switch (urPropName) { + case UR_IMAGE_INFO_FORMAT: + clPropName = CL_IMAGE_FORMAT; + break; + case UR_IMAGE_INFO_ELEMENT_SIZE: + clPropName = CL_IMAGE_ELEMENT_SIZE; + break; + case UR_IMAGE_INFO_ROW_PITCH: + clPropName = CL_IMAGE_ROW_PITCH; + break; + case UR_IMAGE_INFO_SLICE_PITCH: + clPropName = CL_IMAGE_SLICE_PITCH; + break; + case UR_IMAGE_INFO_WIDTH: + clPropName = CL_IMAGE_WIDTH; + break; + case UR_IMAGE_INFO_HEIGHT: + clPropName = CL_IMAGE_HEIGHT; + break; + case UR_IMAGE_INFO_DEPTH: + clPropName = CL_IMAGE_DEPTH; + break; + default: + clPropName = -1; + } + + return clPropName; +} + +cl_int map_ur_mem_info_to_cl(ur_mem_info_t urPropName) { + cl_int clPropName; + switch (urPropName) { + case UR_MEM_INFO_SIZE: + clPropName = CL_MEM_SIZE; + break; + case UR_MEM_INFO_CONTEXT: + clPropName = CL_MEM_CONTEXT; + break; + default: + clPropName = -1; + } + + return clPropName; +} + +cl_map_flags convert_ur_mem_flags_to_cl(ur_mem_flags_t ur_flags) { + cl_map_flags cl_flags = 0; + if (ur_flags & UR_MEM_FLAG_READ_WRITE) { + cl_flags |= CL_MEM_READ_WRITE; + } + if (ur_flags & 
UR_MEM_FLAG_WRITE_ONLY) { + cl_flags |= CL_MEM_WRITE_ONLY; + } + if (ur_flags & UR_MEM_FLAG_READ_ONLY) { + cl_flags |= CL_MEM_READ_ONLY; + } + if (ur_flags & UR_MEM_FLAG_USE_HOST_POINTER) { + cl_flags |= CL_MEM_USE_HOST_PTR; + } + if (ur_flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { + cl_flags |= CL_MEM_ALLOC_HOST_PTR; + } + if (ur_flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + cl_flags |= CL_MEM_COPY_HOST_PTR; + } + + return cl_flags; +} + UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); + cl_int ret_err = CL_INVALID_OPERATION; if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and @@ -68,11 +294,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, void *pHost, ur_mem_handle_t *phMem) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + cl_int ret_err = CL_INVALID_OPERATION; + + cl_image_format image_format = map_ur_image_format_to_cl(pImageFormat); + cl_image_desc image_desc = map_ur_image_desc_to_cl(pImageDesc); + cl_map_flags map_flags = convert_ur_mem_flags_to_cl(flags); + *phMem = reinterpret_cast(clCreateImage( - cl::cast(hContext), static_cast(flags), - cl::cast(pImageFormat), - cl::cast(pImageDesc), pHost, + cl::cast(hContext), + map_flags, &image_format, + &image_desc, pHost, cl::cast(&ret_err))); CL_RETURN_ON_FAILURE(ret_err); @@ -83,11 +317,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( ur_mem_handle_t hBuffer, ur_mem_flags_t flags, ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, ur_mem_handle_t *phMem) { + 
UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + cl_int ret_err = CL_INVALID_OPERATION; + + cl_buffer_create_type buffer_create_type; + switch (bufferCreateType) { + case UR_BUFFER_CREATE_TYPE_REGION: + buffer_create_type = CL_BUFFER_CREATE_TYPE_REGION; + break; + default: + break; + } + + _cl_buffer_region buffer_region; + buffer_region.origin = pRegion->origin; + buffer_region.size = pRegion->size; + *phMem = reinterpret_cast(clCreateSubBuffer( cl::cast(hBuffer), static_cast(flags), - cl::cast(bufferCreateType), pRegion, - cl::cast(&ret_err))); + buffer_create_type, &buffer_region, cl::cast(&ret_err))); CL_RETURN_ON_FAILURE(ret_err); return UR_RESULT_SUCCESS; @@ -103,6 +353,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { (void)hContext; (void)pProperties; + UR_ASSERT(hNativeMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); *phMem = reinterpret_cast(hNativeMem); return UR_RESULT_SUCCESS; @@ -116,6 +368,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( (void)pImageFormat; (void)pImageDesc; (void)pProperties; + UR_ASSERT(hNativeMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); *phMem = reinterpret_cast(hNativeMem); return UR_RESULT_SUCCESS; @@ -126,8 +380,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl::cast(hMemory), propName, propSize, - pPropValue, pPropSizeRet)); + UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int clPropName = 
map_ur_mem_info_to_cl(propName); + + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl::cast(hMemory), clPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -135,18 +394,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { - CL_RETURN_ON_FAILURE(clGetImageInfo(cl::cast(hMemory), propName, propSize, - pPropValue, pPropSizeRet)); + size_t *pPropSizeRet) { + UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int clPropName = map_ur_mem_image_info_to_cl(propName); + + CL_RETURN_ON_FAILURE(clGetImageInfo(cl::cast(hMemory), clPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { + UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clRetainMemObject(cl::cast(hMem))); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { + UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clReleaseMemObject(cl::cast(hMem))); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 1d740ea3d88a4..3de5d4887f4f2 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -113,6 +113,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( } UR_DLLEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 
8ce33cb4146cc..5c97654cc0471 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -148,6 +148,7 @@ urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { pDdiTable->pfnBufferCreate = urMemBufferCreate; pDdiTable->pfnBufferPartition = urMemBufferPartition; pDdiTable->pfnBufferCreateWithNativeHandle = urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; pDdiTable->pfnGetInfo = urMemGetInfo; pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; pDdiTable->pfnImageCreate = urMemImageCreate; From 1d82dbaf3c94857229f1bd53ecfba9035c5550f4 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 25 May 2023 13:37:27 +0100 Subject: [PATCH 04/36] [SYCL][PI][OpenCL][UR] Port Image and Sampler --- sycl/plugins/opencl/CMakeLists.txt | 1 + sycl/plugins/opencl/pi_opencl.cpp | 39 +--- sycl/plugins/unified_runtime/pi2ur.hpp | 2 - .../ur/adapters/opencl/common.cpp | 4 +- .../ur/adapters/opencl/common.hpp | 2 - .../ur/adapters/opencl/sampler.cpp | 209 ++++++++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 190 ++++++++-------- sycl/plugins/unified_runtime/ur/ur.hpp | 2 +- 8 files changed, 311 insertions(+), 138 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 926014699c9ce..7ebd85a5ed66c 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -25,6 +25,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/device.cpp" "../unified_runtime/ur/adapters/opencl/device.hpp" "../unified_runtime/ur/adapters/opencl/platform.cpp" + "../unified_runtime/ur/adapters/opencl/sampler.cpp" "../unified_runtime/ur/adapters/opencl/memory.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" diff --git 
a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8e261a324bc30..422a29bbe4765 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -601,37 +601,6 @@ pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, return PI_SUCCESS; } -pi_result piSamplerCreate(pi_context context, - const pi_sampler_properties *sampler_properties, - pi_sampler *result_sampler) { - // Initialize properties according to OpenCL 2.1 spec. - pi_result error_code; - pi_bool normalizedCoords = PI_TRUE; - pi_sampler_addressing_mode addressingMode = PI_SAMPLER_ADDRESSING_MODE_CLAMP; - pi_sampler_filter_mode filterMode = PI_SAMPLER_FILTER_MODE_NEAREST; - - // Unpack sampler properties - for (std::size_t i = 0; sampler_properties && sampler_properties[i] != 0; - ++i) { - if (sampler_properties[i] == PI_SAMPLER_INFO_NORMALIZED_COORDS) { - normalizedCoords = static_cast(sampler_properties[++i]); - } else if (sampler_properties[i] == PI_SAMPLER_INFO_ADDRESSING_MODE) { - addressingMode = - static_cast(sampler_properties[++i]); - } else if (sampler_properties[i] == PI_SAMPLER_INFO_FILTER_MODE) { - filterMode = static_cast(sampler_properties[++i]); - } else { - assert(false && "Cannot recognize sampler property"); - } - } - - // Always call OpenCL 1.0 API - *result_sampler = cast( - clCreateSampler(cast(context), normalizedCoords, - addressingMode, filterMode, cast(&error_code))); - return error_code; -} - pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, const pi_mem_obj_property *arg_properties, const pi_mem *arg_value) { @@ -2117,10 +2086,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextEventGetNativeHandle, piextGetNativeHandle) _PI_CL(piextEventCreateWithNativeHandle, piextEventCreateWithNativeHandle) // Sampler - _PI_CL(piSamplerCreate, piSamplerCreate) - _PI_CL(piSamplerGetInfo, clGetSamplerInfo) - _PI_CL(piSamplerRetain, clRetainSampler) - _PI_CL(piSamplerRelease, 
clReleaseSampler) + _PI_CL(piSamplerCreate, pi2ur::piSamplerCreate) + _PI_CL(piSamplerGetInfo, pi2ur::piSamplerGetInfo) + _PI_CL(piSamplerRetain, pi2ur::piSamplerRetain) + _PI_CL(piSamplerRelease, pi2ur::piSamplerRelease) // Queue commands _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel) _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 5f737b6ed6d5e..be14e342c9f88 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -11,10 +11,8 @@ #include #include #include -#include #include - // Map of UR error codes to PI error codes static pi_result ur2piResult(ur_result_t urResult) { if (urResult == UR_RESULT_SUCCESS) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index bf4be5c5c336c..faf4fcfe73df4 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -8,8 +8,6 @@ #include "common.hpp" -#include - namespace cl { // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; @@ -51,7 +49,7 @@ ur_result_t map_cl_error_to_ur(cl_int result) { /// \param urObj is the UR object to get the native handle of /// \param nativeHandle is a pointer to be set to the native handle /// -/// PI_SUCCESS +/// UR_RESULT_SUCCESS ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle) { UR_ASSERT(nativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) *nativeHandle = reinterpret_cast(urObj); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index b4eed72f928f9..e10be131339b0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ 
-12,8 +12,6 @@ #include #include -#include - #define CL_RETURN_ON_FAILURE(clCall) \ if (const cl_int cl_result = clCall != CL_SUCCESS) { \ return map_cl_error_to_ur(cl_result); \ diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp new file mode 100644 index 0000000000000..6793737e1c5f0 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -0,0 +1,209 @@ +#include "cassert" +#include "common.hpp" +#include + +namespace { + +cl_sampler_info ur2clSamplerInfo(ur_sampler_info_t ur_info) { + switch (ur_info) { +#define CASE(UR_INFO, CL_INFO) \ + case UR_INFO: \ + return CL_INFO; + + CASE(UR_SAMPLER_INFO_REFERENCE_COUNT, CL_SAMPLER_REFERENCE_COUNT) + CASE(UR_SAMPLER_INFO_CONTEXT, CL_SAMPLER_CONTEXT) + CASE(UR_SAMPLER_INFO_NORMALIZED_COORDS, CL_SAMPLER_NORMALIZED_COORDS) + CASE(UR_SAMPLER_INFO_ADDRESSING_MODE, CL_SAMPLER_ADDRESSING_MODE) + CASE(UR_SAMPLER_INFO_FILTER_MODE, CL_SAMPLER_FILTER_MODE) + +#undef CASE + + default: + assert(0 && "Unhandled: ur_sampler_info_t"); + } +} + +cl_addressing_mode ur2clAddressingMode(ur_sampler_addressing_mode_t mode) { + switch (mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_ADDRESSING_MODE_NONE, CL_ADDRESS_NONE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP_TO_EDGE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP, CL_ADDRESS_CLAMP); + CASE(UR_SAMPLER_ADDRESSING_MODE_REPEAT, CL_ADDRESS_REPEAT); + CASE(UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, + CL_ADDRESS_MIRRORED_REPEAT); + +#undef CASE + + default: + assert(0 && "Unhandled: ur_sampler_addressing_mode_t"); + break; + } +} + +cl_filter_mode ur2clFilterMode(ur_sampler_filter_mode_t mode) { + switch (mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_FILTER_MODE_NEAREST, CL_FILTER_NEAREST) + CASE(UR_SAMPLER_FILTER_MODE_LINEAR, CL_FILTER_LINEAR) + +#undef CASE + + 
default: + assert(0 && "Unhandled: ur_sampler_filter_mode_t"); + break; + } +} + +ur_sampler_addressing_mode_t cl2urAddressingMode(cl_addressing_mode mode) { + switch (mode) { + +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_ADDRESS_NONE, UR_SAMPLER_ADDRESSING_MODE_NONE); + CASE(CL_ADDRESS_CLAMP_TO_EDGE, UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE); + CASE(CL_ADDRESS_CLAMP, UR_SAMPLER_ADDRESSING_MODE_CLAMP); + CASE(CL_ADDRESS_REPEAT, UR_SAMPLER_ADDRESSING_MODE_REPEAT); + CASE(CL_ADDRESS_MIRRORED_REPEAT, + UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT); + +#undef CASE + + default: + std::cout << mode << std::endl; + assert(0 && "Unhandled: cl_addressing_mode"); + break; + } +} + +ur_sampler_filter_mode_t cl2urFilterMode(cl_filter_mode mode) { + switch (mode) { +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_FILTER_NEAREST, UR_SAMPLER_FILTER_MODE_NEAREST) + CASE(CL_FILTER_LINEAR, UR_SAMPLER_FILTER_MODE_LINEAR); + +#undef CASE + + default: + assert(0 && "Unhandled: cl_filter_mode"); + break; + } +} + +void cl2urSamplerInfoValue(cl_sampler_info info, size_t infoSize, + void *infoValue) { + if (!infoValue) { + return; + } + switch (info) { + case CL_SAMPLER_ADDRESSING_MODE: { + cl_addressing_mode clValue = + *reinterpret_cast(infoValue); + *reinterpret_cast(infoValue) = + cl2urAddressingMode(clValue); + break; + } + case CL_SAMPLER_FILTER_MODE: { + cl_filter_mode clMode = *reinterpret_cast(infoValue); + *reinterpret_cast(infoValue) = + cl2urFilterMode(clMode); + break; + } + + default: + break; + } +} + +} // namespace + +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pDesc, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + // Initialize properties according to OpenCL 2.1 spec. 
+ ur_result_t error_code; + cl_addressing_mode addressingMode = + ur2clAddressingMode(pDesc->addressingMode); + cl_filter_mode filterMode = ur2clFilterMode(pDesc->filterMode); + + // Always call OpenCL 1.0 API + *phSampler = cl::cast(clCreateSampler( + cl::cast(hContext), + static_cast(pDesc->normalizedCoords), addressingMode, filterMode, + cl::cast(&error_code))); + + return map_cl_error_to_ur(error_code); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pPropValue || pPropSizeRet, UR_RESULT_ERROR_INVALID_VALUE); + + cl_sampler_info sampler_info = ur2clSamplerInfo(propName); + static_assert(sizeof(cl_addressing_mode) == + sizeof(ur_sampler_addressing_mode_t)); + + if (ur_result_t err = map_cl_error_to_ur( + clGetSamplerInfo(cl::cast(hSampler), sampler_info, + propSize, pPropValue, pPropSizeRet))) { + return err; + } + // Convert OpenCL returns to UR + cl2urSamplerInfoValue(sampler_info, propSize, pPropValue); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRetain(ur_sampler_handle_t hSampler) { + UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + return map_cl_error_to_ur(clRetainSampler(cl::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRelease(ur_sampler_handle_t hSampler) { + UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + return map_cl_error_to_ur(clReleaseSampler(cl::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( + ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { + UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativeSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativeSampler = + reinterpret_cast(cl::cast(hSampler)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL 
urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + ur_sampler_handle_t *phSampler) { + UR_ASSERT(hNativeSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + std::ignore = hContext; + *phSampler = reinterpret_cast( + cl::cast(hNativeSampler)); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 5c97654cc0471..969a9bbee66e3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -56,7 +56,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; pDdiTable->pfnRelease = urContextRelease; pDdiTable->pfnRetain = urContextRetain; -// pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + // pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; return UR_RESULT_SUCCESS; } @@ -66,14 +66,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; -// pDdiTable->pfnGetInfo = urEventGetInfo; -// pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; -// pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; -// pDdiTable->pfnRelease = urEventRelease; -// pDdiTable->pfnRetain = urEventRetain; -// pDdiTable->pfnSetCallback = urEventSetCallback; -// pDdiTable->pfnWait = urEventWait; + // pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; + // pDdiTable->pfnGetInfo = urEventGetInfo; + // pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; + // pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; + // 
pDdiTable->pfnRelease = urEventRelease; + // pDdiTable->pfnRetain = urEventRetain; + // pDdiTable->pfnSetCallback = urEventSetCallback; + // pDdiTable->pfnWait = urEventWait; return UR_RESULT_SUCCESS; } @@ -83,20 +83,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnBuild = urProgramBuild; -// pDdiTable->pfnCompile = urProgramCompile; -// pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; -// pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; -// pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; -// pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; -// pDdiTable->pfnGetFunctionPointer = nullptr; -// pDdiTable->pfnGetInfo = urProgramGetInfo; -// pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; -// pDdiTable->pfnLink = urProgramLink; -// pDdiTable->pfnRelease = urProgramRelease; -// pDdiTable->pfnRetain = urProgramRetain; -// pDdiTable->pfnSetSpecializationConstants = -// urProgramSetSpecializationConstants; + // pDdiTable->pfnBuild = urProgramBuild; + // pDdiTable->pfnCompile = urProgramCompile; + // pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; + // pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; + // pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + // pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + // pDdiTable->pfnGetFunctionPointer = nullptr; + // pDdiTable->pfnGetInfo = urProgramGetInfo; + // pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; + // pDdiTable->pfnLink = urProgramLink; + // pDdiTable->pfnRelease = urProgramRelease; + // pDdiTable->pfnRetain = urProgramRetain; + // pDdiTable->pfnSetSpecializationConstants = + // urProgramSetSpecializationConstants; return UR_RESULT_SUCCESS; } @@ -106,21 +106,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnCreate = urKernelCreate; -// 
pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; -// pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; -// pDdiTable->pfnGetInfo = urKernelGetInfo; -// pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; -// pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; -// pDdiTable->pfnRelease = urKernelRelease; -// pDdiTable->pfnRetain = urKernelRetain; -// pDdiTable->pfnSetArgLocal = nullptr; -// pDdiTable->pfnSetArgMemObj = nullptr; -// pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; -// pDdiTable->pfnSetArgSampler = nullptr; -// pDdiTable->pfnSetArgValue = urKernelSetArgValue; -// pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; -// pDdiTable->pfnSetSpecializationConstants = nullptr; + // pDdiTable->pfnCreate = urKernelCreate; + // pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; + // pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; + // pDdiTable->pfnGetInfo = urKernelGetInfo; + // pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; + // pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; + // pDdiTable->pfnRelease = urKernelRelease; + // pDdiTable->pfnRetain = urKernelRetain; + // pDdiTable->pfnSetArgLocal = nullptr; + // pDdiTable->pfnSetArgMemObj = nullptr; + // pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; + // pDdiTable->pfnSetArgSampler = nullptr; + // pDdiTable->pfnSetArgValue = urKernelSetArgValue; + // pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; + // pDdiTable->pfnSetSpecializationConstants = nullptr; return UR_RESULT_SUCCESS; } @@ -130,12 +130,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnCreate = urSamplerCreate; -// pDdiTable->pfnCreateWithNativeHandle = nullptr; -// pDdiTable->pfnGetInfo = urSamplerGetInfo; -// pDdiTable->pfnGetNativeHandle = nullptr; -// pDdiTable->pfnRelease = urSamplerRelease; -// pDdiTable->pfnRetain = urSamplerRetain; + pDdiTable->pfnCreate = urSamplerCreate; + 
pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; + pDdiTable->pfnRelease = urSamplerRelease; + pDdiTable->pfnRetain = urSamplerRetain; return UR_RESULT_SUCCESS; } @@ -145,16 +145,16 @@ urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnBufferCreateWithNativeHandle = urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnRetain = urMemRetain; + pDdiTable->pfnBufferCreate = urMemBufferCreate; + pDdiTable->pfnBufferPartition = urMemBufferPartition; + pDdiTable->pfnBufferCreateWithNativeHandle = + urMemBufferCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urMemGetInfo; + pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; + pDdiTable->pfnImageCreate = urMemImageCreate; + pDdiTable->pfnImageGetInfo = urMemImageGetInfo; + pDdiTable->pfnRelease = urMemRelease; + pDdiTable->pfnRetain = urMemRetain; return UR_RESULT_SUCCESS; } @@ -164,29 +164,29 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnDeviceGlobalVariableRead = nullptr; -// pDdiTable->pfnDeviceGlobalVariableWrite = nullptr; -// pDdiTable->pfnEventsWait = urEnqueueEventsWait; -// pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; -// pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; -// pDdiTable->pfnMemBufferCopy = nullptr; -// pDdiTable->pfnMemBufferCopyRect = nullptr; -// 
pDdiTable->pfnMemBufferFill = nullptr; -// pDdiTable->pfnMemBufferMap = nullptr; -// pDdiTable->pfnMemBufferRead = nullptr; -// pDdiTable->pfnMemBufferReadRect = nullptr; -// pDdiTable->pfnMemBufferWrite = nullptr; -// pDdiTable->pfnMemBufferWriteRect = nullptr; -// pDdiTable->pfnMemImageCopy = nullptr; -// pDdiTable->pfnMemImageRead = nullptr; -// pDdiTable->pfnMemImageWrite = nullptr; -// pDdiTable->pfnMemUnmap = nullptr; -// pDdiTable->pfnUSMFill2D = nullptr; -// pDdiTable->pfnUSMFill = nullptr; -// pDdiTable->pfnUSMAdvise = nullptr; -// pDdiTable->pfnUSMMemcpy2D = nullptr; -// pDdiTable->pfnUSMMemcpy = nullptr; -// pDdiTable->pfnUSMPrefetch = nullptr; + // pDdiTable->pfnDeviceGlobalVariableRead = nullptr; + // pDdiTable->pfnDeviceGlobalVariableWrite = nullptr; + // pDdiTable->pfnEventsWait = urEnqueueEventsWait; + // pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; + // pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; + // pDdiTable->pfnMemBufferCopy = nullptr; + // pDdiTable->pfnMemBufferCopyRect = nullptr; + // pDdiTable->pfnMemBufferFill = nullptr; + // pDdiTable->pfnMemBufferMap = nullptr; + // pDdiTable->pfnMemBufferRead = nullptr; + // pDdiTable->pfnMemBufferReadRect = nullptr; + // pDdiTable->pfnMemBufferWrite = nullptr; + // pDdiTable->pfnMemBufferWriteRect = nullptr; + // pDdiTable->pfnMemImageCopy = nullptr; + // pDdiTable->pfnMemImageRead = nullptr; + // pDdiTable->pfnMemImageWrite = nullptr; + // pDdiTable->pfnMemUnmap = nullptr; + // pDdiTable->pfnUSMFill2D = nullptr; + // pDdiTable->pfnUSMFill = nullptr; + // pDdiTable->pfnUSMAdvise = nullptr; + // pDdiTable->pfnUSMMemcpy2D = nullptr; + // pDdiTable->pfnUSMMemcpy = nullptr; + // pDdiTable->pfnUSMPrefetch = nullptr; return UR_RESULT_SUCCESS; } @@ -208,14 +208,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnCreate = urQueueCreate; -// pDdiTable->pfnCreateWithNativeHandle = 
urQueueCreateWithNativeHandle; -// pDdiTable->pfnFinish = urQueueFinish; -// pDdiTable->pfnFlush = urQueueFlush; -// pDdiTable->pfnGetInfo = urQueueGetInfo; -// pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; -// pDdiTable->pfnRelease = urQueueRelease; -// pDdiTable->pfnRetain = urQueueRetain; + // pDdiTable->pfnCreate = urQueueCreate; + // pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; + // pDdiTable->pfnFinish = urQueueFinish; + // pDdiTable->pfnFlush = urQueueFlush; + // pDdiTable->pfnGetInfo = urQueueGetInfo; + // pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; + // pDdiTable->pfnRelease = urQueueRelease; + // pDdiTable->pfnRetain = urQueueRetain; return UR_RESULT_SUCCESS; } @@ -225,14 +225,14 @@ urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { if (UR_RESULT_SUCCESS != result) { return result; } -// pDdiTable->pfnDeviceAlloc = nullptr; -// pDdiTable->pfnFree = nullptr; -// pDdiTable->pfnGetMemAllocInfo = nullptr; -// pDdiTable->pfnHostAlloc = nullptr; -// pDdiTable->pfnPoolCreate = nullptr; -// pDdiTable->pfnPoolDestroy = nullptr; -// pDdiTable->pfnPoolDestroy = nullptr; -// pDdiTable->pfnSharedAlloc = nullptr; + // pDdiTable->pfnDeviceAlloc = nullptr; + // pDdiTable->pfnFree = nullptr; + // pDdiTable->pfnGetMemAllocInfo = nullptr; + // pDdiTable->pfnHostAlloc = nullptr; + // pDdiTable->pfnPoolCreate = nullptr; + // pDdiTable->pfnPoolDestroy = nullptr; + // pDdiTable->pfnPoolDestroy = nullptr; + // pDdiTable->pfnSharedAlloc = nullptr; return UR_RESULT_SUCCESS; } @@ -244,13 +244,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( } pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; pDdiTable->pfnGet = urDeviceGet; -// pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; + // pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; pDdiTable->pfnGetInfo = urDeviceGetInfo; pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; 
pDdiTable->pfnPartition = urDevicePartition; pDdiTable->pfnRelease = urDeviceRelease; pDdiTable->pfnRetain = urDeviceRetain; -// pDdiTable->pfnSelectBinary = nullptr; + // pDdiTable->pfnSelectBinary = nullptr; return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp index f64fb18b6998c..0b1aec42a5a56 100644 --- a/sycl/plugins/unified_runtime/ur/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -271,7 +271,7 @@ class UrReturnHelper { param_value_size_ret, t); } - // Array return value where element type is differrent from T + // Array return value where element type is different from T template ur_result_t operator()(const T *t, size_t s) { return ur::getInfoArray(s, param_value_size, param_value, From fc496302332d4ccc23e8014eb4fc0adf6ddd82a0 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Thu, 1 Jun 2023 13:54:26 +0100 Subject: [PATCH 05/36] [SYCL][OpenCL] Rename `cl` namespace to `cl_adapter` to avoid ambiguity --- .../ur/adapters/opencl/common.cpp | 12 +-- .../ur/adapters/opencl/common.hpp | 2 +- .../ur/adapters/opencl/context.cpp | 22 ++--- .../ur/adapters/opencl/device.cpp | 96 ++++++++++--------- .../ur/adapters/opencl/memory.cpp | 77 ++++++++------- .../ur/adapters/opencl/platform.cpp | 13 +-- .../ur/adapters/opencl/sampler.cpp | 20 ++-- 7 files changed, 127 insertions(+), 115 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index faf4fcfe73df4..87ba7a5e3d42a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -8,16 +8,16 @@ #include "common.hpp" -namespace cl { +namespace cl_adapter { // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; -thread_local char ErrorMessage[cl::MaxMessageSize]; +thread_local char 
ErrorMessage[cl_adapter::MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code) { - assert(strlen(message) <= cl::MaxMessageSize); - strcpy(cl::ErrorMessage, message); + assert(strlen(message) <= cl_adapter::MaxMessageSize); + strcpy(cl_adapter::ErrorMessage, message); ErrorMessageCode = error_code; } } // namespace cl @@ -25,8 +25,8 @@ thread_local char ErrorMessage[cl::MaxMessageSize]; // Returns plugin specific error and warning messages; common implementation // that can be shared between adapters ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) { - *ppMessage = &cl::ErrorMessage[0]; - return cl::ErrorMessageCode; + *ppMessage = &cl_adapter::ErrorMessage[0]; + return cl_adapter::ErrorMessageCode; } ur_result_t map_cl_error_to_ur(cl_int result) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index e10be131339b0..797f2b4a0287f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -106,7 +106,7 @@ inline const OpenCLVersion V3_0(3, 0); } // namespace OCLV -namespace cl { +namespace cl_adapter { constexpr size_t MaxMessageSize = 256; extern thread_local ur_result_t ErrorMessageCode; extern thread_local char ErrorMessage[MaxMessageSize]; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 27dcd4cb821b0..ea5f347d281e9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -18,10 +18,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( UR_ASSERT(phContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int ret; - *phContext = cl::cast( - clCreateContext(nullptr, cl::cast(DeviceCount), - 
cl::cast(phDevices), nullptr, - nullptr, cl::cast(&ret))); + *phContext = cl_adapter::cast( + clCreateContext(nullptr, cl_adapter::cast(DeviceCount), + cl_adapter::cast(phDevices), + nullptr, nullptr, cl_adapter::cast(&ret))); return map_cl_error_to_ur(ret); } @@ -66,17 +66,17 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* These queries should be dealt with in context_impl.cpp by calling the * queries of each device separately and building the intersection set. */ - cl::setErrorMessage("These queries should have never come here.", - UR_RESULT_ERROR_INVALID_ARGUMENT); + cl_adapter::setErrorMessage("These queries should have never come here.", + UR_RESULT_ERROR_INVALID_ARGUMENT); return UR_RESULT_ERROR_INVALID_ENUMERATION; } case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: case UR_CONTEXT_INFO_REFERENCE_COUNT: { - CL_RETURN_ON_FAILURE(clGetContextInfo(cl::cast(hContext), - cl_propName, propSize, pPropValue, - pPropSizeRet)); + CL_RETURN_ON_FAILURE( + clGetContextInfo(cl_adapter::cast(hContext), cl_propName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } default: @@ -88,7 +88,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret = clReleaseContext(cl::cast(hContext)); + cl_int ret = clReleaseContext(cl_adapter::cast(hContext)); return map_cl_error_to_ur(ret); } @@ -96,7 +96,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret = clRetainContext(cl::cast(hContext)); + cl_int ret = clRetainContext(cl_adapter::cast(hContext)); return map_cl_error_to_ur(ret); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 775a023bded54..1fd0f402a8271 100644 --- 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -97,9 +97,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, return UR_RESULT_ERROR_INVALID_ENUMERATION; } - cl_int result = clGetDeviceIDs( - cl::cast(hPlatform), type, cl::cast(NumEntries), - cl::cast(phDevices), cl::cast(pNumDevices)); + cl_int result = clGetDeviceIDs(cl_adapter::cast(hPlatform), + type, cl_adapter::cast(NumEntries), + cl_adapter::cast(phDevices), + cl_adapter::cast(pNumDevices)); // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices if (result == CL_DEVICE_NOT_FOUND) { @@ -427,9 +428,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { cl_device_type cl_type; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, sizeof(cl_device_type), - &cl_type, nullptr)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + sizeof(cl_device_type), &cl_type, nullptr)); /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it @@ -450,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { OCLV::OpenCLVersion version; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl::cast(hDevice), version)); + getDeviceVersion(cl_adapter::cast(hDevice), version)); const std::string results = std::to_string(version.getMajor()) + "." 
+ std::to_string(version.getMinor()); @@ -459,8 +460,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PARTITION_PROPERTIES: case UR_DEVICE_INFO_PARTITION_TYPE: { size_t cl_size; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, 0, nullptr, &cl_size)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, 0, + nullptr, &cl_size)); const size_t n_properties = cl_size / sizeof(cl_device_partition_property); /* Special case for UR_DEVICE_INFO_PARTITION_TYPE because OpenCL @@ -472,9 +474,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, auto cl_value = reinterpret_cast(alloca(cl_size)); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, cl_size, cl_value, - nullptr)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + cl_size, cl_value, nullptr)); std::vector ur_value{}; for (size_t i = 0; i < n_properties; ++i) { @@ -510,12 +512,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * and we have to emulate it on older OpenCL runtimes. 
*/ OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl::cast(hDevice), devVer)); + getDeviceVersion(cl_adapter::cast(hDevice), devVer)); if (devVer >= OCLV::V2_1) { cl_uint cl_value; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, + cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(cl_uint), &cl_value, nullptr)); if (cl_value == 0u) { @@ -540,7 +542,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { bool supported; CL_RETURN_ON_FAILURE(checkDeviceExtensions( - cl::cast(hDevice), {"cl_khr_fp16"}, supported)); + cl_adapter::cast(hDevice), {"cl_khr_fp16"}, supported)); if (!supported) { return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -549,7 +551,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, cl_device_fp_config cl_value; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl::cast(hDevice), cl_propName, + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, sizeof(cl_device_fp_config), &cl_value, nullptr)); return ReturnValue(map_ur_cl_device_fp_config_to_ur(cl_value)); @@ -560,7 +562,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * appropriately */ OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl::cast(hDevice), devVer)); + getDeviceVersion(cl_adapter::cast(hDevice), devVer)); /* Minimum required capability to be returned. 
For OpenCL 1.2, this is all * that is required */ @@ -571,7 +573,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* For OpenCL >=3.0, the query should be implemented */ cl_device_atomic_capabilities cl_capabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ @@ -618,12 +621,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl::cast(hDevice), devVer)); + getDeviceVersion(cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -670,12 +674,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl::cast(hDevice), devVer)); + getDeviceVersion(cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); assert((cl_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && @@ -718,12 +723,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - 
getDeviceVersion(cl::cast(hDevice), devVer)); + getDeviceVersion(cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -770,7 +776,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ATOMIC_64: { bool supported = false; CL_RETURN_ON_FAILURE(checkDeviceExtensions( - cl::cast(hDevice), + cl_adapter::cast(hDevice), {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, supported)); @@ -779,16 +785,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - CL_DEVICE_TYPE, sizeof(cl_device_type), - &devType, nullptr)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &devType, nullptr)); return ReturnValue(devType == CL_DEVICE_TYPE_GPU); } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool supported = false; CL_RETURN_ON_FAILURE( - checkDeviceExtensions(cl::cast(hDevice), + checkDeviceExtensions(cl_adapter::cast(hDevice), {"cl_intel_mem_channel_property"}, supported)); return ReturnValue(supported); @@ -809,9 +815,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_flags_t (uint32_t) */ cl_bitfield cl_value; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, sizeof(cl_bitfield), - &cl_value, nullptr)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + sizeof(cl_bitfield), &cl_value, nullptr)); /* We can just static_cast the 
output because OpenCL and UR bitfields * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t @@ -832,9 +838,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ cl_bool cl_value; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, sizeof(cl_bool), - &cl_value, nullptr)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + sizeof(cl_bool), &cl_value, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(cl_value)); @@ -907,9 +913,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * | ur_device_handle_t | cl_device_id | 8 | */ - CL_RETURN_ON_FAILURE(clGetDeviceInfo(cl::cast(hDevice), - cl_propName, propSize, pPropValue, - pPropSizeRet)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -954,9 +960,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( cl_uint cl_num_devices_ret; CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl::cast(hDevice), - cl::cast(pProperties), 0, nullptr, - &cl_num_devices_ret)); + cl_adapter::cast(hDevice), + cl_adapter::cast(pProperties), 0, + nullptr, &cl_num_devices_ret)); if (pNumDevicesRet) { *pNumDevicesRet = cl_num_devices_ret; @@ -967,8 +973,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( if (phSubDevices) { std::vector cl_sub_devices(cl_num_devices_ret); CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl::cast(hDevice), - cl::cast(pProperties), + cl_adapter::cast(hDevice), + cl_adapter::cast(pProperties), cl_num_devices_ret, cl_sub_devices.data(), nullptr)); std::memcpy(phSubDevices, cl_sub_devices.data(), @@ -982,7 +988,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int result = clRetainDevice(cl::cast(hDevice)); + cl_int 
result = clRetainDevice(cl_adapter::cast(hDevice)); return map_cl_error_to_ur(result); } @@ -992,7 +998,7 @@ urDeviceRelease(ur_device_handle_t hDevice) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int result = clReleaseDevice(cl::cast(hDevice)); + cl_int result = clReleaseDevice(cl_adapter::cast(hDevice)); return map_cl_error_to_ur(result); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index e056e2b8a8529..1531ff4ab0153 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -10,7 +10,8 @@ #include -cl_image_format map_ur_image_format_to_cl(const ur_image_format_t *pImageFormat) { +cl_image_format +map_ur_image_format_to_cl(const ur_image_format_t *pImageFormat) { cl_image_format clImageFormat; switch (pImageFormat->channelOrder) { case UR_IMAGE_CHANNEL_ORDER_A: @@ -119,32 +120,33 @@ cl_image_format map_ur_image_format_to_cl(const ur_image_format_t *pImageFormat) cl_image_desc map_ur_image_desc_to_cl(const ur_image_desc_t *pImageDesc) { cl_image_desc clImageDesc; - clImageDesc.image_type = cl::cast(pImageDesc->type); + clImageDesc.image_type = + cl_adapter::cast(pImageDesc->type); switch (pImageDesc->type) { - case UR_MEM_TYPE_BUFFER: - clImageDesc.image_type = CL_MEM_OBJECT_BUFFER; + case UR_MEM_TYPE_BUFFER: + clImageDesc.image_type = CL_MEM_OBJECT_BUFFER; break; - case UR_MEM_TYPE_IMAGE2D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + case UR_MEM_TYPE_IMAGE2D: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; break; - case UR_MEM_TYPE_IMAGE3D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; + case UR_MEM_TYPE_IMAGE3D: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; break; - case UR_MEM_TYPE_IMAGE2D_ARRAY: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + case UR_MEM_TYPE_IMAGE2D_ARRAY: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break; - case 
UR_MEM_TYPE_IMAGE1D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; + case UR_MEM_TYPE_IMAGE1D: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; break; - case UR_MEM_TYPE_IMAGE1D_ARRAY: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + case UR_MEM_TYPE_IMAGE1D_ARRAY: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; - case UR_MEM_TYPE_IMAGE1D_BUFFER: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + case UR_MEM_TYPE_IMAGE1D_BUFFER: + clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; break; - default: - clImageDesc.image_type = -1; + default: + clImageDesc.image_type = -1; break; } @@ -237,14 +239,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int ret_err = CL_INVALID_OPERATION; if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl::cast(hContext); + cl_context CLContext = cl_adapter::cast(hContext); // First we need to look up the function pointer ret_err = cl_ext::getExtFuncFromContext( @@ -277,14 +279,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( *phBuffer = reinterpret_cast(FuncPtr( CLContext, propertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl::cast(&ret_err))); + size, pProperties->pHost, cl_adapter::cast(&ret_err))); CL_RETURN_ON_FAILURE(ret_err); } } *phBuffer = reinterpret_cast(clCreateBuffer( - cl::cast(hContext), static_cast(flags), size, - pProperties->pHost, cl::cast(&ret_err))); + cl_adapter::cast(hContext), static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&ret_err))); 
CL_RETURN_ON_FAILURE(ret_err); return UR_RESULT_SUCCESS; @@ -295,7 +297,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, void *pHost, ur_mem_handle_t *phMem) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int ret_err = CL_INVALID_OPERATION; @@ -304,10 +306,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_map_flags map_flags = convert_ur_mem_flags_to_cl(flags); *phMem = reinterpret_cast(clCreateImage( - cl::cast(hContext), - map_flags, &image_format, - &image_desc, pHost, - cl::cast(&ret_err))); + cl_adapter::cast(hContext), map_flags, &image_format, + &image_desc, pHost, cl_adapter::cast(&ret_err))); CL_RETURN_ON_FAILURE(ret_err); return UR_RESULT_SUCCESS; @@ -335,9 +335,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( buffer_region.origin = pRegion->origin; buffer_region.size = pRegion->size; - *phMem = reinterpret_cast(clCreateSubBuffer( - cl::cast(hBuffer), static_cast(flags), - buffer_create_type, &buffer_region, cl::cast(&ret_err))); + *phMem = reinterpret_cast( + clCreateSubBuffer(cl_adapter::cast(hBuffer), + static_cast(flags), buffer_create_type, + &buffer_region, cl_adapter::cast(&ret_err))); CL_RETURN_ON_FAILURE(ret_err); return UR_RESULT_SUCCESS; @@ -385,8 +386,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int clPropName = map_ur_mem_info_to_cl(propName); - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl::cast(hMemory), clPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), + clPropName, propSize, pPropValue, + pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -400,19 +402,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t 
hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int clPropName = map_ur_mem_image_info_to_cl(propName); - CL_RETURN_ON_FAILURE(clGetImageInfo(cl::cast(hMemory), clPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), + clPropName, propSize, pPropValue, + pPropSizeRet)); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clRetainMemObject(cl::cast(hMem))); + CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clReleaseMemObject(cl::cast(hMem))); + CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 3de5d4887f4f2..5bf16691c4051 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -52,9 +52,9 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { - CL_RETURN_ON_FAILURE(clGetPlatformInfo(cl::cast(hPlatform), - cl_propName, propSize, pPropValue, - pSizeRet)); + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(cl_adapter::cast(hPlatform), + cl_propName, propSize, pPropValue, pSizeRet)); return UR_RESULT_SUCCESS; } default: @@ -78,9 +78,10 @@ urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms, UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_SIZE); - cl_int result = 
clGetPlatformIDs(cl::cast(NumEntries), - cl::cast(phPlatforms), - cl::cast(pNumPlatforms)); + cl_int result = + clGetPlatformIDs(cl_adapter::cast(NumEntries), + cl_adapter::cast(phPlatforms), + cl_adapter::cast(pNumPlatforms)); /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (result == CL_PLATFORM_NOT_FOUND_KHR) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 6793737e1c5f0..2e7c6f1229379 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -144,10 +144,10 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, cl_filter_mode filterMode = ur2clFilterMode(pDesc->filterMode); // Always call OpenCL 1.0 API - *phSampler = cl::cast(clCreateSampler( - cl::cast(hContext), + *phSampler = cl_adapter::cast(clCreateSampler( + cl_adapter::cast(hContext), static_cast(pDesc->normalizedCoords), addressingMode, filterMode, - cl::cast(&error_code))); + cl_adapter::cast(&error_code))); return map_cl_error_to_ur(error_code); } @@ -163,7 +163,7 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, sizeof(ur_sampler_addressing_mode_t)); if (ur_result_t err = map_cl_error_to_ur( - clGetSamplerInfo(cl::cast(hSampler), sampler_info, + clGetSamplerInfo(cl_adapter::cast(hSampler), sampler_info, propSize, pPropValue, pPropSizeRet))) { return err; } @@ -176,13 +176,15 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - return map_cl_error_to_ur(clRetainSampler(cl::cast(hSampler))); + return map_cl_error_to_ur( + clRetainSampler(cl_adapter::cast(hSampler))); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { UR_ASSERT(hSampler, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); - return map_cl_error_to_ur(clReleaseSampler(cl::cast(hSampler))); + return map_cl_error_to_ur( + clReleaseSampler(cl_adapter::cast(hSampler))); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( @@ -190,8 +192,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phNativeSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeSampler = - reinterpret_cast(cl::cast(hSampler)); + *phNativeSampler = reinterpret_cast( + cl_adapter::cast(hSampler)); return UR_RESULT_SUCCESS; } @@ -204,6 +206,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( std::ignore = hContext; *phSampler = reinterpret_cast( - cl::cast(hNativeSampler)); + cl_adapter::cast(hNativeSampler)); return UR_RESULT_SUCCESS; } From 98c25b93575e224f4f1a07ab9f681fd1672d3175 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Fri, 26 May 2023 17:19:53 +0100 Subject: [PATCH 06/36] [SYCL][OpenCL] Port USM entry points to UR --- sycl/plugins/opencl/CMakeLists.txt | 1 + .../adapters/opencl/ur_interface_loader.cpp | 20 +- .../ur/adapters/opencl/usm.cpp | 457 ++++++++++++++++++ 3 files changed, 468 insertions(+), 10 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 7ebd85a5ed66c..eb527bfc9b5b7 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -27,6 +27,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/platform.cpp" "../unified_runtime/ur/adapters/opencl/sampler.cpp" "../unified_runtime/ur/adapters/opencl/memory.cpp" + "../unified_runtime/ur/adapters/opencl/usm.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp 
index 969a9bbee66e3..67293bfb2690f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -181,12 +181,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( // pDdiTable->pfnMemImageRead = nullptr; // pDdiTable->pfnMemImageWrite = nullptr; // pDdiTable->pfnMemUnmap = nullptr; - // pDdiTable->pfnUSMFill2D = nullptr; - // pDdiTable->pfnUSMFill = nullptr; - // pDdiTable->pfnUSMAdvise = nullptr; - // pDdiTable->pfnUSMMemcpy2D = nullptr; - // pDdiTable->pfnUSMMemcpy = nullptr; - // pDdiTable->pfnUSMPrefetch = nullptr; + pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; + pDdiTable->pfnUSMFill = urEnqueueUSMFill; + pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; + pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; + pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; return UR_RESULT_SUCCESS; } @@ -225,10 +225,10 @@ urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { if (UR_RESULT_SUCCESS != result) { return result; } - // pDdiTable->pfnDeviceAlloc = nullptr; - // pDdiTable->pfnFree = nullptr; - // pDdiTable->pfnGetMemAllocInfo = nullptr; - // pDdiTable->pfnHostAlloc = nullptr; + pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; + pDdiTable->pfnFree = urUSMFree; + pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; + pDdiTable->pfnHostAlloc = urUSMHostAlloc; // pDdiTable->pfnPoolCreate = nullptr; // pDdiTable->pfnPoolDestroy = nullptr; // pDdiTable->pfnPoolDestroy = nullptr; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp new file mode 100644 index 0000000000000..309794477aabc --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp @@ -0,0 +1,457 @@ +//===--------- usm.cpp - OpenCL Adapter -------------------------------===// +// +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, void **ppMem) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clHostMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::clHostMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, Properties, size, Alignment, + cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, 
ur_usm_pool_handle_t pool, + size_t size, void **ppMem) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clDeviceMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::clDeviceMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, + size_t size, void **ppMem) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + const auto *NextStruct = + (pUSMDesc ? static_cast(pUSMDesc->pNext) + : nullptr); + while (NextStruct) { + if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + reinterpret_cast(NextStruct); + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *DevDesc = + reinterpret_cast(NextStruct); + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + } + NextStruct = static_cast(NextStruct->pNext); + } + + cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; + + // Passing a flags value of 0 doesn't work, so truncate the properties + if (Flags == 0) { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clSharedMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::clSharedMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + assert(Alignment == 0 || + (RetVal == UR_RESULT_SUCCESS && + reinterpret_cast(*ppMem) % Alignment == 0)); + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, + void *pMem) { + UR_ASSERT(hContext, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + // Use a blocking free to avoid issues with indirect access from kernels that + // might be still running. + clMemBlockingFreeINTEL_fn FuncPtr = nullptr; + + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + cl_ext::clMemBlockingFreeName, &FuncPtr); + + if (FuncPtr) { + RetVal = map_cl_error_to_ur(FuncPtr(CLContext, pMem)); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( + ur_queue_handle_t hQueue, void *ptr, size_t patternSize, + const void *pPattern, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMemFillINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + cl_ext::clEnqueueMemFillName, &FuncPtr); + + if (FuncPtr) { + RetVal = map_cl_error_to_ur( + FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, + patternSize, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( + ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pSrc, 
UR_RESULT_ERROR_INVALID_NULL_POINTER); + + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::clEnqueueMemcpyName, &FuncPtr); + + if (FuncPtr) { + RetVal = map_cl_error_to_ur( + FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, + pSrc, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( + ur_queue_handle_t hQueue, const void *pMem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + (void)pMem; + (void)size; + + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + // flags is currently unused so fail if set + if (flags != 0) + return UR_RESULT_ERROR_INVALID_VALUE; + + return map_cl_error_to_ur(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + /* + // Use this once impls support it. 
+ // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMigrateMemINTEL_fn FuncPtr; + ur_result_t Err = cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMigrateMemINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = map_cl_error_to_ur( + FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + numEventsInWaitList, + reinterpret_cast(phEventWaitList), + reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, + ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { + (void)pMem; + (void)size; + (void)advice; + + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + return map_cl_error_to_ur(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), 0, nullptr, + reinterpret_cast(phEvent))); + + /* + // Change to use this once drivers support it. 
+ // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, + sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMemAdviseINTEL_fn FuncPtr; + ur_result_t Err = + cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMemAdviseINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = + map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + size, advice, 0, nullptr, reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( + ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, + const void *pPattern, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pMem; + std::ignore = pitch; + std::ignore = patternSize; + std::ignore = pPattern; + std::ignore = width; + std::ignore = height; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( + ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, + const void *pSrc, size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = blocking; + std::ignore = pDst; + std::ignore = dstPitch; + std::ignore = pSrc; + std::ignore = srcPitch; + std::ignore = width; + std::ignore = height; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, 
+ ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, + cl_ext::clGetMemAllocInfoName, &FuncPtr); + + cl_mem_info_intel PropNameCL; + switch (propName) { + case UR_USM_ALLOC_INFO_TYPE: + PropNameCL = CL_MEM_ALLOC_TYPE_INTEL; + break; + case UR_USM_ALLOC_INFO_BASE_PTR: + PropNameCL = CL_MEM_ALLOC_BASE_PTR_INTEL; + break; + case UR_USM_ALLOC_INFO_SIZE: + PropNameCL = CL_MEM_ALLOC_SIZE_INTEL; + break; + case UR_USM_ALLOC_INFO_DEVICE: + PropNameCL = CL_MEM_ALLOC_DEVICE_INTEL; + break; + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } + + if (FuncPtr) { + RetVal = map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hContext), + pMem, PropNameCL, propSize, pPropValue, + pPropSizeRet)); + if (RetVal == UR_RESULT_SUCCESS && pPropValue && + propName == UR_USM_ALLOC_INFO_TYPE) { + auto *AllocTypeCL = + static_cast(pPropValue); + ur_usm_type_t AllocTypeUR; + switch (*AllocTypeCL) { + case CL_MEM_TYPE_HOST_INTEL: + AllocTypeUR = UR_USM_TYPE_HOST; + break; + case CL_MEM_TYPE_DEVICE_INTEL: + AllocTypeUR = UR_USM_TYPE_DEVICE; + break; + case CL_MEM_TYPE_SHARED_INTEL: + AllocTypeUR = UR_USM_TYPE_SHARED; + break; + case CL_MEM_TYPE_UNKNOWN_INTEL: + default: + AllocTypeUR = UR_USM_TYPE_UNKNOWN; + break; + } + auto *AllocTypeOut = static_cast(pPropValue); + *AllocTypeOut = AllocTypeUR; + } + } + + return RetVal; +} From def7a179c0da7c37e11db27c0a28c925e26cb1cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 22 May 2023 13:43:05 +0100 Subject: [PATCH 07/36] Update UR to a more recent version --- sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp | 1 + 
sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp | 4 +++- sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 1fd0f402a8271..8914d1a993f33 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -1015,6 +1015,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t, + const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { UR_ASSERT(hNativeDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 5bf16691c4051..870e386489a8d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -105,7 +105,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( - ur_native_handle_t hNativePlatform, ur_platform_handle_t *phPlatform) { + ur_native_handle_t hNativePlatform, + const ur_platform_native_properties_t *, + ur_platform_handle_t *phPlatform) { UR_ASSERT(hNativePlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 2e7c6f1229379..056d61d572672 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -199,12 +199,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL 
urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { UR_ASSERT(hNativeSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); std::ignore = hContext; + std::ignore = pProperties; *phSampler = reinterpret_cast( cl_adapter::cast(hNativeSampler)); return UR_RESULT_SUCCESS; From 7b9c541854e12465e709dc60ede6e0ba08d3ac21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 23 May 2023 16:44:46 +0100 Subject: [PATCH 08/36] Port Program API's from PI to UR --- sycl/plugins/opencl/CMakeLists.txt | 4 + sycl/plugins/unified_runtime/pi2ur.hpp | 1 + .../ur/adapters/opencl/common.cpp | 7 +- .../ur/adapters/opencl/common.hpp | 29 +- .../ur/adapters/opencl/context.cpp | 26 +- .../ur/adapters/opencl/context.hpp | 19 + .../ur/adapters/opencl/device.cpp | 65 ++-- .../ur/adapters/opencl/device.hpp | 2 + .../ur/adapters/opencl/kernel.cpp | 8 + .../ur/adapters/opencl/platform.cpp | 25 +- .../ur/adapters/opencl/platform.hpp | 14 + .../ur/adapters/opencl/program.cpp | 346 ++++++++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 30 +- 13 files changed, 507 insertions(+), 69 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index eb527bfc9b5b7..7834ba1c269ba 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -22,12 +22,16 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/common.cpp" 
"../unified_runtime/ur/adapters/opencl/common.hpp" "../unified_runtime/ur/adapters/opencl/context.cpp" + "../unified_runtime/ur/adapters/opencl/context.hpp" "../unified_runtime/ur/adapters/opencl/device.cpp" "../unified_runtime/ur/adapters/opencl/device.hpp" + "../unified_runtime/ur/adapters/opencl/kernel.cpp" "../unified_runtime/ur/adapters/opencl/platform.cpp" + "../unified_runtime/ur/adapters/opencl/platform.hpp" "../unified_runtime/ur/adapters/opencl/sampler.cpp" "../unified_runtime/ur/adapters/opencl/memory.cpp" "../unified_runtime/ur/adapters/opencl/usm.cpp" + "../unified_runtime/ur/adapters/opencl/program.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index be14e342c9f88..b7da250610c35 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -12,6 +12,7 @@ #include #include #include +#include // Map of UR error codes to PI error codes static pi_result ur2piResult(ur_result_t urResult) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 87ba7a5e3d42a..294b3c9897c8e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -36,11 +36,16 @@ ur_result_t map_cl_error_to_ur(cl_int result) { case CL_OUT_OF_HOST_MEMORY: return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; case CL_INVALID_VALUE: + case CL_INVALID_BUILD_OPTIONS: return UR_RESULT_ERROR_INVALID_VALUE; case CL_INVALID_PLATFORM: return UR_RESULT_ERROR_INVALID_PLATFORM; + case CL_INVALID_OPERATION: + return UR_RESULT_ERROR_INVALID_OPERATION; + case CL_BUILD_PROGRAM_FAILURE: + return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE; default: - return UR_RESULT_ERROR_UNKNOWN; + assert(false && "OpenCL error has no UR equivalent."); } } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 797f2b4a0287f..6339204e87d78 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -10,11 +10,28 @@ #include #include #include +#include #include +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return + * from the current function. + */ #define CL_RETURN_ON_FAILURE(clCall) \ - if (const cl_int cl_result = clCall != CL_SUCCESS) { \ - return map_cl_error_to_ur(cl_result); \ + if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ + return map_cl_error_to_ur(cl_result_macro); \ + } + +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return + * from the current function and set the pointer `outPtr` to nullptr. + */ +#define CL_RETURN_ON_FAILURE_AND_SET_NULL(clCall, outPtr) \ + if (const cl_int cl_result_macro = clCall != CL_SUCCESS) { \ + if (outPtr != nullptr) { \ + *outPtr = nullptr; \ + } \ + return map_cl_error_to_ur(cl_result_macro); \ } namespace OCLV { @@ -130,10 +147,6 @@ template To cast(From value) { } } // namespace cl -ur_result_t map_cl_error_to_ur(cl_int result); - -ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle); - namespace cl_ext { // Older versions of GCC don't like "const" here #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) @@ -292,3 +305,7 @@ static ur_result_t getExtFuncFromContext(cl_context context, return UR_RESULT_SUCCESS; } } // namespace cl_ext + +ur_result_t map_cl_error_to_ur(cl_int result); + +ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index ea5f347d281e9..061b2d8a7d62d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -1,4 +1,4 @@ -//===--------- context.hpp - OpenCL Adapter ---------------------------===// +//===--------- context.cpp - OpenCL Adapter ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,10 +6,32 @@ // //===-----------------------------------------------------------------===// -#include "common.hpp" +#include "context.hpp" #include +cl_uint cl_adapter::getDevicesFromContext( + ur_context_handle_t hContext, + std::unique_ptr> &devicesInCtx) { + + cl_uint deviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), + CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), + &deviceCount, nullptr)); + + if (deviceCount < 1) { + return CL_INVALID_CONTEXT; + } + + devicesInCtx = std::make_unique>(deviceCount); + + CL_RETURN_ON_FAILURE(clGetContextInfo( + cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, + deviceCount * sizeof(cl_device_id), (*devicesInCtx).data(), nullptr)); + + return CL_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp new file mode 100644 index 0000000000000..c6b8cf21b2a74 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp @@ -0,0 +1,19 @@ +//===--------- context.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include +#include + +namespace cl_adapter { +cl_uint +getDevicesFromContext(ur_context_handle_t hContext, + std::unique_ptr> &devicesInCtx); +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 8914d1a993f33..ef473d1334df5 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -12,51 +12,35 @@ #include #include -cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { - cl_int ret_err = CL_INVALID_VALUE; +cl_int cl_adapter::getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { size_t devVerSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize); - - if (ret_err != CL_SUCCESS) { - return ret_err; - } + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize)); std::string devVer(devVerSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(), - nullptr); - - if (ret_err != CL_SUCCESS) { - return ret_err; - } + CL_RETURN_ON_FAILURE(clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, + devVer.data(), nullptr)); version = OCLV::OpenCLVersion(devVer); if (!version.isValid()) { return CL_INVALID_DEVICE; } - return ret_err; + return CL_SUCCESS; } -cl_int checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported) { - cl_int ret_err = CL_INVALID_VALUE; - +cl_int cl_adapter::checkDeviceExtensions(cl_device_id dev, + const std::vector &exts, + bool &supported) { size_t extSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize); - - if (ret_err != CL_SUCCESS) { - return ret_err; - } + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(dev, 
CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize)); std::string extStr(extSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(), - nullptr); - if (ret_err != CL_SUCCESS) { - return ret_err; - } + CL_RETURN_ON_FAILURE(clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, + extStr.data(), nullptr)); supported = true; for (const std::string &ext : exts) { @@ -65,7 +49,7 @@ cl_int checkDeviceExtensions(cl_device_id dev, } } - return ret_err; + return CL_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, @@ -451,7 +435,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { OCLV::OpenCLVersion version; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), version)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), version)); const std::string results = std::to_string(version.getMajor()) + "." + std::to_string(version.getMinor()); @@ -512,7 +496,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * and we have to emulate it on older OpenCL runtimes. 
*/ OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); if (devVer >= OCLV::V2_1) { cl_uint cl_value; @@ -541,7 +525,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { bool supported; - CL_RETURN_ON_FAILURE(checkDeviceExtensions( + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( cl_adapter::cast(hDevice), {"cl_khr_fp16"}, supported)); if (!supported) { @@ -562,7 +546,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * appropriately */ OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); /* Minimum required capability to be returned. For OpenCL 1.2, this is all * that is required */ @@ -621,7 +605,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -674,7 +658,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -723,7 +707,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, OCLV::OpenCLVersion devVer; CL_RETURN_ON_FAILURE( - getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); 
cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -775,7 +759,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_64: { bool supported = false; - CL_RETURN_ON_FAILURE(checkDeviceExtensions( + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( cl_adapter::cast(hDevice), {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, supported)); @@ -794,7 +778,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool supported = false; CL_RETURN_ON_FAILURE( - checkDeviceExtensions(cl_adapter::cast(hDevice), + cl_adapter::checkDeviceExtensions(cl_adapter::cast(hDevice), {"cl_intel_mem_channel_property"}, supported)); return ReturnValue(supported); @@ -1015,8 +999,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t, - const ur_device_native_properties_t *, - ur_device_handle_t *phDevice) { + const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { UR_ASSERT(hNativeDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp index 596a608546429..87571114fc79c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -12,8 +12,10 @@ #include #include +namespace cl_adapter { cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version); cl_int checkDeviceExtensions(cl_device_id dev, const std::vector &exts, bool &supported); +} // namespace cl diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp new file mode 100644 index 0000000000000..28033d6241a01 --- /dev/null +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -0,0 +1,8 @@ +//===----------- kernel.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------===// +#include "common.hpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 870e386489a8d..d0d6c2a2e6e62 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -1,4 +1,4 @@ -//===--------- platform.hpp - OpenCL Adapter ---------------------------===// +//===--------- platform.cpp - OpenCL Adapter ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,10 +6,28 @@ // //===-----------------------------------------------------------------===// -#include "common.hpp" +#include "platform.hpp" #include +cl_int cl::getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version) { + + size_t platVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize)); + + std::string platVer(platVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize, + platVer.data(), nullptr)); + + version = OCLV::OpenCLVersion(platVer); + if (!version.isValid()) { + return CL_INVALID_PLATFORM; + } + + return CL_SUCCESS; +} + cl_int map_ur_platform_info_to_cl(ur_platform_info_t urPropName) { cl_int cl_propName; @@ -105,8 +123,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( - ur_native_handle_t hNativePlatform, - const ur_platform_native_properties_t *, + ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { UR_ASSERT(hNativePlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp new file mode 100644 index 0000000000000..d5f14e774c3af --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp @@ -0,0 +1,14 @@ +//===--------- platform.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl { +cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version); +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp new file mode 100644 index 0000000000000..bf6ec337b97ac --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -0,0 +1,346 @@ +//===--------- platform.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "platform.hpp" + +namespace cl { +cl_uint getDevicesFromProgram( + ur_program_handle_t hProgram, + std::unique_ptr> &devicesInProgram) { + + cl_uint deviceCount; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), + &deviceCount, nullptr)); + + if (deviceCount < 1) { + return CL_INVALID_CONTEXT; + } + + devicesInProgram = std::make_unique>(deviceCount); + + CL_RETURN_ON_FAILURE(clGetProgramInfo( + cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, + deviceCount * sizeof(cl_device_id), (*devicesInProgram).data(), nullptr)); + + return CL_SUCCESS; +} +} // namespace cl + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( + ur_context_handle_t hContext, const void *pIL, size_t length, + const ur_program_properties_t *, ur_program_handle_t *phProgram) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pIL, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phProgram, 
UR_RESULT_ERROR_INVALID_NULL_POINTER); + + std::unique_ptr> devicesInCtx; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getDevicesFromContext(hContext, devicesInCtx), phProgram); + + cl_platform_id curPlatform; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + clGetDeviceInfo((*devicesInCtx)[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &curPlatform, nullptr), + phProgram); + + OCLV::OpenCLVersion platVer; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl::getPlatformVersion(curPlatform, platVer), phProgram); + + cl_int err = CL_SUCCESS; + if (platVer >= OCLV::V2_1) { + + /* Make sure all devices support CL 2.1 or newer as well. */ + for (cl_device_id dev : *devicesInCtx) { + OCLV::OpenCLVersion devVer; + + CL_RETURN_ON_FAILURE_AND_SET_NULL(cl_adapter::getDeviceVersion(dev, devVer), + phProgram); + + /* If the device does not support CL 2.1 or greater, we need to make sure + * it supports the cl_khr_il_program extension. + */ + if (devVer < OCLV::V2_1) { + bool supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported), + phProgram); + + if (!supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } + } + } + + *phProgram = cl_adapter::cast(clCreateProgramWithIL( + cl_adapter::cast(hContext), pIL, length, &err)); + CL_RETURN_ON_FAILURE(err); + } + + /* If none of the devices conform with CL 2.1 or newer make sure they all + * support the cl_khr_il_program extension. 
+ */ + for (cl_device_id dev : *devicesInCtx) { + bool supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported), + phProgram); + + if (!supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } + } + + using apiFuncT = + cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + apiFuncT funcPtr = + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( + curPlatform, "clCreateProgramWithILKHR")); + + assert(funcPtr != nullptr); + + *phProgram = cl_adapter::cast( + funcPtr(cl_adapter::cast(hContext), pIL, length, &err)); + CL_RETURN_ON_FAILURE(err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *, + ur_program_handle_t *phProgram) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pBinary, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + cl_int binary_status; + cl_int cl_result; + *phProgram = cl_adapter::cast(clCreateProgramWithBinary( + cl_adapter::cast(hContext), cl_adapter::cast(1u), + cl_adapter::cast(&hDevice), &size, &pBinary, &binary_status, + &cl_result)); + CL_RETURN_ON_FAILURE(binary_status); + CL_RETURN_ON_FAILURE(cl_result); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, + const char *pOptions) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + std::unique_ptr> devicesInProgram; + CL_RETURN_ON_FAILURE(cl::getDevicesFromProgram(hProgram, devicesInProgram)); + + CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), + devicesInProgram->size(), + 
devicesInProgram->data(), pOptions, 0, + nullptr, nullptr, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} + +cl_int map_ur_program_info_to_cl(ur_program_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_PROGRAM_INFO_REFERENCE_COUNT: + cl_propName = CL_PROGRAM_REFERENCE_COUNT; + break; + case UR_PROGRAM_INFO_CONTEXT: + cl_propName = CL_PROGRAM_CONTEXT; + break; + case UR_PROGRAM_INFO_NUM_DEVICES: + cl_propName = CL_PROGRAM_NUM_DEVICES; + break; + case UR_PROGRAM_INFO_DEVICES: + cl_propName = CL_PROGRAM_DEVICES; + break; + case UR_PROGRAM_INFO_SOURCE: + cl_propName = CL_PROGRAM_SOURCE; + break; + case UR_PROGRAM_INFO_BINARY_SIZES: + cl_propName = CL_PROGRAM_BINARY_SIZES; + break; + case UR_PROGRAM_INFO_BINARIES: + cl_propName = CL_PROGRAM_BINARIES; + break; + case UR_PROGRAM_INFO_NUM_KERNELS: + cl_propName = CL_PROGRAM_NUM_KERNELS; + break; + case UR_PROGRAM_INFO_KERNEL_NAMES: + cl_propName = CL_PROGRAM_KERNEL_NAMES; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + map_ur_program_info_to_cl(propName), + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const char *pOptions) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + std::unique_ptr> devicesInProgram; + CL_RETURN_ON_FAILURE(cl::getDevicesFromProgram(hProgram, devicesInProgram)); + + CL_RETURN_ON_FAILURE( + clBuildProgram(cl_adapter::cast(hProgram), devicesInProgram->size(), + devicesInProgram->data(), pOptions, nullptr, 
nullptr)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, const char *pOptions, + ur_program_handle_t *phProgram) { + + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phPrograms, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + cl_int cl_result; + *phProgram = cl_adapter::cast(clLinkProgram( + cl_adapter::cast(hContext), 0, nullptr, pOptions, + cl_adapter::cast(count), cl_adapter::cast(phPrograms), + nullptr, nullptr, &cl_result)); + CL_RETURN_ON_FAILURE(cl_result); + + return UR_RESULT_SUCCESS; +} + +cl_int map_ur_program_build_info_to_cl(ur_program_build_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_PROGRAM_BUILD_INFO_STATUS: + cl_propName = CL_PROGRAM_BUILD_STATUS; + break; + case UR_PROGRAM_BUILD_INFO_OPTIONS: + cl_propName = CL_PROGRAM_BUILD_OPTIONS; + break; + case UR_PROGRAM_BUILD_INFO_LOG: + cl_propName = CL_PROGRAM_BUILD_LOG; + break; + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + cl_propName = CL_PROGRAM_BINARY_TYPE; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, + ur_program_build_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( + cl_adapter::cast(hProgram), cl_adapter::cast(hDevice), + map_ur_program_build_info_to_cl(propName), propSize, pPropValue, + pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRetain(ur_program_handle_t hProgram) { + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + 
CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRelease(ur_program_handle_t hProgram) { + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clReleaseProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( + ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { + + UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativeProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativeProgram = reinterpret_cast(hProgram); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t, + const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { + UR_ASSERT(hNativeProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + *phProgram = reinterpret_cast(hNativeProgram); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants) { + + cl_program clProg = cl_adapter::cast(hProgram); + cl_context Ctx = nullptr; + size_t RetSize = 0; + + CL_RETURN_ON_FAILURE(clGetProgramInfo(clProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), + &Ctx, &RetSize)); + + cl_ext::clSetProgramSpecializationConstant_fn F = nullptr; + const ur_result_t ur_result = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, + cl_ext::clSetProgramSpecializationConstantName, &F); + + if (ur_result != UR_RESULT_SUCCESS) { + return ur_result; + } + + for (uint32_t i = 0; i < count; ++i) { + CL_RETURN_ON_FAILURE(F(clProg, pSpecConstants[i].id, pSpecConstants[i].size, + pSpecConstants[i].pValue)); + } + + return UR_RESULT_SUCCESS; +} diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 67293bfb2690f..2e6578e135a2b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -83,20 +83,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - // pDdiTable->pfnBuild = urProgramBuild; - // pDdiTable->pfnCompile = urProgramCompile; - // pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - // pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - // pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; - // pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; - // pDdiTable->pfnGetFunctionPointer = nullptr; - // pDdiTable->pfnGetInfo = urProgramGetInfo; - // pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - // pDdiTable->pfnLink = urProgramLink; - // pDdiTable->pfnRelease = urProgramRelease; - // pDdiTable->pfnRetain = urProgramRetain; - // pDdiTable->pfnSetSpecializationConstants = - // urProgramSetSpecializationConstants; + pDdiTable->pfnBuild = urProgramBuild; + pDdiTable->pfnCompile = urProgramCompile; + pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; + pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; + pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + pDdiTable->pfnGetFunctionPointer = nullptr; + pDdiTable->pfnGetInfo = urProgramGetInfo; + pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; + pDdiTable->pfnLink = urProgramLink; + pDdiTable->pfnRelease = urProgramRelease; + pDdiTable->pfnRetain = urProgramRetain; + pDdiTable->pfnSetSpecializationConstants = + urProgramSetSpecializationConstants; return UR_RESULT_SUCCESS; } @@ -131,7 +131,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetSamplerProcAddrTable( return result; } pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; + // pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; pDdiTable->pfnGetInfo = urSamplerGetInfo; pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; pDdiTable->pfnRelease = urSamplerRelease; From 7cfd02ba92b1388ec14d14900c8f03b8c95f60fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 24 May 2023 17:21:11 +0100 Subject: [PATCH 09/36] Port Kernel API's from PI to UR --- .../ur/adapters/opencl/common.cpp | 2 +- .../ur/adapters/opencl/common.hpp | 19 +- .../ur/adapters/opencl/context.cpp | 8 +- .../ur/adapters/opencl/context.hpp | 2 +- .../ur/adapters/opencl/device.cpp | 48 +-- .../ur/adapters/opencl/device.hpp | 10 +- .../ur/adapters/opencl/kernel.cpp | 298 ++++++++++++++++++ .../ur/adapters/opencl/platform.cpp | 23 +- .../ur/adapters/opencl/platform.hpp | 5 +- .../ur/adapters/opencl/program.cpp | 60 ++-- .../adapters/opencl/ur_interface_loader.cpp | 30 +- 11 files changed, 410 insertions(+), 95 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 294b3c9897c8e..6e0c0a776e5ec 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -20,7 +20,7 @@ thread_local char ErrorMessage[cl_adapter::MaxMessageSize]; strcpy(cl_adapter::ErrorMessage, message); ErrorMessageCode = error_code; } -} // namespace cl +} // namespace cl_adapter // Returns plugin specific error and warning messages; common implementation // that can be shared between adapters diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 6339204e87d78..bf101ac0ac626 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -14,17 +14,28 @@ #include /** - * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return - * from the current function. + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically map + * the OpenCL error to UR and return from the current function. */ #define CL_RETURN_ON_FAILURE(clCall) \ if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ return map_cl_error_to_ur(cl_result_macro); \ } +/** + * Call an UR API and, if the result is not UR_RESULT_SUCCESS, automatically + * return from the current function. + */ +#define UR_RETURN_ON_FAILURE(urCall) \ + if (const ur_result_t ur_result_macro = urCall; \ + ur_result_macro != UR_RESULT_SUCCESS) { \ + return ur_result_macro; \ + } + /** * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return - * from the current function and set the pointer `outPtr` to nullptr. + * from the current function and set the pointer `outPtr` to nullptr. 
The OpenCL + * error is mapped to UR */ #define CL_RETURN_ON_FAILURE_AND_SET_NULL(clCall, outPtr) \ if (const cl_int cl_result_macro = clCall != CL_SUCCESS) { \ @@ -145,7 +156,7 @@ template To cast(From value) { return static_cast(value); } } -} // namespace cl +} // namespace cl_adapter namespace cl_ext { // Older versions of GCC don't like "const" here diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 061b2d8a7d62d..577cef3d56a9b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -10,7 +10,7 @@ #include -cl_uint cl_adapter::getDevicesFromContext( +ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, std::unique_ptr> &devicesInCtx) { @@ -20,7 +20,7 @@ cl_uint cl_adapter::getDevicesFromContext( &deviceCount, nullptr)); if (deviceCount < 1) { - return CL_INVALID_CONTEXT; + return UR_RESULT_ERROR_INVALID_CONTEXT; } devicesInCtx = std::make_unique>(deviceCount); @@ -29,7 +29,7 @@ cl_uint cl_adapter::getDevicesFromContext( cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, deviceCount * sizeof(cl_device_id), (*devicesInCtx).data(), nullptr)); - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( @@ -48,7 +48,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return map_cl_error_to_ur(ret); } -cl_int map_ur_context_info_to_cl(ur_context_info_t urPropName) { +static cl_int map_ur_context_info_to_cl(ur_context_info_t urPropName) { cl_int cl_propName; switch (urPropName) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp index c6b8cf21b2a74..1ce55c81e748b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp @@ -13,7 +13,7 @@ #include namespace 
cl_adapter { -cl_uint +ur_result_t getDevicesFromContext(ur_context_handle_t hContext, std::unique_ptr> &devicesInCtx); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index ef473d1334df5..61c53b21149b8 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -12,7 +12,8 @@ #include #include -cl_int cl_adapter::getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { +ur_result_t cl_adapter::getDeviceVersion(cl_device_id dev, + OCLV::OpenCLVersion &version) { size_t devVerSize = 0; CL_RETURN_ON_FAILURE( @@ -24,15 +25,14 @@ cl_int cl_adapter::getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &versi version = OCLV::OpenCLVersion(devVer); if (!version.isValid()) { - return CL_INVALID_DEVICE; + return UR_RESULT_ERROR_INVALID_DEVICE; } - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } -cl_int cl_adapter::checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported) { +ur_result_t cl_adapter::checkDeviceExtensions( + cl_device_id dev, const std::vector &exts, bool &supported) { size_t extSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize)); @@ -49,7 +49,7 @@ cl_int cl_adapter::checkDeviceExtensions(cl_device_id dev, } } - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, @@ -97,7 +97,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, return map_cl_error_to_ur(result); } -ur_device_fp_capability_flags_t +static ur_device_fp_capability_flags_t map_ur_cl_device_fp_config_to_ur(cl_device_fp_config cl_value) { ur_device_fp_capability_flags_t ur_value = 0; @@ -129,7 +129,7 @@ map_ur_cl_device_fp_config_to_ur(cl_device_fp_config cl_value) { return ur_value; } -cl_int map_ur_device_info_to_cl(ur_device_info_t 
urPropName) { +static cl_int map_ur_device_info_to_cl(ur_device_info_t urPropName) { cl_int cl_propName; switch (static_cast(urPropName)) { @@ -434,8 +434,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { OCLV::OpenCLVersion version; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), version)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), version)); const std::string results = std::to_string(version.getMajor()) + "." + std::to_string(version.getMinor()); @@ -495,8 +495,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), devVer)); if (devVer >= OCLV::V2_1) { cl_uint cl_value; @@ -545,8 +545,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), devVer)); /* Minimum required capability to be returned.
For OpenCL 1.2, this is all * that is required */ @@ -604,8 +604,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -657,8 +657,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -706,8 +706,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE( - cl_adapter::getDeviceVersion(cl_adapter::cast(hDevice), devVer)); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), devVer)); cl_device_atomic_capabilities cl_capabilities; if (devVer >= OCLV::V3_0) { @@ -777,9 +777,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool supported = false; - CL_RETURN_ON_FAILURE( - cl_adapter::checkDeviceExtensions(cl_adapter::cast(hDevice), - {"cl_intel_mem_channel_property"}, supported)); + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_mem_channel_property"}, supported)); return ReturnValue(supported); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp index 87571114fc79c..14890f8ce8090 100644 ---
a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -13,9 +13,9 @@ #include namespace cl_adapter { -cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version); +ur_result_t getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version); -cl_int checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported); -} // namespace cl +ur_result_t checkDeviceExtensions(cl_device_id dev, + const std::vector &exts, + bool &supported); +} // namespace cl_adapter diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 28033d6241a01..30403f12a1955 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -6,3 +6,301 @@ // //===-------------------------------------------------------------------===// #include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, + ur_kernel_handle_t *phKernel) { + + cl_int cl_result; + *phKernel = cl_adapter::cast(clCreateKernel( + cl_adapter::cast(hProgram), pKernelName, &cl_result)); + CL_RETURN_ON_FAILURE(cl_result); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgValue(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, const void *pArgValue) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, pArgValue)); + + return UR_RESULT_SUCCESS; +} + +static cl_int map_ur_kernel_info_to_cl(ur_kernel_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_KERNEL_INFO_FUNCTION_NAME: + cl_propName = CL_KERNEL_FUNCTION_NAME; + break; + case UR_KERNEL_INFO_NUM_ARGS: + cl_propName = CL_KERNEL_NUM_ARGS; + break; + case UR_KERNEL_INFO_REFERENCE_COUNT: + cl_propName = 
CL_KERNEL_REFERENCE_COUNT; + break; + case UR_KERNEL_INFO_CONTEXT: + cl_propName = CL_KERNEL_CONTEXT; + break; + case UR_KERNEL_INFO_PROGRAM: + cl_propName = CL_KERNEL_PROGRAM; + break; + case UR_KERNEL_INFO_ATTRIBUTES: + cl_propName = CL_KERNEL_ATTRIBUTES; + break; + case UR_KERNEL_INFO_NUM_REGS: + cl_propName = -1; // no OpenCL register-count query; treat like an unknown name + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + map_ur_kernel_info_to_cl(propName), + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int +map_ur_kernel_group_info_to_cl(ur_kernel_group_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: + cl_propName = CL_KERNEL_GLOBAL_WORK_SIZE; + break; + case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: + cl_propName = CL_KERNEL_WORK_GROUP_SIZE; + break; + case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: + cl_propName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; + break; + case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: + cl_propName = CL_KERNEL_LOCAL_MEM_SIZE; + break; + case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + cl_propName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; + break; + case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: + cl_propName = CL_KERNEL_PRIVATE_MEM_SIZE; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice,
UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE( + clGetKernelWorkGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + map_ur_kernel_group_info_to_cl(propName), + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int +map_ur_kernel_sub_group_info_to_cl(ur_kernel_sub_group_info_t urPropName) { + + cl_int cl_propName; + switch (static_cast(urPropName)) { + case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: + cl_propName = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE; + break; + case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: + cl_propName = CL_KERNEL_MAX_NUM_SUB_GROUPS; + break; + case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: + cl_propName = CL_KERNEL_COMPILE_NUM_SUB_GROUPS; + break; + case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: + cl_propName = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL; + break; + default: + cl_propName = -1; + } + + return cl_propName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE( + clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + map_ur_kernel_sub_group_info_to_cl(propName), 0, + nullptr, propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelRelease(ur_kernel_handle_t hKernel) { + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + return 
UR_RESULT_SUCCESS; +} + +/** + * Enables indirect access of pointers in kernels. Necessary to avoid telling CL + * about every pointer that might be used. + */ +static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { + + cl_bool TrueVal = CL_TRUE; + clHostMemAllocINTEL_fn HFunc = nullptr; + clSharedMemAllocINTEL_fn SFunc = nullptr; + clDeviceMemAllocINTEL_fn DFunc = nullptr; + cl_context CLContext; + + /* We test that each alloc type is supported before we actually try to set + * KernelExecInfo. */ + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::clHostMemAllocName, &HFunc)); + + if (HFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::clDeviceMemAllocName, &DFunc)); + + if (DFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::clSharedMemAllocName, &SFunc)); + + if (SFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, const void *pPropValue) { + + switch (propName) { + case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { + if (*(static_cast(pPropValue)) == true) { + 
UR_RETURN_ON_FAILURE(USMSetIndirectAccess(hKernel)); + } + return UR_RESULT_SUCCESS; + } + case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { + /* Setting the cache config is unsupported in OpenCL */ + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + case UR_KERNEL_EXEC_INFO_USM_PTRS: { // forwarded with the OpenCL enumerator, not the UR one + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + cl_adapter::cast(hKernel), CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); + return UR_RESULT_SUCCESS; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( + ur_kernel_handle_t hKernel, uint32_t argIndex, const void *pArgValue) { + + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, + cl_ext::clSetKernelArgMemPointerName, &FuncPtr)); + + if (FuncPtr) { + /* OpenCL passes pointers by value not by reference.
This means we need to + * deref the arg to get the pointer value */ + auto PtrToPtr = reinterpret_cast(pArgValue); + auto DerefPtr = reinterpret_cast(*PtrToPtr); + CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + DerefPtr)); + } + + return UR_RESULT_SUCCESS; +} +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( + ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phNativeKernel, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + *phNativeKernel = reinterpret_cast(hKernel); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( + ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, + const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { + UR_ASSERT(hNativeKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + *phKernel = reinterpret_cast(hNativeKernel); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index d0d6c2a2e6e62..499f9b089bc75 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -10,7 +10,8 @@ #include -cl_int cl::getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version) { +ur_result_t cl_adapter::getPlatformVersion(cl_platform_id plat, + OCLV::OpenCLVersion &version) { size_t platVerSize = 0; CL_RETURN_ON_FAILURE( @@ -22,13 +23,13 @@ cl_int cl::getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version) version = OCLV::OpenCLVersion(platVer); if (!version.isValid()) { - return CL_INVALID_PLATFORM; + return UR_RESULT_ERROR_INVALID_PLATFORM; } - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } -cl_int map_ur_platform_info_to_cl(ur_platform_info_t urPropName) { +static cl_int map_ur_platform_info_to_cl(ur_platform_info_t 
urPropName) { cl_int cl_propName; switch (urPropName) { @@ -137,13 +138,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) { return UR_RESULT_SUCCESS; } -// This API is called by Sycl RT to notify the end of the plugin lifetime. -// Windows: dynamically loaded plugins might have been unloaded already -// when this is called. Sycl RT holds onto the PI plugin so it can be -// called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. -// TODO: add a global variable lifetime management code here (see -// pi_level_zero.cpp for reference). +/* This API is called by Sycl RT to notify the end of the adapter lifetime. + * Windows: dynamically loaded plugins might have been unloaded already when + * this is called. Sycl RT holds onto the UR adapter so it can be called safely. + * But this is not transitive. If the UR adapter dynamically loaded a + * different DLL, that may have been unloaded already. + * TODO: add a global variable lifetime management code here (see + * pi_level_zero.cpp for reference). 
*/ UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { UR_ASSERT(pParams, UR_RESULT_ERROR_INVALID_NULL_POINTER); if (cl_ext::ExtFuncPtrCache) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp index d5f14e774c3af..6f69a04030da1 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp @@ -9,6 +9,7 @@ #include "common.hpp" -namespace cl { -cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version); +namespace cl_adapter { +ur_result_t getPlatformVersion(cl_platform_id plat, + OCLV::OpenCLVersion &version); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index bf6ec337b97ac..fdc7bb6a55a77 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -11,8 +11,7 @@ #include "device.hpp" #include "platform.hpp" -namespace cl { -cl_uint getDevicesFromProgram( +static ur_result_t getDevicesFromProgram( ur_program_handle_t hProgram, std::unique_ptr> &devicesInProgram) { @@ -22,7 +21,7 @@ cl_uint getDevicesFromProgram( &deviceCount, nullptr)); if (deviceCount < 1) { - return CL_INVALID_CONTEXT; + return UR_RESULT_ERROR_INVALID_CONTEXT; } devicesInProgram = std::make_unique>(deviceCount); @@ -31,9 +30,8 @@ cl_uint getDevicesFromProgram( cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, deviceCount * sizeof(cl_device_id), (*devicesInProgram).data(), nullptr)); - return CL_SUCCESS; + return UR_RESULT_SUCCESS; } -} // namespace cl UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, @@ -55,7 +53,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( OCLV::OpenCLVersion platVer; CL_RETURN_ON_FAILURE_AND_SET_NULL( - 
cl::getPlatformVersion(curPlatform, platVer), phProgram); + cl_adapter::getPlatformVersion(curPlatform, platVer), phProgram); cl_int err = CL_SUCCESS; if (platVer >= OCLV::V2_1) { @@ -64,8 +62,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( for (cl_device_id dev : *devicesInCtx) { OCLV::OpenCLVersion devVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL(cl_adapter::getDeviceVersion(dev, devVer), - phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getDeviceVersion(dev, devVer), phProgram); /* If the device does not support CL 2.1 or greater, we need to make sure * it supports the cl_khr_il_program extension. @@ -73,7 +71,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (devVer < OCLV::V2_1) { bool supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported), + cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, + supported), phProgram); if (!supported) { @@ -93,7 +92,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( for (cl_device_id dev : *devicesInCtx) { bool supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported), + cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, + supported), phProgram); if (!supported) { @@ -130,8 +130,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( cl_int cl_result; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( cl_adapter::cast(hContext), cl_adapter::cast(1u), - cl_adapter::cast(&hDevice), &size, &pBinary, &binary_status, - &cl_result)); + cl_adapter::cast(&hDevice), &size, &pBinary, + &binary_status, &cl_result)); CL_RETURN_ON_FAILURE(binary_status); CL_RETURN_ON_FAILURE(cl_result); @@ -146,7 +146,7 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); std::unique_ptr> devicesInProgram; - 
CL_RETURN_ON_FAILURE(cl::getDevicesFromProgram(hProgram, devicesInProgram)); + UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, devicesInProgram)); CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), devicesInProgram->size(), @@ -156,7 +156,7 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, return UR_RESULT_SUCCESS; } -cl_int map_ur_program_info_to_cl(ur_program_info_t urPropName) { +static cl_int map_ur_program_info_to_cl(ur_program_info_t urPropName) { cl_int cl_propName; switch (static_cast(urPropName)) { @@ -215,11 +215,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); std::unique_ptr> devicesInProgram; - CL_RETURN_ON_FAILURE(cl::getDevicesFromProgram(hProgram, devicesInProgram)); + UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, devicesInProgram)); - CL_RETURN_ON_FAILURE( - clBuildProgram(cl_adapter::cast(hProgram), devicesInProgram->size(), - devicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clBuildProgram( + cl_adapter::cast(hProgram), devicesInProgram->size(), + devicesInProgram->data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -233,16 +233,18 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int cl_result; - *phProgram = cl_adapter::cast(clLinkProgram( - cl_adapter::cast(hContext), 0, nullptr, pOptions, - cl_adapter::cast(count), cl_adapter::cast(phPrograms), - nullptr, nullptr, &cl_result)); + *phProgram = cl_adapter::cast( + clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, + pOptions, cl_adapter::cast(count), + cl_adapter::cast(phPrograms), nullptr, + nullptr, &cl_result)); CL_RETURN_ON_FAILURE(cl_result); return UR_RESULT_SUCCESS; } -cl_int map_ur_program_build_info_to_cl(ur_program_build_info_t urPropName) { +static cl_int +map_ur_program_build_info_to_cl(ur_program_build_info_t
urPropName) { cl_int cl_propName; switch (static_cast(urPropName)) { @@ -273,10 +275,11 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), cl_adapter::cast(hDevice), - map_ur_program_build_info_to_cl(propName), propSize, pPropValue, - pPropSizeRet)); + CL_RETURN_ON_FAILURE( + clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + map_ur_program_build_info_to_cl(propName), propSize, + pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -293,7 +296,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clReleaseProgram(cl_adapter::cast(hProgram))); + CL_RETURN_ON_FAILURE( + clReleaseProgram(cl_adapter::cast(hProgram))); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 2e6578e135a2b..6340cfe5ede90 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -106,21 +106,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - // pDdiTable->pfnCreate = urKernelCreate; - // pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - // pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - // pDdiTable->pfnGetInfo = urKernelGetInfo; - // pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - // pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - // pDdiTable->pfnRelease = urKernelRelease; - // pDdiTable->pfnRetain = urKernelRetain; - // pDdiTable->pfnSetArgLocal = nullptr; - 
// pDdiTable->pfnSetArgMemObj = nullptr; - // pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - // pDdiTable->pfnSetArgSampler = nullptr; - // pDdiTable->pfnSetArgValue = urKernelSetArgValue; - // pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - // pDdiTable->pfnSetSpecializationConstants = nullptr; + pDdiTable->pfnCreate = urKernelCreate; + pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; + pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; + pDdiTable->pfnGetInfo = urKernelGetInfo; + pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; + pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; + pDdiTable->pfnRelease = urKernelRelease; + pDdiTable->pfnRetain = urKernelRetain; + pDdiTable->pfnSetArgLocal = nullptr; + pDdiTable->pfnSetArgMemObj = nullptr; + pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; + pDdiTable->pfnSetArgSampler = nullptr; + pDdiTable->pfnSetArgValue = urKernelSetArgValue; + pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; + pDdiTable->pfnSetSpecializationConstants = nullptr; return UR_RESULT_SUCCESS; } From 1a106475d3d662e280ef4b42111ce5cfc9a6671d Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Fri, 2 Jun 2023 10:06:21 +0100 Subject: [PATCH 10/36] Fix handling of UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE --- .../ur/adapters/opencl/common.cpp | 2 +- .../ur/adapters/opencl/kernel.cpp | 67 +++++++++++++++++-- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 6e0c0a776e5ec..579ac15465718 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -45,7 +45,7 @@ ur_result_t map_cl_error_to_ur(cl_int result) { case CL_BUILD_PROGRAM_FAILURE: return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE; default: - assert(false && "OpenCL error has no UR equivalent."); + return UR_RESULT_ERROR_UNKNOWN; } } diff 
--git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 30403f12a1955..7062b998203b9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -155,11 +155,68 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE( - clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - map_ur_kernel_sub_group_info_to_cl(propName), 0, - nullptr, propSize, pPropValue, pPropSizeRet)); + std::shared_ptr InputValue; + size_t InputValueSize = 0; + size_t RetVal = 0; + + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no + // value is given we use the max work item size of the device in the first + // dimension to avoid truncation of max sub-group size.
+ uint32_t MaxDims = 0; + ur_result_t UrRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, + sizeof(uint32_t), &MaxDims, nullptr); + if (UrRet != UR_RESULT_SUCCESS) + return UrRet; + std::shared_ptr WGSizes{new size_t[MaxDims]}; + UrRet = urDeviceGetInfo( + hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, MaxDims * sizeof(size_t), + WGSizes.get(), nullptr); + if (UrRet != UR_RESULT_SUCCESS) + return UrRet; + for (size_t i = 1; i < MaxDims; ++i) + WGSizes.get()[i] = 1; + InputValue = std::move(WGSizes); + InputValueSize = MaxDims * sizeof(size_t); + } + + cl_int Ret = clGetKernelSubGroupInfo( + cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + map_ur_kernel_sub_group_info_to_cl(propName), InputValueSize, + InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); + + if (Ret == CL_INVALID_OPERATION) { + // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does + // not support subgroups. + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) { + RetVal = 1; // Minimum required by SYCL 2020 spec + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // Return the maximum work group size for the kernel + size_t KernelWGSize = 0; + ur_result_t UrRet = urKernelGetGroupInfo( + hKernel, hDevice, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, + sizeof(size_t), &KernelWGSize, nullptr); + if (UrRet != UR_RESULT_SUCCESS) + return UrRet; + RetVal = KernelWGSize; + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } + } + + CL_RETURN_ON_FAILURE(Ret); // report failures before writing any output + if (pPropValue) // may be null when the caller only asks for the size + *(static_cast(pPropValue)) = static_cast(RetVal); + if (pPropSizeRet) + *pPropSizeRet = sizeof(uint32_t); return UR_RESULT_SUCCESS; } From a02360069ed96c3a0d5484c58e23e0d474026ac4 Mon Sep 17 00:00:00 2001
From: Callum Fare Date: Tue, 6 Jun 2023 12:23:40 +0000 Subject: [PATCH 11/36] [SYCL][OpenCL] Port enqueue entry points to UR --- sycl/plugins/opencl/CMakeLists.txt | 1 + .../ur/adapters/opencl/common.cpp | 20 + .../ur/adapters/opencl/common.hpp | 5 +- .../ur/adapters/opencl/enqueue.cpp | 529 ++++++++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 46 +- 5 files changed, 576 insertions(+), 25 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 7834ba1c269ba..512d6c7f68f11 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -25,6 +25,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/context.hpp" "../unified_runtime/ur/adapters/opencl/device.cpp" "../unified_runtime/ur/adapters/opencl/device.hpp" + "../unified_runtime/ur/adapters/opencl/enqueue.cpp" "../unified_runtime/ur/adapters/opencl/kernel.cpp" "../unified_runtime/ur/adapters/opencl/platform.cpp" "../unified_runtime/ur/adapters/opencl/platform.hpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 579ac15465718..3f7d81af7f72a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -40,10 +40,30 @@ ur_result_t map_cl_error_to_ur(cl_int result) { return UR_RESULT_ERROR_INVALID_VALUE; case CL_INVALID_PLATFORM: return UR_RESULT_ERROR_INVALID_PLATFORM; + case CL_DEVICE_NOT_FOUND: + return UR_RESULT_ERROR_DEVICE_NOT_FOUND; case CL_INVALID_OPERATION: return UR_RESULT_ERROR_INVALID_OPERATION; + case CL_INVALID_ARG_VALUE: + return UR_RESULT_ERROR_INVALID_ARGUMENT; + case CL_INVALID_EVENT: + return UR_RESULT_ERROR_INVALID_EVENT; + case CL_INVALID_EVENT_WAIT_LIST: + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + case CL_INVALID_BINARY: + return 
UR_RESULT_ERROR_INVALID_BINARY; + case CL_INVALID_KERNEL_NAME: + return UR_RESULT_ERROR_INVALID_KERNEL_NAME; case CL_BUILD_PROGRAM_FAILURE: return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE; + case CL_INVALID_WORK_GROUP_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; + case CL_INVALID_WORK_ITEM_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE; + case CL_INVALID_WORK_DIMENSION: + return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; + case CL_OUT_OF_RESOURCES: + return UR_RESULT_ERROR_OUT_OF_RESOURCES; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index bf101ac0ac626..ce447a64f2619 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -217,8 +217,9 @@ cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, using clEnqueueWriteHostPipeINTEL_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, - const char *pipe_symbol, cl_bool blocking, void *ptr, - size_t size, cl_uint num_events_in_waitlist, + const char *pipe_symbol, cl_bool blocking, + const void *ptr, size_t size, + cl_uint num_events_in_waitlist, const cl_event *events_waitlist, cl_event *event); template struct FuncPtrCache { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp new file mode 100644 index 0000000000000..437bd5dc8418d --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp @@ -0,0 +1,529 @@ +//===--------- enqueue.cpp - OpenCL Adapter --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+
+// Translates UR map flags into the equivalent CL_MAP_* bitfield. Only the
+// three flags tested below are carried over.
+cl_map_flags convert_ur_map_flags_to_cl(ur_map_flags_t ur_flags) {
+  cl_map_flags cl_flags = 0;
+  if (ur_flags & UR_MAP_FLAG_READ) {
+    cl_flags |= CL_MAP_READ;
+  }
+  if (ur_flags & UR_MAP_FLAG_WRITE) {
+    cl_flags |= CL_MAP_WRITE;
+  }
+  if (ur_flags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) {
+    cl_flags |= CL_MAP_WRITE_INVALIDATE_REGION;
+  }
+
+  return cl_flags;
+}
+
+// Launches hKernel on hQueue by forwarding to clEnqueueNDRangeKernel.
+// Rejects null queue/kernel handles and null global offset/size pointers;
+// pLocalWorkSize is passed through unchecked.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
+    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(pGlobalWorkSize, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+
+  CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
+      pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Enqueues a marker via clEnqueueMarkerWithWaitList.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+
+  CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList(
+      cl_adapter::cast<cl_command_queue>(hQueue), numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Enqueues a barrier via clEnqueueBarrierWithWaitList.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+
+  CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList(
+      cl_adapter::cast<cl_command_queue>(hQueue), numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Reads `size` bytes at `offset` of hBuffer into pDst (clEnqueueReadBuffer).
+// A wait list is rejected when its pointer and count disagree.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueReadBuffer(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), blockingRead, offset, size, pDst,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Writes `size` bytes from pSrc to `offset` of hBuffer (clEnqueueWriteBuffer).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), blockingWrite, offset, size, pSrc,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Rectangular read from hBuffer into pDst (clEnqueueReadBufferRect).
+// NOTE(review): the origin/region structs are reinterpreted as size_t[3];
+// presumably their members are size_t-sized on supported targets - confirm.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  // Same host-pointer check the non-rect read performs.
+  UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), blockingRead,
+      cl_adapter::cast<const size_t *>(&bufferOrigin),
+      cl_adapter::cast<const size_t *>(&hostOrigin),
+      cl_adapter::cast<const size_t *>(&region), bufferRowPitch,
+      bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Rectangular write from pSrc into hBuffer (clEnqueueWriteBufferRect).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  // Same host-pointer check the non-rect write performs.
+  UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), blockingWrite,
+      cl_adapter::cast<const size_t *>(&bufferOrigin),
+      cl_adapter::cast<const size_t *>(&hostOrigin),
+      cl_adapter::cast<const size_t *>(&region), bufferRowPitch,
+      bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Copies `size` bytes between two buffers (clEnqueueCopyBuffer).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBufferSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBufferDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBufferSrc),
+      cl_adapter::cast<cl_mem>(hBufferDst), srcOffset, dstOffset, size,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Rectangular buffer-to-buffer copy (clEnqueueCopyBufferRect).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
+    size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBufferSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBufferDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBufferSrc),
+      cl_adapter::cast<cl_mem>(hBufferDst),
+      cl_adapter::cast<const size_t *>(&srcOrigin),
+      cl_adapter::cast<const size_t *>(&dstOrigin),
+      cl_adapter::cast<const size_t *>(&region), srcRowPitch, srcSlicePitch,
+      dstRowPitch, dstSlicePitch, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills `size` bytes of hBuffer at `offset` with the repeating pattern
+// (clEnqueueFillBuffer).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern,
+    size_t patternSize, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueFillBuffer(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Reads a region of hImage into pDst (clEnqueueReadImage).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+    size_t slicePitch, void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueReadImage(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hImage), blockingRead,
+      cl_adapter::cast<const size_t *>(&origin),
+      cl_adapter::cast<const size_t *>(&region), rowPitch, slicePitch, pDst,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Writes pSrc into a region of hImage (clEnqueueWriteImage).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+    size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueWriteImage(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hImage), blockingWrite,
+      cl_adapter::cast<const size_t *>(&origin),
+      cl_adapter::cast<const size_t *>(&region), rowPitch, slicePitch, pSrc,
+      numEventsInWaitList, cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Copies a region from one image to another (clEnqueueCopyImage).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc,
+    ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hImageSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hImageDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueCopyImage(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hImageSrc), cl_adapter::cast<cl_mem>(hImageDst),
+      cl_adapter::cast<const size_t *>(&srcOrigin),
+      cl_adapter::cast<const size_t *>(&dstOrigin),
+      cl_adapter::cast<const size_t *>(&region), numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Maps a range of hBuffer and stores the mapped pointer in *ppRetMap
+// (clEnqueueMapBuffer). *ppRetMap is assigned before the error code is
+// inspected, so it holds whatever the OpenCL call returned on failure.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap,
+    ur_map_flags_t mapFlags, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent, void **ppRetMap) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(ppRetMap, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  cl_int err;
+  *ppRetMap = clEnqueueMapBuffer(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hBuffer), blockingMap,
+      convert_ur_map_flags_to_cl(mapFlags), offset, size, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent), &err);
+
+  CL_RETURN_ON_FAILURE(err);
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Unmaps a pointer previously obtained for hMem (clEnqueueUnmapMemObject).
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(
+      cl_adapter::cast<cl_command_queue>(hQueue),
+      cl_adapter::cast<cl_mem>(hMem), pMappedPtr, numEventsInWaitList,
+      cl_adapter::cast<const cl_event *>(phEventWaitList),
+      cl_adapter::cast<cl_event *>(phEvent)));
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Writes to a device global variable through the extension entry point
+// looked up from the queue's context. Returns
+// UR_RESULT_ERROR_INVALID_OPERATION when that entry point cannot be obtained.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+    bool blockingWrite, size_t count, size_t offset, const void *pSrc,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  cl_context Ctx = nullptr;
+  cl_int Res =
+      clGetCommandQueueInfo(cl_adapter::cast<cl_command_queue>(hQueue),
+                            CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr);
+
+  if (Res != CL_SUCCESS)
+    return map_cl_error_to_ur(Res);
+
+  cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr;
+  Res = cl_ext::getExtFuncFromContext<decltype(F)>(
+      Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache,
+      cl_ext::clEnqueueWriteGlobalVariableName, &F);
+
+  if (!F || Res != CL_SUCCESS)
+    return UR_RESULT_ERROR_INVALID_OPERATION;
+
+  Res = F(cl_adapter::cast<cl_command_queue>(hQueue),
+          cl_adapter::cast<cl_program>(hProgram), name, blockingWrite, count,
+          offset, pSrc, numEventsInWaitList,
+          cl_adapter::cast<const cl_event *>(phEventWaitList),
+          cl_adapter::cast<cl_event *>(phEvent));
+
+  return map_cl_error_to_ur(Res);
+}
+
+// Reads from a device global variable; mirrors the write path above.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+    bool blockingRead, size_t count, size_t offset, void *pDst,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  cl_context Ctx = nullptr;
+  cl_int Res =
+      clGetCommandQueueInfo(cl_adapter::cast<cl_command_queue>(hQueue),
+                            CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr);
+
+  if (Res != CL_SUCCESS)
+    return map_cl_error_to_ur(Res);
+
+  cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr;
+  Res = cl_ext::getExtFuncFromContext<decltype(F)>(
+      Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache,
+      cl_ext::clEnqueueReadGlobalVariableName, &F);
+
+  if (!F || Res != CL_SUCCESS)
+    return UR_RESULT_ERROR_INVALID_OPERATION;
+
+  Res = F(cl_adapter::cast<cl_command_queue>(hQueue),
+          cl_adapter::cast<cl_program>(hProgram), name, blockingRead, count,
+          offset, pDst, numEventsInWaitList,
+          cl_adapter::cast<const cl_event *>(phEventWaitList),
+          cl_adapter::cast<cl_event *>(phEvent));
+
+  return map_cl_error_to_ur(Res);
+}
+
+// Reads from a host pipe through the clEnqueueReadHostPipeINTEL extension.
+// When the entry point is unavailable the lookup's own result is returned.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
+    const char *pipe_symbol, bool blocking, void *pDst, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(pipe_symbol, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  cl_context CLContext;
+  cl_int CLErr = clGetCommandQueueInfo(
+      cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_CONTEXT,
+      sizeof(cl_context), &CLContext, nullptr);
+  if (CLErr != CL_SUCCESS) {
+    return map_cl_error_to_ur(CLErr);
+  }
+
+  clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
+  ur_result_t RetVal =
+      cl_ext::getExtFuncFromContext<decltype(FuncPtr)>(
+          CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache,
+          cl_ext::clEnqueueReadHostPipeName, &FuncPtr);
+
+  if (FuncPtr) {
+    RetVal = map_cl_error_to_ur(
+        FuncPtr(cl_adapter::cast<cl_command_queue>(hQueue),
+                cl_adapter::cast<cl_program>(hProgram), pipe_symbol, blocking,
+                pDst, size, numEventsInWaitList,
+                cl_adapter::cast<const cl_event *>(phEventWaitList),
+                cl_adapter::cast<cl_event *>(phEvent)));
+  }
+
+  return RetVal;
+}
+
+// Writes to a host pipe through the clEnqueueWriteHostPipeINTEL extension.
+// When the entry point is unavailable the lookup's own result is returned.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
+    ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
+    const char *pipe_symbol, bool blocking, void *pSrc, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(pipe_symbol, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  cl_context CLContext;
+  cl_int CLErr = clGetCommandQueueInfo(
+      cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_CONTEXT,
+      sizeof(cl_context), &CLContext, nullptr);
+  if (CLErr != CL_SUCCESS) {
+    return map_cl_error_to_ur(CLErr);
+  }
+
+  clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
+  ur_result_t RetVal =
+      cl_ext::getExtFuncFromContext<decltype(FuncPtr)>(
+          CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache,
+          cl_ext::clEnqueueWriteHostPipeName, &FuncPtr);
+
+  if (FuncPtr) {
+    RetVal = map_cl_error_to_ur(
+        FuncPtr(cl_adapter::cast<cl_command_queue>(hQueue),
+                cl_adapter::cast<cl_program>(hProgram), pipe_symbol, blocking,
+                pSrc, size, numEventsInWaitList,
+                cl_adapter::cast<const cl_event *>(phEventWaitList),
+                cl_adapter::cast<cl_event *>(phEvent)));
+  }
+
+  return RetVal;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
index 6340cfe5ede90..902d0cd690aab 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
@@ -164,29 +164,29 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
   if (UR_RESULT_SUCCESS != result) {
     return result;
   }
-  // pDdiTable->pfnDeviceGlobalVariableRead = nullptr;
-  // pDdiTable->pfnDeviceGlobalVariableWrite = nullptr;
-  // pDdiTable->pfnEventsWait = urEnqueueEventsWait;
-  // pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier;
-  // pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch;
-  // pDdiTable->pfnMemBufferCopy = nullptr;
-  // pDdiTable->pfnMemBufferCopyRect = nullptr;
-  // pDdiTable->pfnMemBufferFill = nullptr;
-  // pDdiTable->pfnMemBufferMap = nullptr;
-  // pDdiTable->pfnMemBufferRead = nullptr;
-  // pDdiTable->pfnMemBufferReadRect = nullptr;
-  // pDdiTable->pfnMemBufferWrite = nullptr;
-  // pDdiTable->pfnMemBufferWriteRect = nullptr;
-  // pDdiTable->pfnMemImageCopy = nullptr;
-  // pDdiTable->pfnMemImageRead = nullptr;
-  // pDdiTable->pfnMemImageWrite = nullptr;
-  // pDdiTable->pfnMemUnmap = nullptr;
-  pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D;
-  pDdiTable->pfnUSMFill = urEnqueueUSMFill;
-  pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise;
-  pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D;
-  pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy;
-  pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch;
+  pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead;
+  pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite;
+  pDdiTable->pfnEventsWait = urEnqueueEventsWait;
+  pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier;
+  pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch;
+  pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy;
+  pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect;
+  pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill;
+  pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap;
+  pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead;
+  pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect;
+  pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite;
+  pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect;
+  pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy;
+  pDdiTable->pfnMemImageRead = urEnqueueMemImageRead;
+  pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite;
+  pDdiTable->pfnMemUnmap = urEnqueueMemUnmap;
+  pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D;
+  pDdiTable->pfnUSMFill = urEnqueueUSMFill;
+  pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise;
+  pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D;
+  pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy;
+  pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch;
   return UR_RESULT_SUCCESS;
 }
 
From dcb23040919238969cb3bac3845e408e7aad4e8c Mon Sep 17 00:00:00 2001
From: Martin Morrison-Grant
Date: Tue, 6 Jun 2023 14:58:50 +0000
Subject: [PATCH 12/36] [SYCL][OpenCL] Port Queue and Event to UR.

---
 sycl/plugins/opencl/CMakeLists.txt | 2 +
 sycl/plugins/unified_runtime/pi2ur.hpp | 1 -
 .../ur/adapters/opencl/event.cpp | 148 ++++++++++++++
 .../ur/adapters/opencl/queue.cpp | 189 ++++++++++++++++++
 .../adapters/opencl/ur_interface_loader.cpp | 32 +--
 5 files changed, 355 insertions(+), 17 deletions(-)
 create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp
 create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp

diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt
index 512d6c7f68f11..ba41e5ed7c116 100644
--- a/sycl/plugins/opencl/CMakeLists.txt
+++ b/sycl/plugins/opencl/CMakeLists.txt
@@ -33,6 +33,8 @@ add_sycl_plugin(opencl
     "../unified_runtime/ur/adapters/opencl/memory.cpp"
     "../unified_runtime/ur/adapters/opencl/usm.cpp"
     "../unified_runtime/ur/adapters/opencl/program.cpp"
+    "../unified_runtime/ur/adapters/opencl/event.cpp"
+    "../unified_runtime/ur/adapters/opencl/queue.cpp"
     # ---
     "${sycl_inc_dir}/sycl/detail/pi.h"
     "pi_opencl.cpp"
diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp
index b7da250610c35..3704effa8e43b 100644
--- a/sycl/plugins/unified_runtime/pi2ur.hpp
+++ b/sycl/plugins/unified_runtime/pi2ur.hpp
@@ -1570,7 +1570,6 @@ inline pi_result piextQueueCreateWithNativeHandle(
   PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT);
   PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE);
   PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE);
-  PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE);
 
   ur_context_handle_t UrContext =
       reinterpret_cast<ur_context_handle_t>(Context);
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp
new file mode 100644
index 0000000000000..406b5788e51c9
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp
@@ -0,0 +1,148 @@
+//===--------- event.cpp - OpenCL Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+
+// NOTE(review): the header name of this include was lost in extraction;
+// <cassert> is an assumption - confirm against the original commit.
+#include <cassert>
+
+// Maps a UR event-info query onto the matching CL_EVENT_* enumerator.
+// Queries without a mapping return -1 converted to cl_event_info.
+cl_event_info convert_ur_event_info_to_cl(const ur_event_info_t propName) {
+  switch (propName) {
+  case UR_EVENT_INFO_COMMAND_QUEUE:
+    return CL_EVENT_COMMAND_QUEUE;
+  case UR_EVENT_INFO_CONTEXT:
+    return CL_EVENT_CONTEXT;
+  case UR_EVENT_INFO_COMMAND_TYPE:
+    return CL_EVENT_COMMAND_TYPE;
+  case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS:
+    return CL_EVENT_COMMAND_EXECUTION_STATUS;
+  case UR_EVENT_INFO_REFERENCE_COUNT:
+    return CL_EVENT_REFERENCE_COUNT;
+  default:
+    return -1;
+  }
+}
+
+// Maps a UR profiling query onto the matching CL_PROFILING_COMMAND_*
+// enumerator. Queries without a mapping return -1 converted to
+// cl_profiling_info.
+cl_profiling_info
+convert_ur_profiling_info_to_cl(const ur_profiling_info_t propName) {
+  switch (propName) {
+  case UR_PROFILING_INFO_COMMAND_QUEUED:
+    return CL_PROFILING_COMMAND_QUEUED;
+  case UR_PROFILING_INFO_COMMAND_SUBMIT:
+    return CL_PROFILING_COMMAND_SUBMIT;
+  case UR_PROFILING_INFO_COMMAND_START:
+    return CL_PROFILING_COMMAND_START;
+  // TODO(ur) add UR_PROFILING_INFO_COMMAND_COMPLETE once spec has been updated
+  case UR_PROFILING_INFO_COMMAND_END:
+    return CL_PROFILING_COMMAND_END;
+  default:
+    return -1;
+  }
+}
+
+// Maps a UR execution status onto the OpenCL command execution status
+// (CL_COMPLETE, CL_RUNNING, CL_SUBMITTED, CL_QUEUED); -1 when unmapped.
+// NOTE(review): despite its name this overload converts execution status, not
+// profiling info; the name is kept so existing call sites still resolve.
+cl_int
+convert_ur_profiling_info_to_cl(const ur_execution_info_t executionInfo) {
+  switch (executionInfo) {
+  case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE:
+    return CL_COMPLETE;
+  case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING:
+    return CL_RUNNING;
+  case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED:
+    return CL_SUBMITTED;
+  case UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED:
+    return CL_QUEUED;
+  default:
+    return -1;
+  }
+}
+
+// Wraps a native OpenCL event as a UR event handle. The handle is the native
+// pointer reinterpreted; hContext and pProperties are ignored.
+UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
+    ur_native_handle_t hNativeEvent, ur_context_handle_t hContext,
+    const ur_event_native_properties_t *pProperties,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hNativeEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  // phEvent is written unconditionally below, so reject a null output slot.
+  UR_ASSERT(phEvent, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  (void)hContext;
+  (void)pProperties;
+  *phEvent = reinterpret_cast<ur_event_handle_t>(hNativeEvent);
+  return UR_RESULT_SUCCESS;
+}
+
+// Returns the native handle behind hEvent via the shared urGetNativeHandle
+// helper.
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
+    ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) {
+  return urGetNativeHandle(hEvent, phNativeEvent);
+}
+
+// Drops one reference on the event (clReleaseEvent).
+UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) {
+  UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  cl_int ret_err = clReleaseEvent(cl_adapter::cast<cl_event>(hEvent));
+  CL_RETURN_ON_FAILURE(ret_err);
+  return UR_RESULT_SUCCESS;
+}
+
+// Adds one reference to the event (clRetainEvent).
+UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) {
+  UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  cl_int ret_err = clRetainEvent(cl_adapter::cast<cl_event>(hEvent));
+  CL_RETURN_ON_FAILURE(ret_err);
+  return UR_RESULT_SUCCESS;
+}
+
+// Blocks until every event in the list has completed (clWaitForEvents).
+UR_APIEXPORT ur_result_t UR_APICALL
+urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) {
+  UR_ASSERT(phEventWaitList, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  cl_int ret_err = clWaitForEvents(
+      numEvents, cl_adapter::cast<const cl_event *>(phEventWaitList));
+  CL_RETURN_ON_FAILURE(ret_err);
+  return UR_RESULT_SUCCESS;
+}
+
+// Queries event information by translating propName and forwarding the
+// caller's buffer to clGetEventInfo.
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
+                                                   ur_event_info_t propName,
+                                                   size_t propSize,
+                                                   void *pPropValue,
+                                                   size_t *pPropSizeRet) {
+  UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  cl_event_info clEventInfo = convert_ur_event_info_to_cl(propName);
+  cl_int ret_err =
+      clGetEventInfo(cl_adapter::cast<cl_event>(hEvent), clEventInfo, propSize,
+                     pPropValue, pPropSizeRet);
+  CL_RETURN_ON_FAILURE(ret_err);
+  return UR_RESULT_SUCCESS;
+}
+
+// Queries profiling information by translating propName and forwarding the
+// caller's buffer to clGetEventProfilingInfo.
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
+    ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize,
+    void *pPropValue, size_t *pPropSizeRet) {
+  UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+  cl_profiling_info clProfilingInfo = convert_ur_profiling_info_to_cl(propName);
+  cl_int ret_err = clGetEventProfilingInfo(cl_adapter::cast<cl_event>(hEvent),
+                                           clProfilingInfo, propSize,
+                                           pPropValue, pPropSizeRet);
+  CL_RETURN_ON_FAILURE(ret_err);
+  return UR_RESULT_SUCCESS;
+}
+
+// Event callbacks are not implemented by this adapter; all arguments are
+// ignored.
+UR_APIEXPORT ur_result_t UR_APICALL
+urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus,
+                   ur_event_callback_t pfnNotify, void *pUserData) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp
new file mode 100644
index 0000000000000..e5d3224f908f0
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp
@@ -0,0 +1,189 @@
+//===--------- queue.cpp - OpenCL Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" +#include "platform.hpp" + +#include + +cl_command_queue_info map_ur_queue_info_to_cl(const ur_queue_info_t propName) { + switch (propName) { + case UR_QUEUE_INFO_CONTEXT: + return CL_QUEUE_CONTEXT; + break; + case UR_QUEUE_INFO_DEVICE: + return CL_QUEUE_DEVICE; + break; + case UR_QUEUE_INFO_DEVICE_DEFAULT: + return CL_QUEUE_DEVICE_DEFAULT; + break; + case UR_QUEUE_INFO_FLAGS: + return CL_QUEUE_PROPERTIES_ARRAY; + break; + case UR_QUEUE_INFO_REFERENCE_COUNT: + return CL_QUEUE_REFERENCE_COUNT; + break; + case UR_QUEUE_INFO_SIZE: + return CL_QUEUE_SIZE; + break; + default: + return -1; + break; + } +} + +cl_command_queue_properties convert_ur_queue_properties_to_cl( + const ur_queue_properties_t *urQueueProperties) { + cl_command_queue_properties clCommandQueueProperties = 0; + + if (urQueueProperties->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + clCommandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } + if (urQueueProperties->flags & UR_QUEUE_FLAG_PROFILING_ENABLE) { + clCommandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; + } + if (urQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE) { + clCommandQueueProperties |= CL_QUEUE_ON_DEVICE; + } + if (urQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { + clCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT; + } + if (urQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { + clCommandQueueProperties |= CL_QUEUE_PRIORITY_LOW_KHR; + } + if (urQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH) { + clCommandQueueProperties |= CL_QUEUE_PRIORITY_HIGH_KHR; + } + + return clCommandQueueProperties; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { + UR_ASSERT(hContext, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + cl_platform_id curPlatform; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + clGetDeviceInfo(cl_adapter::cast(hDevice), + CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &curPlatform, + nullptr), + phQueue); + + cl_command_queue_properties clProperties = + convert_ur_queue_properties_to_cl(pProperties); + + // Check that unexpected bits are not set. + assert(!(clProperties & + ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | + CL_QUEUE_ON_DEVICE_DEFAULT))); + + // Properties supported by OpenCL backend. + cl_command_queue_properties SupportByOpenCL = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + + OCLV::OpenCLVersion version; + CL_RETURN_ON_FAILURE_AND_SET_NULL(cl_adapter::getPlatformVersion(curPlatform, version), + phQueue); + + cl_int ret_err = CL_INVALID_OPERATION; + + if (version < OCLV::V2_0) { // Pre-2.0 platforms only provide the legacy clCreateCommandQueue. + *phQueue = cl_adapter::cast( + clCreateCommandQueue(cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), + clProperties & SupportByOpenCL, &ret_err)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; + } + + cl_queue_properties CreationFlagProperties[] = { + CL_QUEUE_PROPERTIES, clProperties & SupportByOpenCL, 0}; + *phQueue = + cl_adapter::cast(clCreateCommandQueueWithProperties( + cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), CreationFlagProperties, + &ret_err)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, + ur_queue_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + if (propName == UR_QUEUE_INFO_EMPTY) { + // OpenCL doesn't provide API to check the status of the queue. 
+ return UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_command_queue_info clCommandQueueInfo = map_ur_queue_info_to_cl(propName); + + cl_int ret_err = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), clCommandQueueInfo, propSize, + pPropValue, pPropSizeRet); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( + ur_queue_handle_t hQueue, ur_native_handle_t *phNativeQueue) { + return urGetNativeHandle(hQueue, phNativeQueue); +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue) { + UR_ASSERT(hNativeQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(phQueue, UR_RESULT_ERROR_INVALID_NULL_POINTER); + (void)hContext; + (void)hDevice; + (void)pProperties; + *phQueue = reinterpret_cast(hNativeQueue); + cl_int ret_err = + clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = clFlush(cl_adapter::cast(hQueue)); // Submit only; blocking until completion is urQueueFinish's job. + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = + clRetainCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { + 
UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = + clReleaseCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 902d0cd690aab..ecbdfa79ce54c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -66,14 +66,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - // pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - // pDdiTable->pfnGetInfo = urEventGetInfo; - // pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - // pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - // pDdiTable->pfnRelease = urEventRelease; - // pDdiTable->pfnRetain = urEventRetain; - // pDdiTable->pfnSetCallback = urEventSetCallback; - // pDdiTable->pfnWait = urEventWait; + pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urEventGetInfo; + pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; + pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; + pDdiTable->pfnRelease = urEventRelease; + pDdiTable->pfnRetain = urEventRetain; + pDdiTable->pfnSetCallback = urEventSetCallback; + pDdiTable->pfnWait = urEventWait; return UR_RESULT_SUCCESS; } @@ -208,14 +208,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - // pDdiTable->pfnCreate = urQueueCreate; - // pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - // pDdiTable->pfnFinish = urQueueFinish; - // pDdiTable->pfnFlush = urQueueFlush; - // pDdiTable->pfnGetInfo = urQueueGetInfo; - // pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - // 
pDdiTable->pfnRelease = urQueueRelease; - // pDdiTable->pfnRetain = urQueueRetain; + pDdiTable->pfnCreate = urQueueCreate; + pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = urQueueFinish; + pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnGetInfo = urQueueGetInfo; + pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; + pDdiTable->pfnRelease = urQueueRelease; + pDdiTable->pfnRetain = urQueueRetain; return UR_RESULT_SUCCESS; } From 630d7cd693dfbded374fcda1fe8126c45c846ace Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Thu, 8 Jun 2023 14:50:06 +0000 Subject: [PATCH 13/36] [SYCL][OpenCL] Port piPluginGetBackendOption, piGetDeviceAndHostTimer, piextKernelSetArgSampler and piextKernelSetArgMemObj to UR. --- .../ur/adapters/opencl/device.cpp | 38 +++++++++++++++++++ .../ur/adapters/opencl/kernel.cpp | 29 ++++++++++++-- .../ur/adapters/opencl/platform.cpp | 25 ++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 7 ++-- 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 61c53b21149b8..a7f13f74732cb 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -8,6 +8,7 @@ #include "device.hpp" #include "common.hpp" +#include "platform.hpp" #include #include @@ -1006,3 +1007,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( *phDevice = reinterpret_cast(hNativeDevice); return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( + ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp) { + OCLV::OpenCLVersion devVer, platVer; + cl_platform_id platform; + cl_device_id deviceID = cl_adapter::cast(hDevice); + + // TODO: Cache OpenCL version for each device and platform + auto ret_err = clGetDeviceInfo(deviceID, 
CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + CL_RETURN_ON_FAILURE(ret_err); + + ret_err = cl_adapter::getDeviceVersion(deviceID, devVer); + CL_RETURN_ON_FAILURE(ret_err); + + ret_err = cl_adapter::getPlatformVersion(platform, platVer); + + if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { + cl_adapter::setErrorMessage( + "OpenCL version for device and/or platform is less than 2.1", + UR_RESULT_ERROR_INVALID_OPERATION); + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (pDeviceTimestamp) { + uint64_t dummy; + clGetDeviceAndHostTimer(deviceID, pDeviceTimestamp, + pHostTimestamp == nullptr ? &dummy + : pHostTimestamp); + + } else if (pHostTimestamp) { + clGetHostTimer(deviceID, pHostTimestamp); + } + + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 7062b998203b9..34139b1318cdf 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -166,13 +166,12 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, uint32_t MaxDims = 0; ur_result_t UrRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - sizeof(uint32_t), &MaxDims, nullptr); + sizeof(uint32_t), &MaxDims, nullptr); if (UrRet != UR_RESULT_SUCCESS) return UrRet; std::shared_ptr WGSizes{new size_t[MaxDims]}; - UrRet = urDeviceGetInfo( - hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, MaxDims * sizeof(size_t), - WGSizes.get(), nullptr); + UrRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + MaxDims * sizeof(size_t), WGSizes.get(), nullptr); if (UrRet != UR_RESULT_SUCCESS) return UrRet; for (size_t i = 1; i < MaxDims; ++i) @@ -361,3 +360,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( *phKernel = reinterpret_cast(hNativeKernel); return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL 
urKernelSetArgMemObj( + ur_kernel_handle_t hKernel, uint32_t argIndex, ur_mem_handle_t hArgValue) { + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); // arg_value must point at the cl_mem handle, not be the handle. + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, + ur_sampler_handle_t hArgValue) { + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hArgValue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + cl_int ret_err = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + CL_RETURN_ON_FAILURE(ret_err); + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 499f9b089bc75..d5da9c581a6e6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -153,3 +153,28 @@ UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { } return UR_RESULT_SUCCESS; } + +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-cl-opt-disable' for pFrontendOption = -O0 and '' for others. 
+UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( + ur_platform_handle_t hPlatform, const char *pFrontendOption, + const char **ppPlatformOption) { + using namespace std::literals; + if (pFrontendOption == nullptr) + return UR_RESULT_SUCCESS; + if (pFrontendOption == ""sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (!strcmp(pFrontendOption, "-O0")) { + *ppPlatformOption = "-cl-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (pFrontendOption == "-O1"sv || pFrontendOption == "-O2"sv || + pFrontendOption == "-O3"sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index ecbdfa79ce54c..7ddbccd22fd79 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -41,6 +41,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; pDdiTable->pfnGetInfo = urPlatformGetInfo; pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; + pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; return UR_RESULT_SUCCESS; } @@ -115,9 +116,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( pDdiTable->pfnRelease = urKernelRelease; pDdiTable->pfnRetain = urKernelRetain; pDdiTable->pfnSetArgLocal = nullptr; - pDdiTable->pfnSetArgMemObj = nullptr; + pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetArgSampler = nullptr; + pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; pDdiTable->pfnSetArgValue = urKernelSetArgValue; pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; pDdiTable->pfnSetSpecializationConstants = nullptr; @@ -244,7 +245,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetDeviceProcAddrTable( } pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; pDdiTable->pfnGet = urDeviceGet; - // pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; + pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; pDdiTable->pfnGetInfo = urDeviceGetInfo; pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; pDdiTable->pfnPartition = urDevicePartition; From afc9feaf91110a8eb6f91279c77c77db2591af5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 3 Jul 2023 11:43:22 +0000 Subject: [PATCH 14/36] [OpenCL] Multiple fixes after rebasing --- sycl/plugins/unified_runtime/pi2ur.hpp | 15 ++- .../ur/adapters/opencl/device.cpp | 120 +++++++++++++++--- .../ur/adapters/opencl/kernel.cpp | 32 +++-- .../ur/adapters/opencl/platform.cpp | 13 ++ .../ur/adapters/opencl/program.cpp | 47 +++---- .../ur/adapters/opencl/queue.cpp | 18 +-- .../adapters/opencl/ur_interface_loader.cpp | 2 +- sycl/test-e2e/Basic/subdevice_pi.cpp | 2 +- 8 files changed, 185 insertions(+), 64 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3704effa8e43b..72d42f13ae659 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3191,8 +3191,8 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, /// /// \param queue is the queue to submit to /// \param ptr is the ptr to fill -/// \param pitch is the total width of the destination memory including padding -/// \param pattern is a pointer with the bytes of the pattern to set +/// \param pitch is the total width of the destination memory including +/// padding \param pattern is a pointer with the bytes of the pattern to set /// \param pattern_size is the size in bytes of the pattern /// \param width is width in bytes of each row to fill /// \param height is height the columns to fill @@ -3891,6 +3891,17 @@ inline pi_result piEventGetInfo(pi_event Event, 
pi_event_info ParamName, HANDLE_ERRORS(urEventGetInfo(UrEvent, PropName, ParamValueSize, ParamValue, ParamValueSizeRet)); + if (ParamName == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the PI_EVENT_INFO_COMMAND_EXECUTION_STATUS info value is + * PI_EVENT_QUEUED, change it to PI_EVENT_SUBMITTED. This change is needed + * since sycl::info::event::event_command_status has no equivalent to + * PI_EVENT_QUEUED. */ + const auto param_value_int = static_cast(ParamValue); + if (*param_value_int == PI_EVENT_QUEUED) { + *param_value_int = PI_EVENT_SUBMITTED; + } + } + return PI_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index a7f13f74732cb..2fc200fbec92f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -209,7 +209,7 @@ static cl_int map_ur_device_info_to_cl(ur_device_info_t urPropName) { case UR_DEVICE_INFO_REFERENCE_COUNT: cl_propName = CL_DEVICE_REFERENCE_COUNT; break; - case UR_DEVICE_INFO_PARTITION_PROPERTIES: + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: cl_propName = CL_DEVICE_PARTITION_PROPERTIES; break; case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: @@ -442,19 +442,50 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::to_string(version.getMinor()); return ReturnValue(results.c_str(), results.size() + 1); } - case UR_DEVICE_INFO_PARTITION_PROPERTIES: + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + size_t cl_size; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, 0, + nullptr, &cl_size)); + const size_t n_properties = cl_size / sizeof(cl_device_partition_property); + + std::vector cl_value(n_properties); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + cl_size, cl_value.data(), nullptr)); + + /* The OpenCL implementation returns a value of 0 if no properties are + * 
supported. UR will return a size of 0 for now. + */ + if (pPropSizeRet && cl_value[0] == 0) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; + } + + std::vector ur_value{}; + for (size_t i = 0; i < n_properties; ++i) { + if (cl_value[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL && + cl_value[i] != 0) { + ur_value.push_back(static_cast(cl_value[i])); + } + } + return ReturnValue(ur_value.data(), ur_value.size()); + } case UR_DEVICE_INFO_PARTITION_TYPE: { + size_t cl_size; CL_RETURN_ON_FAILURE( clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, 0, nullptr, &cl_size)); const size_t n_properties = cl_size / sizeof(cl_device_partition_property); - /* Special case for UR_DEVICE_INFO_PARTITION_TYPE because OpenCL - * implementation returns a size of 0 if the device is not a sub-device. - * But UR implementation expects a size of 1 element with a value of 0. */ - if (propName == UR_DEVICE_INFO_PARTITION_TYPE && cl_size == 0) { - return ReturnValue(static_cast(0)); + /* The OpenCL implementation returns either a size of 0 or a value of 0 if + * the device is not a sub-device. UR will return a size of 0 for now. + * TODO Ideally, this could become an error once PI is removed from SYCL RT + */ + if (pPropSizeRet && (cl_size == 0 || n_properties == 1)) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; } auto cl_value = @@ -463,11 +494,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, cl_size, cl_value, nullptr)); - std::vector ur_value{}; - for (size_t i = 0; i < n_properties; ++i) { - if (cl_value[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL) { - ur_value.push_back( - static_cast(cl_value[i])); + std::vector ur_value(n_properties - 1); + + /* OpenCL will always return exactly one partition type followed by one or + * more values. 
*/ + for (uint32_t i = 0; i < ur_value.size(); ++i) { + ur_value[i].type = static_cast(cl_value[0]); + switch (ur_value[i].type) { + case UR_DEVICE_PARTITION_EQUALLY: { + ur_value[i].value.equally = cl_value[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + ur_value[i].value.count = cl_value[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { + ur_value[i].value.affinity_domain = cl_value[i + 1]; + break; + } + default: { + return UR_RESULT_ERROR_UNKNOWN; + } } } @@ -796,7 +844,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { - /* CL type: cl_bitfield + /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ cl_bitfield cl_value; @@ -937,17 +985,52 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, - const ur_device_partition_property_t *pProperties, uint32_t NumDevices, + const ur_device_partition_properties_t *pProperties, uint32_t NumDevices, ur_device_handle_t *phSubDevices, uint32_t *pNumDevicesRet) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pProperties, UR_RESULT_ERROR_INVALID_NULL_POINTER); + std::vector cl_properties( + pProperties->PropCount + 2); + + /* The type must be the same for all properties since OpenCL doesn't support + * property lists with multiple types */ + cl_properties[0] = + static_cast(pProperties->pProperties->type); + + for (uint32_t i = 0; i < pProperties->PropCount; ++i) { // The type is shared, but each entry carries its own value. + cl_device_partition_property cl_property; + switch (pProperties->pProperties->type) { + case UR_DEVICE_PARTITION_EQUALLY: { + cl_property = static_cast( + pProperties->pProperties[i].value.equally); + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + cl_property = static_cast( + 
pProperties->pProperties[i].value.count); + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { + cl_property = static_cast( + pProperties->pProperties[i].value.affinity_domain); + break; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + cl_properties[i + 1] = cl_property; + } + + /* Terminate the list with 0 */ + cl_properties[cl_properties.size() - 1] = 0; + cl_uint cl_num_devices_ret; CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), - cl_adapter::cast(pProperties), 0, - nullptr, &cl_num_devices_ret)); + cl_adapter::cast(hDevice), cl_properties.data(), 0, nullptr, + &cl_num_devices_ret)); if (pNumDevicesRet) { *pNumDevicesRet = cl_num_devices_ret; @@ -958,8 +1041,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( if (phSubDevices) { std::vector cl_sub_devices(cl_num_devices_ret); CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), - cl_adapter::cast(pProperties), + cl_adapter::cast(hDevice), cl_properties.data(), cl_num_devices_ret, cl_sub_devices.data(), nullptr)); std::memcpy(phSubDevices, cl_sub_devices.data(), diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 34139b1318cdf..2bdfb1401f3b9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -196,14 +196,25 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, RetVal = 0; // Not specified by kernel Ret = CL_SUCCESS; } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { - // Return the maximum work group size for the kernel - size_t KernelWGSize = 0; - ur_result_t UrRet = urKernelGetGroupInfo( - hKernel, hDevice, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, - sizeof(size_t), &KernelWGSize, nullptr); - if (UrRet != UR_RESULT_SUCCESS) + // Return the maximum sub group size for the device + size_t result_size = 0; + // Two calls to 
urDeviceGetInfo are needed: the first determines the size + // required to store the result, and the second returns the actual size + // values. + ur_result_t UrRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, + nullptr, &result_size); + if (UrRet != UR_RESULT_SUCCESS) { return UrRet; - RetVal = KernelWGSize; + } + assert(result_size % sizeof(size_t) == 0); + std::vector result(result_size / sizeof(size_t)); + UrRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, + result_size, result.data(), nullptr); + if (UrRet != UR_RESULT_SUCCESS) { + return UrRet; + } + RetVal = *std::max_element(result.begin(), result.end()); Ret = CL_SUCCESS; } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { RetVal = 0; // Not specified by kernel @@ -361,8 +372,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( - ur_kernel_handle_t hKernel, uint32_t argIndex, ur_mem_handle_t hArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue) { + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int ret_err = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index d5da9c581a6e6..ebea289cb8bd4 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -167,6 +167,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *ppPlatformOption = ""; return UR_RESULT_SUCCESS; } + // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. 
if (!strcmp(pFrontendOption, "-O0")) { *ppPlatformOption = "-cl-opt-disable"; return UR_RESULT_SUCCESS; @@ -176,5 +177,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *ppPlatformOption = ""; return UR_RESULT_SUCCESS; } + if (pFrontendOption == "-ftarget-compile-fast"sv) { + *ppPlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } return UR_RESULT_ERROR_INVALID_VALUE; } + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetLastError( + ur_platform_handle_t hPlatform, const char **ppMessage, int32_t *pError) { + std::ignore = hPlatform; + std::ignore = ppMessage; + std::ignore = pError; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index fdc7bb6a55a77..7779c7d9eafae 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -84,34 +84,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( *phProgram = cl_adapter::cast(clCreateProgramWithIL( cl_adapter::cast(hContext), pIL, length, &err)); CL_RETURN_ON_FAILURE(err); - } + } else { - /* If none of the devices conform with CL 2.1 or newer make sure they all - * support the cl_khr_il_program extension. - */ - for (cl_device_id dev : *devicesInCtx) { - bool supported = false; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, - supported), - phProgram); - - if (!supported) { - return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + /* If none of the devices conform with CL 2.1 or newer make sure they all + * support the cl_khr_il_program extension. 
+ */ + for (cl_device_id dev : *devicesInCtx) { + bool supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, + supported), + phProgram); + + if (!supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } } - } - using apiFuncT = - cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - apiFuncT funcPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - curPlatform, "clCreateProgramWithILKHR")); + using apiFuncT = + cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + apiFuncT funcPtr = + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( + curPlatform, "clCreateProgramWithILKHR")); - assert(funcPtr != nullptr); + assert(funcPtr != nullptr); - *phProgram = cl_adapter::cast( - funcPtr(cl_adapter::cast(hContext), pIL, length, &err)); - CL_RETURN_ON_FAILURE(err); + *phProgram = cl_adapter::cast( + funcPtr(cl_adapter::cast(hContext), pIL, length, &err)); + CL_RETURN_ON_FAILURE(err); + } return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index e5d3224f908f0..4c45365e3df2c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -80,19 +80,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( convert_ur_queue_properties_to_cl(pProperties); // Check that unexpected bits are not set. - assert(!(clProperties & - ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | - CL_QUEUE_ON_DEVICE_DEFAULT))); + assert(!(clProperties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | + CL_QUEUE_ON_DEVICE_DEFAULT))); // Properties supported by OpenCL backend. 
cl_command_queue_properties SupportByOpenCL = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - + OCLV::OpenCLVersion version; - CL_RETURN_ON_FAILURE_AND_SET_NULL(cl_adapter::getPlatformVersion(curPlatform, version), - phQueue); + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getPlatformVersion(curPlatform, version), phQueue); cl_int ret_err = CL_INVALID_OPERATION; @@ -137,8 +136,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( - ur_queue_handle_t hQueue, ur_native_handle_t *phNativeQueue) { +UR_APIEXPORT ur_result_t UR_APICALL +urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { return urGetNativeHandle(hQueue, phNativeQueue); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 7ddbccd22fd79..24ffd0c50e1cf 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -41,6 +41,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; pDdiTable->pfnGetInfo = urPlatformGetInfo; pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; + pDdiTable->pfnGetLastError = urPlatformGetLastError; pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; return UR_RESULT_SUCCESS; } @@ -197,7 +198,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnGetLastResult = urGetLastResult; pDdiTable->pfnInit = urInit; pDdiTable->pfnTearDown = urTearDown; return UR_RESULT_SUCCESS; diff --git a/sycl/test-e2e/Basic/subdevice_pi.cpp 
b/sycl/test-e2e/Basic/subdevice_pi.cpp index bf63e1da8aa06..127eb9b9fae06 100644 --- a/sycl/test-e2e/Basic/subdevice_pi.cpp +++ b/sycl/test-e2e/Basic/subdevice_pi.cpp @@ -195,7 +195,7 @@ int main(int argc, const char **argv) { std::string test(argv[1]); std::string partition_type(argv[2]); - device dev(default_selector_v); + device dev(cpu_selector_v); std::vector host_mem(1024, 1); buffer buf(&host_mem[0], host_mem.size()); From e6e012b17101cfa051971c901986539b2b78e099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 5 Jul 2023 12:07:40 +0100 Subject: [PATCH 15/36] [OpenCL] Rebase opencl adapter --- sycl/plugins/opencl/CMakeLists.txt | 2 + sycl/plugins/opencl/pi_opencl.cpp | 1995 ++--------------- sycl/plugins/opencl/pi_opencl.hpp | 89 +- sycl/plugins/unified_runtime/CMakeLists.txt | 227 +- sycl/plugins/unified_runtime/pi2ur.hpp | 2 +- .../ur/adapters/opencl/command_buffer.cpp | 253 +++ .../ur/adapters/opencl/command_buffer.hpp | 13 + .../ur/adapters/opencl/common.cpp | 7 + .../ur/adapters/opencl/common.hpp | 4 + .../ur/adapters/opencl/kernel.cpp | 24 +- 10 files changed, 561 insertions(+), 2055 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index ba41e5ed7c116..5e7bcdc1283df 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -35,6 +35,8 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/program.cpp" "../unified_runtime/ur/adapters/opencl/event.cpp" "../unified_runtime/ur/adapters/opencl/queue.cpp" + "../unified_runtime/ur/adapters/opencl/command_buffer.hpp" + "../unified_runtime/ur/adapters/opencl/command_buffer.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 
422a29bbe4765..b80251d26b308 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -34,6 +34,8 @@ #include #include "../unified_runtime/ur/adapters/opencl/common.hpp" +#include "../unified_runtime/ur/adapters/opencl/device.hpp" +#include "../unified_runtime/ur/adapters/opencl/platform.hpp" #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \ if (err != CL_SUCCESS) { \ @@ -42,295 +44,27 @@ return cast(reterr); \ } -// Want all the needed casts be explicit, do not define conversion operators. -template To cast(From value) { - // TODO: see if more sanity checks are possible. - static_assert(sizeof(From) == sizeof(To), "cast failed size check"); - return (To)(value); -} - -// Older versions of GCC don't like "const" here -#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) -#define CONSTFIX constexpr -#else -#define CONSTFIX const -#endif - -// Names of USM functions that are queried from OpenCL -CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL"; -CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL"; -CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL"; -CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL"; -CONSTFIX char clCreateBufferWithPropertiesName[] = - "clCreateBufferWithPropertiesINTEL"; -CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; -CONSTFIX char clEnqueueMemFillName[] = "clEnqueueMemFillINTEL"; -CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; -CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; -CONSTFIX char clSetProgramSpecializationConstantName[] = - "clSetProgramSpecializationConstant"; -CONSTFIX char clGetDeviceFunctionPointerName[] = - "clGetDeviceFunctionPointerINTEL"; -CONSTFIX char clEnqueueWriteGlobalVariableName[] = - "clEnqueueWriteGlobalVariableINTEL"; -CONSTFIX char clEnqueueReadGlobalVariableName[] = - "clEnqueueReadGlobalVariableINTEL"; -// Names of host pipe functions queried 
from OpenCL -CONSTFIX char clEnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; -CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; - -#undef CONSTFIX - -// Returns plugin specific backend option. -pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, - const char **backend_option) { - using namespace std::literals; - if (frontend_option == nullptr) - return PI_ERROR_INVALID_VALUE; - if (frontend_option == ""sv) { - *backend_option = ""; - return PI_SUCCESS; - } - // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. - if (!strcmp(frontend_option, "-O0")) { - *backend_option = "-cl-opt-disable"; +// TODO(ur) remove when other endpoints have been ported +pi_result map_ur_error(ur_result_t result) { + switch (result) { + case UR_RESULT_SUCCESS: return PI_SUCCESS; - } - if (frontend_option == "-O1"sv || frontend_option == "-O2"sv || - frontend_option == "-O3"sv) { - *backend_option = ""; - return PI_SUCCESS; - } - if (frontend_option == "-ftarget-compile-fast"sv) { - *backend_option = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return PI_SUCCESS; - } - return PI_ERROR_INVALID_VALUE; -} - -static cl_int getPlatformVersion(cl_platform_id plat, - OCLV::OpenCLVersion &version) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t platVerSize = 0; - ret_err = - clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize); - - std::string platVer(platVerSize, '\0'); - ret_err = clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize, - platVer.data(), nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - version = OCLV::OpenCLVersion(platVer); - if (!version.isValid()) - return CL_INVALID_PLATFORM; - - return ret_err; -} - -static cl_int getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t devVerSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize); - - std::string 
devVer(devVerSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, devVer.data(), - nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - version = OCLV::OpenCLVersion(devVer); - if (!version.isValid()) - return CL_INVALID_DEVICE; - - return ret_err; -} - -static cl_int checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported) { - cl_int ret_err = CL_INVALID_VALUE; - - size_t extSize = 0; - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &extSize); - - std::string extStr(extSize, '\0'); - ret_err = clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, extStr.data(), - nullptr); - - if (ret_err != CL_SUCCESS) - return ret_err; - - supported = true; - for (const std::string &ext : exts) - if (!(supported = (extStr.find(ext) != std::string::npos))) - break; - - return ret_err; -} - -using clGetDeviceFunctionPointer_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, - const char *FuncName, cl_ulong *ret_ptr); - -using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, const void *, cl_uint, const cl_event *, - cl_event *); - -using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, void *, cl_uint, const cl_event *, - cl_event *); - -using clSetProgramSpecializationConstant_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value); - -template struct FuncPtrCache { - std::map Map; - std::mutex Mutex; -}; - -// FIXME: There's currently no mechanism for cleaning up this cache, meaning -// that it is invalidated whenever a context is destroyed. This could lead to -// reusing an invalid function pointer if another context happends to have the -// same native handle. 
-struct ExtFuncPtrCacheT { - FuncPtrCache clHostMemAllocINTELCache; - FuncPtrCache clDeviceMemAllocINTELCache; - FuncPtrCache clSharedMemAllocINTELCache; - FuncPtrCache clGetDeviceFunctionPointerCache; - FuncPtrCache - clCreateBufferWithPropertiesINTELCache; - FuncPtrCache clMemBlockingFreeINTELCache; - FuncPtrCache - clSetKernelArgMemPointerINTELCache; - FuncPtrCache clEnqueueMemFillINTELCache; - FuncPtrCache clEnqueueMemcpyINTELCache; - FuncPtrCache clGetMemAllocInfoINTELCache; - FuncPtrCache - clEnqueueWriteGlobalVariableCache; - FuncPtrCache clEnqueueReadGlobalVariableCache; - FuncPtrCache clEnqueueReadHostPipeINTELCache; - FuncPtrCache clEnqueueWriteHostPipeINTELCache; - FuncPtrCache - clSetProgramSpecializationConstantCache; -}; -// A raw pointer is used here since the lifetime of this map has to be tied to -// piTeardown to avoid issues with static destruction order (a user application -// might have static objects that indirectly access this cache in their -// destructor). -static ExtFuncPtrCacheT *ExtFuncPtrCache = new ExtFuncPtrCacheT(); - -// USM helper function to get an extension function pointer -template -static pi_result getExtFuncFromContext(cl_context context, - FuncPtrCache &FPtrCache, - const char *FuncName, T *fptr) { - // TODO - // Potentially redo caching as PI interface changes. - // if cached, return cached FuncPtr - std::lock_guard CacheLock{FPtrCache.Mutex}; - std::map &FPtrMap = FPtrCache.Map; - auto It = FPtrMap.find(context); - if (It != FPtrMap.end()) { - auto F = It->second; - // if cached that extension is not available return nullptr and - // PI_ERROR_INVALID_VALUE - *fptr = F; - return F ? 
PI_SUCCESS : PI_ERROR_INVALID_VALUE; - } - - cl_uint deviceCount; - cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); - - if (ret_err != CL_SUCCESS || deviceCount < 1) { - return PI_ERROR_INVALID_CONTEXT; - } - - std::vector devicesInCtx(deviceCount); - ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); - - if (ret_err != CL_SUCCESS) { - return PI_ERROR_INVALID_CONTEXT; - } - - cl_platform_id curPlatform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); - - if (ret_err != CL_SUCCESS) { - return PI_ERROR_INVALID_CONTEXT; - } - - T FuncPtr = - (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); - - if (!FuncPtr) { - // Cache that the extension is not available - FPtrMap[context] = nullptr; + case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: + return PI_ERROR_OUT_OF_HOST_MEMORY; + case UR_RESULT_ERROR_INVALID_VALUE: return PI_ERROR_INVALID_VALUE; + case UR_RESULT_ERROR_INVALID_PLATFORM: + return PI_ERROR_INVALID_PLATFORM; + default: + return PI_ERROR_UNKNOWN; } - - *fptr = FuncPtr; - FPtrMap[context] = FuncPtr; - - return cast(ret_err); } -/// Enables indirect access of pointers in kernels. -/// Necessary to avoid telling CL about every pointer that might be used. -/// -/// \param kernel is the kernel to be launched -static pi_result USMSetIndirectAccess(pi_kernel kernel) { - // We test that each alloc type is supported before we actually try to - // set KernelExecInfo. 
- cl_bool TrueVal = CL_TRUE; - clHostMemAllocINTEL_fn HFunc = nullptr; - clSharedMemAllocINTEL_fn SFunc = nullptr; - clDeviceMemAllocINTEL_fn DFunc = nullptr; - cl_context CLContext; - cl_int CLErr = clGetKernelInfo(cast(kernel), CL_KERNEL_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clHostMemAllocINTELCache, clHostMemAllocName, - &HFunc); - if (HFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clDeviceMemAllocINTELCache, - clDeviceMemAllocName, &DFunc); - if (DFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - - getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSharedMemAllocINTELCache, - clSharedMemAllocName, &SFunc); - if (SFunc) { - clSetKernelExecInfo(cast(kernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal); - } - return PI_SUCCESS; +// Want all the needed casts be explicit, do not define conversion operators. +template To cast(From value) { + // TODO: see if more sanity checks are possible. + static_assert(sizeof(From) == sizeof(To), "cast failed size check"); + return (To)(value); } extern "C" { @@ -406,225 +140,6 @@ pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, return PI_ERROR_INVALID_BINARY; } -pi_result piextQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties *Properties, pi_queue *Queue) { - assert(Properties); - // Expect flags mask to be passed first. - assert(Properties[0] == PI_QUEUE_FLAGS); - if (Properties[0] != PI_QUEUE_FLAGS) - return PI_ERROR_INVALID_VALUE; - pi_queue_properties Flags = Properties[1]; - // Extra data isn't supported yet. 
- assert(Properties[2] == 0); - if (Properties[2] != 0) - return PI_ERROR_INVALID_VALUE; - return piQueueCreate(Context, Device, Flags, Queue); -} -pi_result piQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, pi_queue *queue) { - assert(queue && "piQueueCreate failed, queue argument is null"); - - cl_platform_id curPlatform; - cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); - - CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); - - // Check that unexpected bits are not set. - assert(!(properties & - ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | - PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | - PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH))); - - // Properties supported by OpenCL backend. - cl_command_queue_properties SupportByOpenCL = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | - CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - - OCLV::OpenCLVersion version; - ret_err = getPlatformVersion(curPlatform, version); - - CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); - - if (version >= OCLV::V2_0) { - *queue = cast(clCreateCommandQueue( - cast(context), cast(device), - cast(properties) & SupportByOpenCL, - &ret_err)); - return cast(ret_err); - } - - cl_queue_properties CreationFlagProperties[] = { - CL_QUEUE_PROPERTIES, - cast(properties) & SupportByOpenCL, 0}; - *queue = cast(clCreateCommandQueueWithProperties( - cast(context), cast(device), - CreationFlagProperties, &ret_err)); - return cast(ret_err); -} - -pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (queue == nullptr) { - return PI_ERROR_INVALID_QUEUE; - } - - switch (param_name) { - case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: - // OpenCL doesn't provide API 
to check the status of the queue. - return PI_ERROR_INVALID_VALUE; - default: - cl_int CLErr = clGetCommandQueueInfo( - cast(queue), cast(param_name), - param_value_size, param_value, param_value_size_ret); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - } - return PI_SUCCESS; -} - -pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, - int32_t NativeHandleDesc, pi_context, - pi_device, bool ownNativeHandle, - pi_queue_properties *Properties, - pi_queue *piQueue) { - (void)NativeHandleDesc; - (void)ownNativeHandle; - (void)Properties; - assert(piQueue != nullptr); - *piQueue = reinterpret_cast(nativeHandle); - clRetainCommandQueue(cast(nativeHandle)); - return PI_SUCCESS; -} - -pi_result piProgramCreate(pi_context context, const void *il, size_t length, - pi_program *res_program) { - cl_uint deviceCount; - cl_int ret_err = - clGetContextInfo(cast(context), CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); - - std::vector devicesInCtx(deviceCount); - - if (ret_err != CL_SUCCESS || deviceCount < 1) { - if (res_program != nullptr) - *res_program = nullptr; - return cast(CL_INVALID_CONTEXT); - } - - ret_err = clGetContextInfo(cast(context), CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); - - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - cl_platform_id curPlatform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); - - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - OCLV::OpenCLVersion platVer; - ret_err = getPlatformVersion(curPlatform, platVer); - - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - pi_result err = PI_SUCCESS; - if (platVer >= OCLV::V2_1) { - - /* Make sure all devices support CL 2.1 or newer as well. 
*/ - for (cl_device_id dev : devicesInCtx) { - OCLV::OpenCLVersion devVer; - - ret_err = getDeviceVersion(dev, devVer); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - /* If the device does not support CL 2.1 or greater, we need to make sure - * it supports the cl_khr_il_program extension. - */ - if (devVer < OCLV::V2_1) { - bool supported = false; - - ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - if (!supported) - return cast(CL_INVALID_OPERATION); - } - } - if (res_program != nullptr) - *res_program = cast(clCreateProgramWithIL( - cast(context), il, length, cast(&err))); - return err; - } - - /* If none of the devices conform with CL 2.1 or newer make sure they all - * support the cl_khr_il_program extension. - */ - for (cl_device_id dev : devicesInCtx) { - bool supported = false; - - ret_err = checkDeviceExtensions(dev, {"cl_khr_il_program"}, supported); - CHECK_ERR_SET_NULL_RET(ret_err, res_program, CL_INVALID_CONTEXT); - - if (!supported) - return cast(CL_INVALID_OPERATION); - } - - using apiFuncT = - cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - apiFuncT funcPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - curPlatform, "clCreateProgramWithILKHR")); - - assert(funcPtr != nullptr); - if (res_program != nullptr) - *res_program = cast( - funcPtr(cast(context), il, length, cast(&err))); - else - err = PI_ERROR_INVALID_VALUE; - - return err; -} - -pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context, bool, - pi_program *piProgram) { - assert(piProgram != nullptr); - *piProgram = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, - const pi_mem_obj_property *arg_properties, - const pi_mem *arg_value) { - std::ignore = arg_properties; - return cast( - clSetKernelArg(cast(kernel), 
cast(arg_index), - sizeof(arg_value), cast(arg_value))); -} - -pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, - const pi_sampler *arg_value) { - return cast( - clSetKernelArg(cast(kernel), cast(arg_index), - sizeof(cl_sampler), cast(arg_value))); -} - -pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context, pi_program, bool, - pi_kernel *piKernel) { - assert(piKernel != nullptr); - *piKernel = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - // Function gets characters between delimeter's in str // then checks if they are equal to the sub_str. // returns true if there is at least one instance @@ -663,10 +178,11 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, if (ret_err != CL_SUCCESS) return cast(ret_err); - clGetDeviceFunctionPointer_fn FuncT = nullptr; - ret_err = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - clGetDeviceFunctionPointerName, &FuncT); + cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; + ret_err = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, + cl_ext::clGetDeviceFunctionPointerName, &FuncT); pi_result pi_ret_err = PI_SUCCESS; @@ -715,757 +231,6 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, return pi_ret_err; } -pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, - void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties) { - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - if (properties) { - // TODO: need to check if all properties are supported by OpenCL RT and - // ignore unsupported - clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - // First we need to look up the function pointer - ret_err = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - 
clCreateBufferWithPropertiesName, &FuncPtr); - if (FuncPtr) { - *ret_mem = - cast(FuncPtr(CLContext, properties, cast(flags), - size, host_ptr, cast(&ret_err))); - return ret_err; - } - } - - *ret_mem = cast(clCreateBuffer(cast(context), - cast(flags), size, - host_ptr, cast(&ret_err))); - return ret_err; -} - -pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, - const pi_image_format *image_format, - const pi_image_desc *image_desc, void *host_ptr, - pi_mem *ret_mem) { - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_mem = cast( - clCreateImage(cast(context), cast(flags), - cast(image_format), - cast(image_desc), host_ptr, - cast(&ret_err))); - - return ret_err; -} - -pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, - pi_buffer_create_type buffer_create_type, - void *buffer_create_info, pi_mem *ret_mem) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_mem = cast( - clCreateSubBuffer(cast(buffer), cast(flags), - cast(buffer_create_type), - buffer_create_info, cast(&ret_err))); - return ret_err; -} - -pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - bool ownNativeHandle, pi_mem *piMem) { - (void)context; - (void)ownNativeHandle; - assert(piMem != nullptr); - *piMem = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piextMemImageCreateWithNativeHandle( - pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, - const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, - pi_mem *Img) { - (void)context; - (void)ownNativeHandle; - (void)ImageFormat; - (void)ImageDesc; - assert(Img != nullptr); - *Img = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piProgramCreateWithBinary( - pi_context context, pi_uint32 num_devices, const pi_device *device_list, - const size_t *lengths, const unsigned char **binaries, - size_t num_metadata_entries, const pi_device_binary_property *metadata, - pi_int32 *binary_status, 
pi_program *ret_program) { - (void)metadata; - (void)num_metadata_entries; - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_program = cast(clCreateProgramWithBinary( - cast(context), cast(num_devices), - cast(device_list), lengths, binaries, - cast(binary_status), cast(&ret_err))); - return ret_err; -} - -pi_result piProgramLink(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - pi_uint32 num_input_programs, - const pi_program *input_programs, - void (*pfn_notify)(pi_program program, void *user_data), - void *user_data, pi_program *ret_program) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_program = cast( - clLinkProgram(cast(context), cast(num_devices), - cast(device_list), options, - cast(num_input_programs), - cast(input_programs), - cast(pfn_notify), user_data, - cast(&ret_err))); - return ret_err; -} - -pi_result piKernelCreate(pi_program program, const char *kernel_name, - pi_kernel *ret_kernel) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_kernel = cast(clCreateKernel( - cast(program), kernel_name, cast(&ret_err))); - return ret_err; -} - -pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - if (kernel == nullptr) { - return PI_ERROR_INVALID_KERNEL; - } - - switch (param_name) { - case PI_KERNEL_GROUP_INFO_NUM_REGS: - return PI_ERROR_INVALID_VALUE; - default: - cl_int result = clGetKernelWorkGroupInfo( - cast(kernel), cast(device), - cast(param_name), param_value_size, - param_value, param_value_size_ret); - return static_cast(result); - } -} - -pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_sub_group_info param_name, - size_t input_value_size, - const void *input_value, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - (void)param_value_size; - size_t ret_val; - cl_int ret_err; 
- - std::shared_ptr implicit_input_value; - if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE && !input_value) { - // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no - // value is given we use the max work item size of the device in the first - // dimention to avoid truncation of max sub-group size. - pi_uint32 max_dims = 0; - pi_result pi_ret_err = - pi2ur::piDeviceGetInfo(device, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - sizeof(pi_uint32), &max_dims, nullptr); - if (pi_ret_err != PI_SUCCESS) - return pi_ret_err; - std::shared_ptr WGSizes{new size_t[max_dims]}; - pi_ret_err = pi2ur::piDeviceGetInfo( - device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, max_dims * sizeof(size_t), - WGSizes.get(), nullptr); - if (pi_ret_err != PI_SUCCESS) - return pi_ret_err; - for (size_t i = 1; i < max_dims; ++i) - WGSizes.get()[i] = 1; - implicit_input_value = std::move(WGSizes); - input_value_size = max_dims * sizeof(size_t); - input_value = implicit_input_value.get(); - } - - ret_err = cast(clGetKernelSubGroupInfo( - cast(kernel), cast(device), - cast(param_name), input_value_size, input_value, - sizeof(size_t), &ret_val, param_value_size_ret)); - - if (ret_err == CL_INVALID_OPERATION) { - // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does - // not support subgroups. - - if (param_name == PI_KERNEL_MAX_NUM_SUB_GROUPS) { - ret_val = 1; // Minimum required by SYCL 2020 spec - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_COMPILE_NUM_SUB_GROUPS) { - ret_val = 0; // Not specified by kernel - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_MAX_SUB_GROUP_SIZE) { - // Return the maximum sub group size for the device - size_t result_size = 0; - // Two calls to piDeviceGetInfo are needed: the first determines the size - // required to store the result, and the second returns the actual size - // values. 
- pi_result pi_ret_err = - piDeviceGetInfo(device, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, - nullptr, &result_size); - if (pi_ret_err != PI_SUCCESS) { - return pi_ret_err; - } - assert(result_size % sizeof(size_t) == 0); - std::vector result(result_size / sizeof(size_t)); - pi_ret_err = piDeviceGetInfo(device, PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, - result_size, result.data(), nullptr); - if (pi_ret_err != PI_SUCCESS) { - return pi_ret_err; - } - ret_val = *std::max_element(result.begin(), result.end()); - ret_err = CL_SUCCESS; - } else if (param_name == PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL) { - ret_val = 0; // Not specified by kernel - ret_err = CL_SUCCESS; - } - } - - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - *(static_cast(param_value)) = static_cast(ret_val); - if (param_value_size_ret) - *param_value_size_ret = sizeof(uint32_t); - return PI_SUCCESS; -} - -pi_result piEventCreate(pi_context context, pi_event *ret_event) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - auto *cl_err = cast(&ret_err); - - cl_event e = clCreateUserEvent(cast(context), cl_err); - *ret_event = cast(e); - if (*cl_err != CL_SUCCESS) - return ret_err; - *cl_err = clSetUserEventStatus(e, CL_COMPLETE); - return ret_err; -} - -pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - bool ownNativeHandle, - pi_event *piEvent) { - (void)context; - // TODO: ignore this, but eventually want to return error as unsupported - (void)ownNativeHandle; - - assert(piEvent != nullptr); - assert(nativeHandle); - assert(context); - - *piEvent = reinterpret_cast(nativeHandle); - return PI_SUCCESS; -} - -pi_result piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_map, pi_map_flags map_flags, - size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event, void **ret_map) { - - pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_map = 
cast(clEnqueueMapBuffer( - cast(command_queue), cast(buffer), - cast(blocking_map), map_flags, offset, size, - cast(num_events_in_wait_list), - cast(event_wait_list), cast(event), - cast(&ret_err))); - return ret_err; -} - -// -// USM -// - -/// Allocates host memory accessible by the device. -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { - - void *Ptr = nullptr; - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - - // First we need to look up the function pointer - clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clHostMemAllocINTELCache, clHostMemAllocName, - &FuncPtr); - - if (FuncPtr) { - Ptr = FuncPtr(CLContext, cast(properties), size, - alignment, cast(&RetVal)); - } - - *result_ptr = Ptr; - - // ensure we aligned the allocation correctly - if (RetVal == PI_SUCCESS && alignment != 0) - assert(reinterpret_cast(*result_ptr) % alignment == 0 && - "allocation not aligned correctly"); - - return RetVal; -} - -/// Allocates device memory -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { - - void *Ptr = nullptr; - pi_result RetVal = 
PI_ERROR_INVALID_OPERATION; - - // First we need to look up the function pointer - clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clDeviceMemAllocINTELCache, - clDeviceMemAllocName, &FuncPtr); - - if (FuncPtr) { - Ptr = FuncPtr(CLContext, cast(device), - cast(properties), size, alignment, - cast(&RetVal)); - } - - *result_ptr = Ptr; - - // ensure we aligned the allocation correctly - if (RetVal == PI_SUCCESS && alignment != 0) - assert(reinterpret_cast(*result_ptr) % alignment == 0 && - "allocation not aligned correctly"); - - return RetVal; -} - -/// Allocates memory accessible on both host and device -/// -/// \param result_ptr contains the allocated memory -/// \param context is the pi_context -/// \param device is the device the memory will be allocated on -/// \param pi_usm_mem_properties are optional allocation properties -/// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation -pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { - - void *Ptr = nullptr; - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - - // First we need to look up the function pointer - clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSharedMemAllocINTELCache, - clSharedMemAllocName, &FuncPtr); - - if (FuncPtr) { - Ptr = FuncPtr(cast(context), cast(device), - cast(properties), size, alignment, - cast(&RetVal)); - } - - *result_ptr = Ptr; - - assert(alignment == 0 || - (RetVal == PI_SUCCESS && - reinterpret_cast(*result_ptr) % alignment == 0)); - return RetVal; -} - -/// Frees allocated USM memory in a blocking manner -/// -/// \param context is the pi_context of the allocation -/// \param ptr is the memory to be freed 
-pi_result piextUSMFree(pi_context context, void *ptr) { - // Use a blocking free to avoid issues with indirect access from kernels that - // might be still running. - clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - - cl_context CLContext = cast(context); - pi_result RetVal = PI_ERROR_INVALID_OPERATION; - RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clMemBlockingFreeINTELCache, - clMemBlockingFreeName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr(CLContext, ptr)); - } - - return RetVal; -} - -/// Sets up pointer arguments for CL kernels. An extra indirection -/// is required due to CL argument conventions. -/// -/// \param kernel is the kernel to be launched -/// \param arg_index is the index of the kernel argument -/// \param arg_size is the size in bytes of the argument (ignored in CL) -/// \param arg_value is the pointer argument -pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { - (void)arg_size; - - // Size is unused in CL as pointer args are passed by value. - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetKernelInfo(cast(kernel), CL_KERNEL_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, - clSetKernelArgMemPointerName, &FuncPtr); - - if (FuncPtr) { - // OpenCL passes pointers by value not by reference - // This means we need to deref the arg to get the pointer value - auto PtrToPtr = reinterpret_cast(arg_value); - auto DerefPtr = reinterpret_cast(*PtrToPtr); - RetVal = - cast(FuncPtr(cast(kernel), arg_index, DerefPtr)); - } - - return RetVal; -} - -/// USM Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value is value to set. 
It is interpreted as an 8-bit value and the -/// upper 24 bits are ignored -/// \param count is the size in bytes to memset -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, - size_t count, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemFillINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueMemFillINTELCache, - clEnqueueMemFillName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr(cast(queue), ptr, &value, - 1, count, num_events_in_waitlist, - cast(events_waitlist), - cast(event))); - } - - return RetVal; -} - -/// USM Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param src_ptr is the data to be copied -/// \param dst_ptr is the location the data will be copied -/// \param size is number of bytes to copy -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr 
!= CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - clEnqueueMemcpyName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast( - FuncPtr(cast(queue), blocking, dst_ptr, src_ptr, size, - num_events_in_waitlist, cast(events_waitlist), - cast(event))); - } - - return RetVal; -} - -/// Hint to migrate memory to the device -/// -/// \param queue is the queue to submit to -/// \param ptr points to the memory to migrate -/// \param size is the number of bytes to migrate -/// \param flags is a bitfield used to specify memory migration options -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - (void)ptr; - (void)size; - - // flags is currently unused so fail if set - if (flags != 0) - return PI_ERROR_INVALID_VALUE; - - return cast(clEnqueueMarkerWithWaitList( - cast(queue), num_events_in_waitlist, - cast(events_waitlist), cast(event))); - - /* - // Use this once impls support it. 
- // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMigrateMemINTEL_fn FuncPtr; - pi_result Err = getExtFuncFromContext( - cast(CLContext), "clEnqueueMigrateMemINTEL", &FuncPtr); - - if (Err != PI_SUCCESS) { - RetVal = Err; - } else { - RetVal = cast(FuncPtr( - cast(queue), ptr, size, flags, num_events_in_waitlist, - reinterpret_cast(events_waitlist), - reinterpret_cast(event))); - } - */ -} - -/// USM Memadvise API -/// -/// \param queue is the queue to submit to -/// \param ptr is the data to be advised -/// \param length is the size in bytes of the meory to advise -/// \param advice is device specific advice -/// \param event is the event that represents this operation -// USM memadvise API to govern behavior of automatic migration mechanisms -pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, pi_mem_advice advice, - pi_event *event) { - (void)ptr; - (void)length; - (void)advice; - - return cast( - clEnqueueMarkerWithWaitList(cast(queue), 0, nullptr, - reinterpret_cast(event))); - - /* - // Change to use this once drivers support it. 
- - // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo(cast(queue), - CL_QUEUE_CONTEXT, - sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueMemAdviseINTEL_fn FuncPtr; - pi_result Err = - getExtFuncFromContext( - cast(CLContext), "clEnqueueMemAdviseINTEL", &FuncPtr); - - if (Err != PI_SUCCESS) { - RetVal = Err; - } else { - RetVal = cast(FuncPtr(cast(queue), - ptr, length, advice, 0, nullptr, - reinterpret_cast(event))); - } - */ -} - -/// USM 2D Fill API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is a pointer with the bytes of the pattern to set -/// \param pattern_size is the size in bytes of the pattern -/// \param pitch is the total width of the destination memory including padding -/// \param width is width in bytes of each row to fill -/// \param height is height the columns to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, - size_t pitch, size_t pattern_size, - const void *pattern, size_t width, - size_t height, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - std::ignore = queue; - std::ignore = ptr; - std::ignore = pitch; - std::ignore = pattern_size; - std::ignore = pattern; - std::ignore = width; - std::ignore = height; - std::ignore = num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// USM 2D Memset API -/// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value contains the byte to set with -/// \param pitch is the total width of the destination memory including padding -/// \param width is 
width in bytes of each row to memset -/// \param height is height the columns to memset -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D( - pi_queue queue, void *ptr, size_t pitch, int value, size_t width, - size_t height, pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, pi_event *event) { - std::ignore = queue; - std::ignore = ptr; - std::ignore = pitch; - std::ignore = value; - std::ignore = width; - std::ignore = height; - std::ignore = num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// USM 2D Memcpy API -/// -/// \param queue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param dst_ptr is the location the data will be copied -/// \param dst_pitch is the total width of the destination memory including -/// padding -/// \param src_ptr is the data to be copied -/// \param dst_pitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( - pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, - const void *src_ptr, size_t src_pitch, size_t width, size_t height, - pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, - pi_event *event) { - std::ignore = queue; - std::ignore = blocking; - std::ignore = dst_ptr; - std::ignore = dst_pitch; - std::ignore = src_ptr; - std::ignore = src_pitch; - std::ignore = width; - std::ignore = height; - std::ignore = 
num_events_in_waitlist; - std::ignore = events_waitlist; - std::ignore = event; - return PI_ERROR_INVALID_OPERATION; -} - -/// API to query information about USM allocated pointers -/// Valid Queries: -/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value -/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if -/// the queried pointer fell inside an allocation. -/// Result must fit in void * -/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's -/// allocation is in bytes. Result is a size_t. -/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against -/// -/// \param context is the pi_context -/// \param ptr is the pointer to query -/// \param param_name is the type of query to perform -/// \param param_value_size is the size of the result in bytes -/// \param param_value is the result -/// \param param_value_ret is how many bytes were written -pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, - pi_mem_alloc_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - - clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cast(context); - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - clGetMemAllocInfoName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr(cast(context), ptr, param_name, - param_value_size, param_value, - param_value_size_ret)); - } - - return RetVal; -} - pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) { std::ignore = ptr; std::ignore = size; @@ -1479,515 +244,6 @@ pi_result piextUSMRelease(const void *ptr, pi_context context) { return PI_SUCCESS; } -/// API for writing data from host to a device global variable. 
-/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param name is the unique identifier for the device global variable -/// \param blocking_write is true if the write should block -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param src is a pointer to where the data must be copied from -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableWrite( - pi_queue queue, pi_program program, const char *name, - pi_bool blocking_write, size_t count, size_t offset, const void *src, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return cast(Res); - - clEnqueueWriteGlobalVariable_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - clEnqueueWriteGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(cast(queue), cast(program), name, - blocking_write, count, offset, src, num_events_in_wait_list, - cast(event_wait_list), cast(event)); - return cast(Res); -} - -/// API reading data from a device global variable to host. 
-/// -/// \param queue is the queue -/// \param program is the program containing the device global variable -/// \param name is the unique identifier for the device global variable -/// \param blocking_read is true if the read should block -/// \param count is the number of bytes to copy -/// \param offset is the byte offset into the device global variable to start -/// copying -/// \param dst is a pointer to where the data must be copied to -/// \param num_events_in_wait_list is a number of events in the wait list -/// \param event_wait_list is the wait list -/// \param event is the resulting event -pi_result piextEnqueueDeviceGlobalVariableRead( - pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, - size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return cast(Res); - - clEnqueueReadGlobalVariable_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - clEnqueueReadGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(cast(queue), cast(program), name, - blocking_read, count, offset, dst, num_events_in_wait_list, - cast(event_wait_list), cast(event)); - return cast(Res); -} - -pi_result piextEnqueueReadHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, - void *ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, 
ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - clEnqueueReadHostPipeName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr( - cast(queue), cast(program), pipe_symbol, - blocking, ptr, size, num_events_in_waitlist, - cast(events_waitlist), cast(event))); - } - - return RetVal; -} - -pi_result piextEnqueueWriteHostPipe(pi_queue queue, pi_program program, - const char *pipe_symbol, pi_bool blocking, - void *ptr, size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cast(queue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return cast(CLErr); - } - - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; - pi_result RetVal = getExtFuncFromContext( - CLContext, ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - clEnqueueWriteHostPipeName, &FuncPtr); - - if (FuncPtr) { - RetVal = cast(FuncPtr( - cast(queue), cast(program), pipe_symbol, - blocking, ptr, size, num_events_in_waitlist, - cast(events_waitlist), cast(event))); - } - - return RetVal; -} - -/// API to set attributes controlling kernel execution -/// -/// \param kernel is the pi kernel to execute -/// \param param_name is a pi_kernel_exec_info value that specifies the info -/// passed to the kernel -/// \param param_value_size is the size of the value in bytes -/// \param param_value is a pointer to the value to set for the kernel -/// -/// If param_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to -/// the pi_bool value PI_TRUE -/// If param_name is PI_USM_PTRS, the value will be an array of ptrs -pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info param_name, - size_t param_value_size, - const void *param_value) { - if (param_name == PI_USM_INDIRECT_ACCESS && - *(static_cast(param_value)) == PI_TRUE) { - return USMSetIndirectAccess(kernel); - } else { - return cast(clSetKernelExecInfo( - cast(kernel), param_name, 
param_value_size, param_value)); - } -} - -pi_result piextProgramSetSpecializationConstant(pi_program prog, - pi_uint32 spec_id, - size_t spec_size, - const void *spec_value) { - cl_program ClProg = cast(prog); - cl_context Ctx = nullptr; - size_t RetSize = 0; - cl_int Res = - clGetProgramInfo(ClProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize); - - if (Res != CL_SUCCESS) - return cast(Res); - - clSetProgramSpecializationConstant_fn F = nullptr; - Res = getExtFuncFromContext( - Ctx, ExtFuncPtrCache->clSetProgramSpecializationConstantCache, - clSetProgramSpecializationConstantName, &F); - - if (!F || Res != CL_SUCCESS) - return PI_ERROR_INVALID_OPERATION; - Res = F(ClProg, spec_id, spec_size, spec_value); - return cast(Res); -} - -/// Common API for getting the native handle of a PI object -/// -/// \param piObj is the pi object to get the native handle of -/// \param nativeHandle is a pointer to be set to the native handle -/// -/// PI_SUCCESS -static pi_result piextGetNativeHandle(void *piObj, - pi_native_handle *nativeHandle) { - assert(nativeHandle != nullptr); - *nativeHandle = reinterpret_cast(piObj); - return PI_SUCCESS; -} - -pi_result piextQueueGetNativeHandle(pi_queue queue, - pi_native_handle *nativeHandle, - int32_t *nativeHandleDesc) { - *nativeHandleDesc = 0; - return piextGetNativeHandle(queue, nativeHandle); -} - -pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle) { - return piextGetNativeHandle(mem, nativeHandle); -} - -pi_result piextProgramGetNativeHandle(pi_program program, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(program, nativeHandle); -} - -pi_result piextKernelGetNativeHandle(pi_kernel kernel, - pi_native_handle *nativeHandle) { - return piextGetNativeHandle(kernel, nativeHandle); -} - -// command-buffer extension -pi_result piextCommandBufferCreate(pi_context context, pi_device device, - const pi_ext_command_buffer_desc *desc, - pi_ext_command_buffer *ret_command_buffer) { - 
(void)context; - (void)device; - (void)desc; - (void)ret_command_buffer; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer) { - (void)command_buffer; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer) { - (void)command_buffer; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) { - (void)command_buffer; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferNDRangeKernel( - pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)kernel; - (void)work_dim; - (void)global_work_offset; - (void)global_work_size; - (void)local_work_size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result -piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer, void *dst_ptr, - const void *src_ptr, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)dst_ptr; - (void)src_ptr; - (void)size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferCopy( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t size, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point 
*sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)src_buffer; - (void)dst_buffer; - (void)src_offset; - (void)dst_offset; - (void)size; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferCopyRect( - pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer, - pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, - pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, - size_t dst_row_pitch, size_t dst_slice_pitch, - pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)src_buffer; - (void)dst_buffer; - (void)src_origin; - (void)dst_origin; - (void)region; - (void)src_row_pitch; - (void)src_slice_pitch; - (void)dst_row_pitch; - (void)dst_slice_pitch; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferRead( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)offset; - (void)size; - (void)dst; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferReadRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - void *ptr, pi_uint32 num_sync_points_in_wait_list, - 
const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)buffer_offset; - (void)host_offset; - (void)region; - (void)buffer_row_pitch; - (void)buffer_slice_pitch; - (void)host_row_pitch; - (void)host_slice_pitch; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferWrite( - pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset, - size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)offset; - (void)size; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextCommandBufferMemBufferWriteRect( - pi_ext_command_buffer command_buffer, pi_mem buffer, - pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, - pi_buff_rect_region region, size_t buffer_row_pitch, - size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const void *ptr, pi_uint32 num_sync_points_in_wait_list, - const pi_ext_sync_point *sync_point_wait_list, - pi_ext_sync_point *sync_point) { - (void)command_buffer; - (void)buffer; - (void)buffer_offset; - (void)host_offset; - (void)region; - (void)buffer_row_pitch; - (void)buffer_slice_pitch; - (void)host_row_pitch; - (void)host_slice_pitch; - (void)ptr; - (void)num_sync_points_in_wait_list; - (void)sync_point_wait_list; - (void)sync_point; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, - pi_queue queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - (void)command_buffer; - 
(void)queue; - (void)num_events_in_wait_list; - (void)event_wait_list; - (void)event; - - // Not implemented - return PI_ERROR_INVALID_OPERATION; -} - -// This API is called by Sycl RT to notify the end of the plugin lifetime. -// Windows: dynamically loaded plugins might have been unloaded already -// when this is called. Sycl RT holds onto the PI plugin so it can be -// called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. -// TODO: add a global variable lifetime management code here (see -// pi_level_zero.cpp for reference). -pi_result piTearDown(void *PluginParameter) { - (void)PluginParameter; - delete ExtFuncPtrCache; - ExtFuncPtrCache = nullptr; - return PI_SUCCESS; -} - -pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, - uint64_t *HostTime) { - OCLV::OpenCLVersion devVer, platVer; - cl_platform_id platform; - cl_device_id deviceID = cast(Device); - - // TODO: Cache OpenCL version for each device and platform - auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - ret_err = getDeviceVersion(deviceID, devVer); - - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - ret_err = getPlatformVersion(platform, platVer); - - if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { - setErrorMessage( - "OpenCL version for device and/or platform is less than 2.1", - UR_RESULT_ERROR_INVALID_OPERATION); - return PI_ERROR_INVALID_OPERATION; - } - - if (DeviceTime) { - uint64_t dummy; - clGetDeviceAndHostTimer(deviceID, DeviceTime, - HostTime == nullptr ? 
&dummy : HostTime); - - } else if (HostTime) { - clGetHostTimer(deviceID, HostTime); - } - - return PI_SUCCESS; -} - -pi_result piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - cl_int result = - clGetEventInfo(reinterpret_cast(event), param_name, - param_value_size, param_value, param_value_size_ret); - if (result == CL_SUCCESS && param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) { - // If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change - // it to CL_SUBMITTED. This change is needed since - // sycl::info::event::event_command_status has no equivalent to CL_QUEUED. - const auto param_value_int = static_cast(param_value); - if (*param_value_int == CL_QUEUED) { - *param_value_int = CL_SUBMITTED; - } - } - return static_cast(result); -} - const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -2029,131 +285,138 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextContextCreateWithNativeHandle, pi2ur::piextContextCreateWithNativeHandle) // Queue - _PI_CL(piQueueCreate, piQueueCreate) - _PI_CL(piextQueueCreate, piextQueueCreate) - _PI_CL(piQueueGetInfo, piQueueGetInfo) - _PI_CL(piQueueFinish, clFinish) - _PI_CL(piQueueFlush, clFlush) - _PI_CL(piQueueRetain, clRetainCommandQueue) - _PI_CL(piQueueRelease, clReleaseCommandQueue) - _PI_CL(piextQueueGetNativeHandle, piextQueueGetNativeHandle) - _PI_CL(piextQueueCreateWithNativeHandle, piextQueueCreateWithNativeHandle) + _PI_CL(piQueueCreate, pi2ur::piQueueCreate) + _PI_CL(piextQueueCreate, pi2ur::piextQueueCreate) + _PI_CL(piQueueGetInfo, pi2ur::piQueueGetInfo) + _PI_CL(piQueueFinish, pi2ur::piQueueFinish) + _PI_CL(piQueueFlush, pi2ur::piQueueFlush) + _PI_CL(piQueueRetain, pi2ur::piQueueRetain) + _PI_CL(piQueueRelease, pi2ur::piQueueRelease) + _PI_CL(piextQueueGetNativeHandle, pi2ur::piextQueueGetNativeHandle) + 
_PI_CL(piextQueueCreateWithNativeHandle, + pi2ur::piextQueueCreateWithNativeHandle) // Memory - _PI_CL(piMemBufferCreate, piMemBufferCreate) - _PI_CL(piMemImageCreate, piMemImageCreate) - _PI_CL(piMemGetInfo, clGetMemObjectInfo) - _PI_CL(piMemImageGetInfo, clGetImageInfo) - _PI_CL(piMemRetain, clRetainMemObject) - _PI_CL(piMemRelease, clReleaseMemObject) - _PI_CL(piMemBufferPartition, piMemBufferPartition) - _PI_CL(piextMemGetNativeHandle, piextMemGetNativeHandle) - _PI_CL(piextMemCreateWithNativeHandle, piextMemCreateWithNativeHandle) + _PI_CL(piMemBufferCreate, pi2ur::piMemBufferCreate) + _PI_CL(piMemImageCreate, pi2ur::piMemImageCreate) + _PI_CL(piMemGetInfo, pi2ur::piMemGetInfo) + _PI_CL(piMemImageGetInfo, pi2ur::piMemImageGetInfo) + _PI_CL(piMemRetain, pi2ur::piMemRetain) + _PI_CL(piMemRelease, pi2ur::piMemRelease) + _PI_CL(piMemBufferPartition, pi2ur::piMemBufferPartition) + _PI_CL(piextMemGetNativeHandle, pi2ur::piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, pi2ur::piextMemCreateWithNativeHandle) + _PI_CL(piextMemImageCreateWithNativeHandle, + pi2ur::piextMemImageCreateWithNativeHandle) // Program - _PI_CL(piProgramCreate, piProgramCreate) - _PI_CL(piProgramCreateWithBinary, piProgramCreateWithBinary) - _PI_CL(piProgramGetInfo, clGetProgramInfo) - _PI_CL(piProgramCompile, clCompileProgram) - _PI_CL(piProgramBuild, clBuildProgram) - _PI_CL(piProgramLink, piProgramLink) - _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo) - _PI_CL(piProgramRetain, clRetainProgram) - _PI_CL(piProgramRelease, clReleaseProgram) + _PI_CL(piProgramCreate, pi2ur::piProgramCreate) + _PI_CL(piProgramCreateWithBinary, pi2ur::piProgramCreateWithBinary) + _PI_CL(piProgramGetInfo, pi2ur::piProgramGetInfo) + _PI_CL(piProgramCompile, pi2ur::piProgramCompile) + _PI_CL(piProgramBuild, pi2ur::piProgramBuild) + _PI_CL(piProgramLink, pi2ur::piProgramLink) + _PI_CL(piProgramGetBuildInfo, pi2ur::piProgramGetBuildInfo) + _PI_CL(piProgramRetain, pi2ur::piProgramRetain) + 
_PI_CL(piProgramRelease, pi2ur::piProgramRelease) _PI_CL(piextProgramSetSpecializationConstant, - piextProgramSetSpecializationConstant) - _PI_CL(piextProgramGetNativeHandle, piextProgramGetNativeHandle) - _PI_CL(piextProgramCreateWithNativeHandle, piextProgramCreateWithNativeHandle) + pi2ur::piextProgramSetSpecializationConstant) + _PI_CL(piextProgramGetNativeHandle, pi2ur::piextProgramGetNativeHandle) + _PI_CL(piextProgramCreateWithNativeHandle, + pi2ur::piextProgramCreateWithNativeHandle) // Kernel - _PI_CL(piKernelCreate, piKernelCreate) - _PI_CL(piKernelSetArg, clSetKernelArg) - _PI_CL(piKernelGetInfo, clGetKernelInfo) - _PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo) - _PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo) - _PI_CL(piKernelRetain, clRetainKernel) - _PI_CL(piKernelRelease, clReleaseKernel) - _PI_CL(piKernelSetExecInfo, piKernelSetExecInfo) - _PI_CL(piextKernelSetArgPointer, piextKernelSetArgPointer) - _PI_CL(piextKernelCreateWithNativeHandle, piextKernelCreateWithNativeHandle) - _PI_CL(piextKernelGetNativeHandle, piextKernelGetNativeHandle) + _PI_CL(piKernelCreate, pi2ur::piKernelCreate) + _PI_CL(piKernelSetArg, pi2ur::piKernelSetArg) + _PI_CL(piKernelGetInfo, pi2ur::piKernelGetInfo) + _PI_CL(piKernelGetGroupInfo, pi2ur::piKernelGetGroupInfo) + _PI_CL(piKernelGetSubGroupInfo, pi2ur::piKernelGetSubGroupInfo) + _PI_CL(piKernelRetain, pi2ur::piKernelRetain) + _PI_CL(piKernelRelease, pi2ur::piKernelRelease) + _PI_CL(piKernelSetExecInfo, pi2ur::piKernelSetExecInfo) + _PI_CL(piextKernelSetArgPointer, pi2ur::piextKernelSetArgPointer) + _PI_CL(piextKernelCreateWithNativeHandle, + pi2ur::piextKernelCreateWithNativeHandle) + _PI_CL(piextKernelGetNativeHandle, pi2ur::piextKernelGetNativeHandle) // Event - _PI_CL(piEventCreate, piEventCreate) - _PI_CL(piEventGetInfo, piEventGetInfo) - _PI_CL(piEventGetProfilingInfo, clGetEventProfilingInfo) - _PI_CL(piEventsWait, clWaitForEvents) - _PI_CL(piEventSetCallback, clSetEventCallback) - 
_PI_CL(piEventSetStatus, clSetUserEventStatus) - _PI_CL(piEventRetain, clRetainEvent) - _PI_CL(piEventRelease, clReleaseEvent) - _PI_CL(piextEventGetNativeHandle, piextGetNativeHandle) - _PI_CL(piextEventCreateWithNativeHandle, piextEventCreateWithNativeHandle) + _PI_CL(piEventCreate, pi2ur::piEventCreate) + _PI_CL(piEventGetInfo, pi2ur::piEventGetInfo) + _PI_CL(piEventGetProfilingInfo, pi2ur::piEventGetProfilingInfo) + _PI_CL(piEventsWait, pi2ur::piEventsWait) + _PI_CL(piEventSetCallback, pi2ur::piEventSetCallback) + _PI_CL(piEventSetStatus, pi2ur::piEventSetStatus) + _PI_CL(piEventRetain, pi2ur::piEventRetain) + _PI_CL(piEventRelease, pi2ur::piEventRelease) + _PI_CL(piextEventGetNativeHandle, pi2ur::piextEventGetNativeHandle) + _PI_CL(piextEventCreateWithNativeHandle, + pi2ur::piextEventCreateWithNativeHandle) // Sampler _PI_CL(piSamplerCreate, pi2ur::piSamplerCreate) _PI_CL(piSamplerGetInfo, pi2ur::piSamplerGetInfo) _PI_CL(piSamplerRetain, pi2ur::piSamplerRetain) _PI_CL(piSamplerRelease, pi2ur::piSamplerRelease) // Queue commands - _PI_CL(piEnqueueKernelLaunch, clEnqueueNDRangeKernel) - _PI_CL(piEnqueueEventsWait, clEnqueueMarkerWithWaitList) - _PI_CL(piEnqueueEventsWaitWithBarrier, clEnqueueBarrierWithWaitList) - _PI_CL(piEnqueueMemBufferRead, clEnqueueReadBuffer) - _PI_CL(piEnqueueMemBufferReadRect, clEnqueueReadBufferRect) - _PI_CL(piEnqueueMemBufferWrite, clEnqueueWriteBuffer) - _PI_CL(piEnqueueMemBufferWriteRect, clEnqueueWriteBufferRect) - _PI_CL(piEnqueueMemBufferCopy, clEnqueueCopyBuffer) - _PI_CL(piEnqueueMemBufferCopyRect, clEnqueueCopyBufferRect) - _PI_CL(piEnqueueMemBufferFill, clEnqueueFillBuffer) - _PI_CL(piEnqueueMemImageRead, clEnqueueReadImage) - _PI_CL(piEnqueueMemImageWrite, clEnqueueWriteImage) - _PI_CL(piEnqueueMemImageCopy, clEnqueueCopyImage) - _PI_CL(piEnqueueMemImageFill, clEnqueueFillImage) - _PI_CL(piEnqueueMemBufferMap, piEnqueueMemBufferMap) - _PI_CL(piEnqueueMemUnmap, clEnqueueUnmapMemObject) + _PI_CL(piEnqueueKernelLaunch, 
pi2ur::piEnqueueKernelLaunch) + _PI_CL(piEnqueueEventsWait, pi2ur::piEnqueueEventsWait) + _PI_CL(piEnqueueEventsWaitWithBarrier, pi2ur::piEnqueueEventsWaitWithBarrier) + _PI_CL(piEnqueueMemBufferRead, pi2ur::piEnqueueMemBufferRead) + _PI_CL(piEnqueueMemBufferReadRect, pi2ur::piEnqueueMemBufferReadRect) + _PI_CL(piEnqueueMemBufferWrite, pi2ur::piEnqueueMemBufferWrite) + _PI_CL(piEnqueueMemBufferWriteRect, pi2ur::piEnqueueMemBufferWriteRect) + _PI_CL(piEnqueueMemBufferCopy, pi2ur::piEnqueueMemBufferCopy) + _PI_CL(piEnqueueMemBufferCopyRect, pi2ur::piEnqueueMemBufferCopyRect) + _PI_CL(piEnqueueMemBufferFill, pi2ur::piEnqueueMemBufferFill) + _PI_CL(piEnqueueMemImageRead, pi2ur::piEnqueueMemImageRead) + _PI_CL(piEnqueueMemImageWrite, pi2ur::piEnqueueMemImageWrite) + _PI_CL(piEnqueueMemImageCopy, pi2ur::piEnqueueMemImageCopy) + _PI_CL(piEnqueueMemImageFill, pi2ur::piEnqueueMemImageFill) + _PI_CL(piEnqueueMemBufferMap, pi2ur::piEnqueueMemBufferMap) + _PI_CL(piEnqueueMemUnmap, pi2ur::piEnqueueMemUnmap) // USM - _PI_CL(piextUSMHostAlloc, piextUSMHostAlloc) - _PI_CL(piextUSMDeviceAlloc, piextUSMDeviceAlloc) - _PI_CL(piextUSMSharedAlloc, piextUSMSharedAlloc) - _PI_CL(piextUSMFree, piextUSMFree) - _PI_CL(piextUSMEnqueueMemset, piextUSMEnqueueMemset) - _PI_CL(piextUSMEnqueueMemcpy, piextUSMEnqueueMemcpy) - _PI_CL(piextUSMEnqueuePrefetch, piextUSMEnqueuePrefetch) - _PI_CL(piextUSMEnqueueMemAdvise, piextUSMEnqueueMemAdvise) - _PI_CL(piextUSMEnqueueFill2D, piextUSMEnqueueFill2D) - _PI_CL(piextUSMEnqueueMemset2D, piextUSMEnqueueMemset2D) - _PI_CL(piextUSMEnqueueMemcpy2D, piextUSMEnqueueMemcpy2D) - _PI_CL(piextUSMGetMemAllocInfo, piextUSMGetMemAllocInfo) + _PI_CL(piextUSMHostAlloc, pi2ur::piextUSMHostAlloc) + _PI_CL(piextUSMDeviceAlloc, pi2ur::piextUSMDeviceAlloc) + _PI_CL(piextUSMSharedAlloc, pi2ur::piextUSMSharedAlloc) + _PI_CL(piextUSMFree, pi2ur::piextUSMFree) + _PI_CL(piextUSMEnqueueMemset, pi2ur::piextUSMEnqueueMemset) + _PI_CL(piextUSMEnqueueMemcpy, 
pi2ur::piextUSMEnqueueMemcpy) + _PI_CL(piextUSMEnqueuePrefetch, pi2ur::piextUSMEnqueuePrefetch) + _PI_CL(piextUSMEnqueueMemAdvise, pi2ur::piextUSMEnqueueMemAdvise) + _PI_CL(piextUSMEnqueueFill2D, pi2ur::piextUSMEnqueueFill2D) + _PI_CL(piextUSMEnqueueMemset2D, pi2ur::piextUSMEnqueueMemset2D) + _PI_CL(piextUSMEnqueueMemcpy2D, pi2ur::piextUSMEnqueueMemcpy2D) + _PI_CL(piextUSMGetMemAllocInfo, pi2ur::piextUSMGetMemAllocInfo) _PI_CL(piextUSMImport, piextUSMImport) _PI_CL(piextUSMRelease, piextUSMRelease) // Device global variable _PI_CL(piextEnqueueDeviceGlobalVariableWrite, - piextEnqueueDeviceGlobalVariableWrite) + pi2ur::piextEnqueueDeviceGlobalVariableWrite) _PI_CL(piextEnqueueDeviceGlobalVariableRead, - piextEnqueueDeviceGlobalVariableRead) + pi2ur::piextEnqueueDeviceGlobalVariableRead) // Host Pipe - _PI_CL(piextEnqueueReadHostPipe, piextEnqueueReadHostPipe) - _PI_CL(piextEnqueueWriteHostPipe, piextEnqueueWriteHostPipe) - - // command-buffer - _PI_CL(piextCommandBufferCreate, piextCommandBufferCreate) - _PI_CL(piextCommandBufferRetain, piextCommandBufferRetain) - _PI_CL(piextCommandBufferRelease, piextCommandBufferRelease) - _PI_CL(piextCommandBufferNDRangeKernel, piextCommandBufferNDRangeKernel) - _PI_CL(piextCommandBufferMemcpyUSM, piextCommandBufferMemcpyUSM) - _PI_CL(piextCommandBufferMemBufferCopy, piextCommandBufferMemBufferCopy) + _PI_CL(piextEnqueueReadHostPipe, pi2ur::piextEnqueueReadHostPipe) + _PI_CL(piextEnqueueWriteHostPipe, pi2ur::piextEnqueueWriteHostPipe) + // Command-buffer + _PI_CL(piextCommandBufferCreate, pi2ur::piextCommandBufferCreate) + _PI_CL(piextCommandBufferRetain, pi2ur::piextCommandBufferRetain) + _PI_CL(piextCommandBufferRelease, pi2ur::piextCommandBufferRelease) + _PI_CL(piextCommandBufferNDRangeKernel, + pi2ur::piextCommandBufferNDRangeKernel) + _PI_CL(piextCommandBufferMemcpyUSM, pi2ur::piextCommandBufferMemcpyUSM) + _PI_CL(piextCommandBufferMemBufferCopy, + pi2ur::piextCommandBufferMemBufferCopy) 
_PI_CL(piextCommandBufferMemBufferCopyRect, - piextCommandBufferMemBufferCopyRect) + pi2ur::piextCommandBufferMemBufferCopyRect) _PI_CL(piextCommandBufferMemBufferRead, piextCommandBufferMemBufferRead) _PI_CL(piextCommandBufferMemBufferReadRect, piextCommandBufferMemBufferReadRect) _PI_CL(piextCommandBufferMemBufferWrite, piextCommandBufferMemBufferWrite) _PI_CL(piextCommandBufferMemBufferWriteRect, piextCommandBufferMemBufferWriteRect) - _PI_CL(piextEnqueueCommandBuffer, piextEnqueueCommandBuffer) - - _PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj) - _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) + _PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer) + // Kernel + _PI_CL(piextKernelSetArgMemObj, pi2ur::piextKernelSetArgMemObj) + _PI_CL(piextKernelSetArgSampler, pi2ur::piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, pi2ur::piPluginGetLastError) - _PI_CL(piTearDown, piTearDown) - _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) - _PI_CL(piPluginGetBackendOption, piPluginGetBackendOption) + _PI_CL(piTearDown, pi2ur::piTearDown) + _PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer) + _PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption) #undef _PI_CL diff --git a/sycl/plugins/opencl/pi_opencl.hpp b/sycl/plugins/opencl/pi_opencl.hpp index 54b1ad90abcaf..e4ac08a13a34a 100644 --- a/sycl/plugins/opencl/pi_opencl.hpp +++ b/sycl/plugins/opencl/pi_opencl.hpp @@ -21,6 +21,9 @@ #include #include +// Share code between the PI Plugin and UR Adapter +#include + // This version should be incremented for any change made to this file or its // corresponding .cpp file. 
#define _PI_OPENCL_PLUGIN_VERSION 1 @@ -28,92 +31,6 @@ #define _PI_OPENCL_PLUGIN_VERSION_STRING \ _PI_PLUGIN_VERSION_STRING(_PI_OPENCL_PLUGIN_VERSION) -namespace OCLV { -class OpenCLVersion { -protected: - unsigned int ocl_major; - unsigned int ocl_minor; - -public: - OpenCLVersion() : ocl_major(0), ocl_minor(0) {} - - OpenCLVersion(unsigned int ocl_major, unsigned int ocl_minor) - : ocl_major(ocl_major), ocl_minor(ocl_minor) { - if (!isValid()) - ocl_major = ocl_minor = 0; - } - - OpenCLVersion(const char *version) : OpenCLVersion(std::string(version)) {} - - OpenCLVersion(const std::string &version) : ocl_major(0), ocl_minor(0) { - /* The OpenCL specification defines the full version string as - * 'OpenCL' for platforms and as - * 'OpenCL' for devices. - */ - std::regex rx("OpenCL ([0-9]+)\\.([0-9]+)"); - std::smatch match; - - if (std::regex_search(version, match, rx) && (match.size() == 3)) { - ocl_major = strtoul(match[1].str().c_str(), nullptr, 10); - ocl_minor = strtoul(match[2].str().c_str(), nullptr, 10); - - if (!isValid()) - ocl_major = ocl_minor = 0; - } - } - - bool operator==(const OpenCLVersion &v) const { - return ocl_major == v.ocl_major && ocl_minor == v.ocl_minor; - } - - bool operator!=(const OpenCLVersion &v) const { return !(*this == v); } - - bool operator<(const OpenCLVersion &v) const { - if (ocl_major == v.ocl_major) - return ocl_minor < v.ocl_minor; - - return ocl_major < v.ocl_major; - } - - bool operator>(const OpenCLVersion &v) const { return v < *this; } - - bool operator<=(const OpenCLVersion &v) const { - return (*this < v) || (*this == v); - } - - bool operator>=(const OpenCLVersion &v) const { - return (*this > v) || (*this == v); - } - - bool isValid() const { - switch (ocl_major) { - case 0: - return false; - case 1: - case 2: - return ocl_minor <= 2; - case UINT_MAX: - return false; - default: - return ocl_minor != UINT_MAX; - } - } - - int getMajor() const { return ocl_major; } - int getMinor() const { return ocl_minor; } -}; 
- -inline const OpenCLVersion V1_0(1, 0); -inline const OpenCLVersion V1_1(1, 1); -inline const OpenCLVersion V1_2(1, 2); -inline const OpenCLVersion V2_0(2, 0); -inline const OpenCLVersion V2_1(2, 1); -inline const OpenCLVersion V2_2(2, 2); -inline const OpenCLVersion V3_0(3, 0); - -} // namespace OCLV struct _pi_ext_command_buffer {}; #endif // PI_OPENCL_HPP diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 297c6cae91cd9..ca4e9a39f70b4 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -8,9 +8,9 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime - GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} - GIT_TAG ${UNIFIED_RUNTIME_TAG} - ) + GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} + GIT_TAG ${UNIFIED_RUNTIME_TAG} + ) # Disable errors from warnings while building the UR. # And remember origin flags before doing that. @@ -40,7 +40,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D add_library(UnifiedRuntimeLoader ALIAS ur_loader) set(UNIFIED_RUNTIME_SOURCE_DIR - ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") + ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include") endif() @@ -48,84 +48,97 @@ endif() add_library (UnifiedRuntime-Headers INTERFACE) target_include_directories(UnifiedRuntime-Headers - INTERFACE - "${UNIFIED_RUNTIME_INCLUDE_DIR}" -) + INTERFACE + "${UNIFIED_RUNTIME_INCLUDE_DIR}" + ) find_package(Threads REQUIRED) -add_sycl_plugin(unified_runtime - SOURCES - # These are short-term shared with Unified Runtime - # The two plugins define a few things differrently so must - # be built separately. This difference is spelled in - # their "ur_bindings.hpp" files. 
- # - "ur_bindings.hpp" - "pi2ur.hpp" - "pi2ur.cpp" - # These below belong to Unified Runtime PI Plugin only - "pi_unified_runtime.hpp" - "pi_unified_runtime.cpp" - LIBRARIES - Threads::Threads - UnifiedRuntimeLoader - UnifiedRuntime-Headers - LevelZeroLoader-Headers # we need for #include in common.h -) +set(UNIFIED_RUNTIME_PLUGIN_ARGS + SOURCES + # These are short-term shared with Unified Runtime + # The two plugins define a few things differently so must + # be built separately. This difference is spelled in + # their "ur_bindings.hpp" files. + "ur_bindings.hpp" + "pi2ur.hpp" + "pi2ur.cpp" + # These below belong to Unified Runtime PI Plugin only + "pi_unified_runtime.hpp" + "pi_unified_runtime.cpp" + LIBRARIES + Threads::Threads + UnifiedRuntimeLoader + UnifiedRuntime-Headers + ) + +# We need for #include in common.h +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS LevelZeroLoader-Headers) +endif() + +if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) + list(APPEND UNIFIED_RUNTIME_PLUGIN_ARGS OpenCL-ICD) +endif() + +add_sycl_plugin(unified_runtime ${UNIFIED_RUNTIME_PLUGIN_ARGS}) + +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + # Build level zero adapter + add_sycl_library("ur_adapter_level_zero" SHARED + SOURCES + "ur/ur.hpp" + "ur/ur.cpp" + "ur/usm_allocator.hpp" + "ur/usm_allocator.cpp" + "ur/usm_allocator_config.cpp" + "ur/usm_allocator_config.hpp" + "ur/adapters/level_zero/ur_level_zero.hpp" + "ur/adapters/level_zero/ur_level_zero.cpp" + "ur/adapters/level_zero/ur_interface_loader.cpp" + "ur/adapters/level_zero/command_buffer.hpp" + "ur/adapters/level_zero/common.hpp" + "ur/adapters/level_zero/context.hpp" + "ur/adapters/level_zero/device.hpp" + "ur/adapters/level_zero/event.hpp" + "ur/adapters/level_zero/image.cpp" + "ur/adapters/level_zero/image.hpp" + "ur/adapters/level_zero/memory.hpp" + "ur/adapters/level_zero/kernel.hpp" + "ur/adapters/level_zero/platform.hpp" + "ur/adapters/level_zero/program.hpp" + 
"ur/adapters/level_zero/queue.hpp" + "ur/adapters/level_zero/sampler.hpp" + "ur/adapters/level_zero/usm.hpp" + "ur/adapters/level_zero/command_buffer.cpp" + "ur/adapters/level_zero/common.cpp" + "ur/adapters/level_zero/context.cpp" + "ur/adapters/level_zero/device.cpp" + "ur/adapters/level_zero/event.cpp" + "ur/adapters/level_zero/memory.cpp" + "ur/adapters/level_zero/kernel.cpp" + "ur/adapters/level_zero/platform.cpp" + "ur/adapters/level_zero/program.cpp" + "ur/adapters/level_zero/queue.cpp" + "ur/adapters/level_zero/sampler.cpp" + "ur/adapters/level_zero/usm.cpp" + "ur/adapters/level_zero/usm_p2p.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + LIBRARIES + UnifiedRuntime-Headers + LevelZeroLoader-Headers + LevelZeroLoader + Threads::Threads + ) + + set_target_properties("ur_adapter_level_zero" PROPERTIES + VERSION "0.0.0" + SOVERSION "0" + ) +endif() -# Build level zero adapter -add_sycl_library("ur_adapter_level_zero" SHARED - SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - "ur/usm_allocator.hpp" - "ur/usm_allocator.cpp" - "ur/usm_allocator_config.cpp" - "ur/usm_allocator_config.hpp" - "ur/adapters/level_zero/ur_level_zero.hpp" - "ur/adapters/level_zero/ur_level_zero.cpp" - "ur/adapters/level_zero/ur_interface_loader.cpp" - "ur/adapters/level_zero/command_buffer.hpp" - "ur/adapters/level_zero/common.hpp" - "ur/adapters/level_zero/context.hpp" - "ur/adapters/level_zero/device.hpp" - "ur/adapters/level_zero/event.hpp" - "ur/adapters/level_zero/image.cpp" - "ur/adapters/level_zero/image.hpp" - "ur/adapters/level_zero/memory.hpp" - "ur/adapters/level_zero/kernel.hpp" - "ur/adapters/level_zero/platform.hpp" - "ur/adapters/level_zero/program.hpp" - "ur/adapters/level_zero/queue.hpp" - "ur/adapters/level_zero/sampler.hpp" - "ur/adapters/level_zero/usm.hpp" - "ur/adapters/level_zero/command_buffer.cpp" - "ur/adapters/level_zero/common.cpp" - "ur/adapters/level_zero/context.cpp" - "ur/adapters/level_zero/device.cpp" - "ur/adapters/level_zero/event.cpp" - 
"ur/adapters/level_zero/memory.cpp" - "ur/adapters/level_zero/kernel.cpp" - "ur/adapters/level_zero/platform.cpp" - "ur/adapters/level_zero/program.cpp" - "ur/adapters/level_zero/queue.cpp" - "ur/adapters/level_zero/sampler.cpp" - "ur/adapters/level_zero/usm.cpp" - "ur/adapters/level_zero/usm_p2p.cpp" - INCLUDE_DIRS - ${sycl_inc_dir} - LIBRARIES - UnifiedRuntime-Headers - LevelZeroLoader-Headers - LevelZeroLoader - Threads::Threads -) -set_target_properties("ur_adapter_level_zero" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" -) if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) # Build CUDA adapter @@ -173,14 +186,14 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) ) set_target_properties("ur_adapter_cuda" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" - ) + VERSION "0.0.0" + SOVERSION "0" + ) endif() if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) # Build HIP adapter - add_sycl_library("ur_adapter_hip" SHARED + add_sycl_library("ur_adapter_hip" SHARED SOURCES "ur/ur.hpp" "ur/ur.cpp" @@ -237,6 +250,38 @@ if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) endif() endif() +# Build OpenCL adapter +add_sycl_library("ur_adapter_opencl" SHARED + SOURCES + "ur/ur.hpp" + "ur/ur.cpp" + "ur/adapters/opencl/common.cpp" + "ur/adapters/opencl/common.hpp" + "ur/adapters/opencl/context.cpp" + "ur/adapters/opencl/context.hpp" + "ur/adapters/opencl/device.cpp" + "ur/adapters/opencl/device.hpp" + "ur/adapters/opencl/enqueue.cpp" + "ur/adapters/opencl/kernel.cpp" + "ur/adapters/opencl/platform.cpp" + "ur/adapters/opencl/platform.hpp" + "ur/adapters/opencl/program.cpp" + "ur/adapters/opencl/sampler.cpp" + "ur/adapters/opencl/memory.cpp" + "ur/adapters/opencl/event.cpp" + "ur/adapters/opencl/queue.cpp" + "ur/adapters/opencl/ur_interface_loader.cpp" + "ur/adapters/opencl/usm.cpp" + "ur/adapters/opencl/command_buffer.hpp" + "ur/adapters/opencl/command_buffer.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + LIBRARIES + UnifiedRuntime-Headers + Threads::Threads + OpenCL-ICD + ) + if (TARGET UnifiedRuntimeLoader) 
set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1) # Install the UR loader. @@ -244,15 +289,17 @@ if (TARGET UnifiedRuntimeLoader) # When UR is moved to its separate repo perhaps we should introduce new component, # e.g. unified-runtime-sycl-dev. install(TARGETS ur_loader - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev - ) + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev + ) endif() # Install the UR adapters too -install(TARGETS ur_adapter_level_zero - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev -) +if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + install(TARGETS ur_adapter_level_zero + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev + ) +endif() diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 72d42f13ae659..4b78c11fdb2ad 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -9,10 +9,10 @@ #include "ur_api.h" #include +#include #include #include #include -#include // Map of UR error codes to PI error codes static pi_result ur2piResult(ur_result_t urResult) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp new file mode 100644 index 0000000000000..5b09b47bb63a0 --- 
/dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp @@ -0,0 +1,253 @@ +//===--------- command_buffer.cpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "command_buffer.hpp" +#include "common.hpp" + +/// Stub implementations of UR experimental feature command-buffers + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer) { + (void)hContext; + (void)hDevice; + (void)pCommandBufferDesc; + (void)phCommandBuffer; + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + (void)hCommandBuffer; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + (void)hCommandBuffer; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + (void)hCommandBuffer; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hKernel; + (void)workDim; + (void)pGlobalWorkOffset; + (void)pGlobalWorkSize; + (void)pLocalWorkSize; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)pDst; + (void)pSrc; + (void)size; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hSrcMem; + (void)hDstMem; + (void)srcOffset; + (void)dstOffset; + (void)size; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is 
not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hSrcMem; + (void)hDstMem; + (void)srcOrigin; + (void)dstOrigin; + (void)region; + (void)srcRowPitch; + (void)srcSlicePitch; + (void)dstRowPitch; + (void)dstSlicePitch; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hBuffer; + (void)offset; + (void)size; + (void)pSrc; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + 
ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hBuffer; + (void)offset; + (void)size; + (void)pDst; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hBuffer; + (void)bufferOffset; + (void)hostOffset; + (void)region; + (void)bufferRowPitch; + (void)bufferSlicePitch; + (void)hostRowPitch; + (void)hostSlicePitch; + (void)pSrc; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hBuffer; + (void)bufferOffset; + (void)hostOffset; + (void)region; + (void)bufferRowPitch; + (void)bufferSlicePitch; + (void)hostRowPitch; + 
(void)hostSlicePitch; + (void)pDst; + + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + (void)hCommandBuffer; + (void)hQueue; + (void)numEventsInWaitList; + (void)phEventWaitList; + (void)phEvent; + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp new file mode 100644 index 0000000000000..8375ee6d48a72 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.hpp @@ -0,0 +1,13 @@ +//===--------- command_buffer.hpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include + +/// Stub implementation of command-buffers for OpenCL + +struct ur_exp_command_buffer_handle_t_ {}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 3f7d81af7f72a..5845cda72da2e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -64,11 +64,18 @@ ur_result_t map_cl_error_to_ur(cl_int result) { return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; case CL_OUT_OF_RESOURCES: return UR_RESULT_ERROR_OUT_OF_RESOURCES; + case CL_INVALID_MEM_OBJECT: + return UR_RESULT_ERROR_INVALID_MEM_OBJECT; default: return UR_RESULT_ERROR_UNKNOWN; } } +void cl_adapter::die(const char *Message) { + std::cerr << "ur_die: " << Message << std::endl; + std::terminate(); +} + /// Common API for getting the native handle of a UR object /// /// \param urObj is the UR object to get the native handle of diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index ce447a64f2619..e4d1d7c64513d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include /** @@ -156,6 +157,9 @@ template To cast(From value) { return static_cast(value); } } + +[[noreturn]] void die(const char *Message); + } // namespace cl_adapter namespace cl_ext { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 2bdfb1401f3b9..112d0998ea1ed 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -18,9 +18,9 @@ 
urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgValue(ur_kernel_handle_t hKernel, uint32_t argIndex, - size_t argSize, const void *pArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *, const void *pArgValue) { CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), @@ -299,9 +299,9 @@ static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, - size_t propSize, const void *pPropValue) { +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( + ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, + const ur_kernel_exec_info_properties_t *, const void *pPropValue) { switch (propName) { case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { @@ -326,7 +326,8 @@ urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, } UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( - ur_kernel_handle_t hKernel, uint32_t argIndex, const void *pArgValue) { + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), @@ -385,11 +386,10 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, - ur_sampler_handle_t hArgValue) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hArgValue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( + ur_kernel_handle_t hKernel, 
uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { + cl_int ret_err = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), sizeof(hArgValue), cl_adapter::cast(&hArgValue)); From c4007b7bf011e48414f08c9035c6d13b42d8b942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 5 Jul 2023 14:31:34 +0100 Subject: [PATCH 16/36] [SYCL][OpenCL] Address review comments on the rebase --- sycl/plugins/opencl/pi_opencl.hpp | 2 - sycl/plugins/unified_runtime/CMakeLists.txt | 222 ++++++++++---------- sycl/plugins/unified_runtime/pi2ur.hpp | 3 +- 3 files changed, 112 insertions(+), 115 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.hpp b/sycl/plugins/opencl/pi_opencl.hpp index e4ac08a13a34a..6894d5cb65d30 100644 --- a/sycl/plugins/opencl/pi_opencl.hpp +++ b/sycl/plugins/opencl/pi_opencl.hpp @@ -31,6 +31,4 @@ #define _PI_OPENCL_PLUGIN_VERSION_STRING \ _PI_PLUGIN_VERSION_STRING(_PI_OPENCL_PLUGIN_VERSION) -struct _pi_ext_command_buffer {}; - #endif // PI_OPENCL_HPP diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index ca4e9a39f70b4..0a66f5cf60d6e 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -8,9 +8,9 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime - GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} - GIT_TAG ${UNIFIED_RUNTIME_TAG} - ) + GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} + GIT_TAG ${UNIFIED_RUNTIME_TAG} + ) # Disable errors from warnings while building the UR. # And remember origin flags before doing that. 
@@ -40,7 +40,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D add_library(UnifiedRuntimeLoader ALIAS ur_loader) set(UNIFIED_RUNTIME_SOURCE_DIR - ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") + ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include") endif() @@ -48,29 +48,29 @@ endif() add_library (UnifiedRuntime-Headers INTERFACE) target_include_directories(UnifiedRuntime-Headers - INTERFACE - "${UNIFIED_RUNTIME_INCLUDE_DIR}" - ) + INTERFACE + "${UNIFIED_RUNTIME_INCLUDE_DIR}" +) find_package(Threads REQUIRED) set(UNIFIED_RUNTIME_PLUGIN_ARGS - SOURCES - # These are short-term shared with Unified Runtime - # The two plugins define a few things differently so must - # be built separately. This difference is spelled in - # their "ur_bindings.hpp" files. - "ur_bindings.hpp" - "pi2ur.hpp" - "pi2ur.cpp" - # These below belong to Unified Runtime PI Plugin only - "pi_unified_runtime.hpp" - "pi_unified_runtime.cpp" - LIBRARIES - Threads::Threads - UnifiedRuntimeLoader - UnifiedRuntime-Headers - ) + SOURCES + # These are short-term shared with Unified Runtime + # The two plugins define a few things differently so must + # be built separately. This difference is spelled in + # their "ur_bindings.hpp" files. 
+ "ur_bindings.hpp" + "pi2ur.hpp" + "pi2ur.cpp" + # These below belong to Unified Runtime PI Plugin only + "pi_unified_runtime.hpp" + "pi_unified_runtime.cpp" + LIBRARIES + Threads::Threads + UnifiedRuntimeLoader + UnifiedRuntime-Headers +) # We need for #include in common.h if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) @@ -86,56 +86,56 @@ add_sycl_plugin(unified_runtime ${UNIFIED_RUNTIME_PLUGIN_ARGS}) if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) # Build level zero adapter add_sycl_library("ur_adapter_level_zero" SHARED - SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - "ur/usm_allocator.hpp" - "ur/usm_allocator.cpp" - "ur/usm_allocator_config.cpp" - "ur/usm_allocator_config.hpp" - "ur/adapters/level_zero/ur_level_zero.hpp" - "ur/adapters/level_zero/ur_level_zero.cpp" - "ur/adapters/level_zero/ur_interface_loader.cpp" - "ur/adapters/level_zero/command_buffer.hpp" - "ur/adapters/level_zero/common.hpp" - "ur/adapters/level_zero/context.hpp" - "ur/adapters/level_zero/device.hpp" - "ur/adapters/level_zero/event.hpp" - "ur/adapters/level_zero/image.cpp" - "ur/adapters/level_zero/image.hpp" - "ur/adapters/level_zero/memory.hpp" - "ur/adapters/level_zero/kernel.hpp" - "ur/adapters/level_zero/platform.hpp" - "ur/adapters/level_zero/program.hpp" - "ur/adapters/level_zero/queue.hpp" - "ur/adapters/level_zero/sampler.hpp" - "ur/adapters/level_zero/usm.hpp" - "ur/adapters/level_zero/command_buffer.cpp" - "ur/adapters/level_zero/common.cpp" - "ur/adapters/level_zero/context.cpp" - "ur/adapters/level_zero/device.cpp" - "ur/adapters/level_zero/event.cpp" - "ur/adapters/level_zero/memory.cpp" - "ur/adapters/level_zero/kernel.cpp" - "ur/adapters/level_zero/platform.cpp" - "ur/adapters/level_zero/program.cpp" - "ur/adapters/level_zero/queue.cpp" - "ur/adapters/level_zero/sampler.cpp" - "ur/adapters/level_zero/usm.cpp" - "ur/adapters/level_zero/usm_p2p.cpp" - INCLUDE_DIRS - ${sycl_inc_dir} - LIBRARIES - UnifiedRuntime-Headers - LevelZeroLoader-Headers - LevelZeroLoader - Threads::Threads - ) + 
SOURCES + "ur/ur.hpp" + "ur/ur.cpp" + "ur/usm_allocator.hpp" + "ur/usm_allocator.cpp" + "ur/usm_allocator_config.cpp" + "ur/usm_allocator_config.hpp" + "ur/adapters/level_zero/ur_level_zero.hpp" + "ur/adapters/level_zero/ur_level_zero.cpp" + "ur/adapters/level_zero/ur_interface_loader.cpp" + "ur/adapters/level_zero/command_buffer.hpp" + "ur/adapters/level_zero/common.hpp" + "ur/adapters/level_zero/context.hpp" + "ur/adapters/level_zero/device.hpp" + "ur/adapters/level_zero/event.hpp" + "ur/adapters/level_zero/image.cpp" + "ur/adapters/level_zero/image.hpp" + "ur/adapters/level_zero/memory.hpp" + "ur/adapters/level_zero/kernel.hpp" + "ur/adapters/level_zero/platform.hpp" + "ur/adapters/level_zero/program.hpp" + "ur/adapters/level_zero/queue.hpp" + "ur/adapters/level_zero/sampler.hpp" + "ur/adapters/level_zero/usm.hpp" + "ur/adapters/level_zero/command_buffer.cpp" + "ur/adapters/level_zero/common.cpp" + "ur/adapters/level_zero/context.cpp" + "ur/adapters/level_zero/device.cpp" + "ur/adapters/level_zero/event.cpp" + "ur/adapters/level_zero/memory.cpp" + "ur/adapters/level_zero/kernel.cpp" + "ur/adapters/level_zero/platform.cpp" + "ur/adapters/level_zero/program.cpp" + "ur/adapters/level_zero/queue.cpp" + "ur/adapters/level_zero/sampler.cpp" + "ur/adapters/level_zero/usm.cpp" + "ur/adapters/level_zero/usm_p2p.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + LIBRARIES + UnifiedRuntime-Headers + LevelZeroLoader-Headers + LevelZeroLoader + Threads::Threads + ) set_target_properties("ur_adapter_level_zero" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" - ) + VERSION "0.0.0" + SOVERSION "0" + ) endif() @@ -186,9 +186,9 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) ) set_target_properties("ur_adapter_cuda" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" - ) + VERSION "0.0.0" + SOVERSION "0" + ) endif() if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) @@ -252,35 +252,35 @@ endif() # Build OpenCL adapter add_sycl_library("ur_adapter_opencl" SHARED - SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - 
"ur/adapters/opencl/common.cpp" - "ur/adapters/opencl/common.hpp" - "ur/adapters/opencl/context.cpp" - "ur/adapters/opencl/context.hpp" - "ur/adapters/opencl/device.cpp" - "ur/adapters/opencl/device.hpp" - "ur/adapters/opencl/enqueue.cpp" - "ur/adapters/opencl/kernel.cpp" - "ur/adapters/opencl/platform.cpp" - "ur/adapters/opencl/platform.hpp" - "ur/adapters/opencl/program.cpp" - "ur/adapters/opencl/sampler.cpp" - "ur/adapters/opencl/memory.cpp" - "ur/adapters/opencl/event.cpp" - "ur/adapters/opencl/queue.cpp" - "ur/adapters/opencl/ur_interface_loader.cpp" - "ur/adapters/opencl/usm.cpp" - "ur/adapters/opencl/command_buffer.hpp" - "ur/adapters/opencl/command_buffer.cpp" - INCLUDE_DIRS - ${sycl_inc_dir} - LIBRARIES - UnifiedRuntime-Headers - Threads::Threads - OpenCL-ICD - ) + SOURCES + "ur/ur.hpp" + "ur/ur.cpp" + "ur/adapters/opencl/common.cpp" + "ur/adapters/opencl/common.hpp" + "ur/adapters/opencl/context.cpp" + "ur/adapters/opencl/context.hpp" + "ur/adapters/opencl/device.cpp" + "ur/adapters/opencl/device.hpp" + "ur/adapters/opencl/enqueue.cpp" + "ur/adapters/opencl/kernel.cpp" + "ur/adapters/opencl/platform.cpp" + "ur/adapters/opencl/platform.hpp" + "ur/adapters/opencl/program.cpp" + "ur/adapters/opencl/sampler.cpp" + "ur/adapters/opencl/memory.cpp" + "ur/adapters/opencl/event.cpp" + "ur/adapters/opencl/queue.cpp" + "ur/adapters/opencl/ur_interface_loader.cpp" + "ur/adapters/opencl/usm.cpp" + "ur/adapters/opencl/command_buffer.hpp" + "ur/adapters/opencl/command_buffer.cpp" + INCLUDE_DIRS + ${sycl_inc_dir} + LIBRARIES + UnifiedRuntime-Headers + Threads::Threads + OpenCL-ICD +) if (TARGET UnifiedRuntimeLoader) set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1) @@ -289,17 +289,17 @@ if (TARGET UnifiedRuntimeLoader) # When UR is moved to its separate repo perhaps we should introduce new component, # e.g. unified-runtime-sycl-dev. 
install(TARGETS ur_loader - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev - ) + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev + ) endif() # Install the UR adapters too if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) install(TARGETS ur_adapter_level_zero - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev - RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev - ) + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev +) endif() diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 4b78c11fdb2ad..925dece418d36 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -9,7 +9,6 @@ #include "ur_api.h" #include -#include #include #include #include @@ -3896,7 +3895,7 @@ inline pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, * PI_EVENT_QUEUED, change it to PI_EVENT_SUBMITTED. This change is needed * since sycl::info::event::event_command_status has no equivalent to * PI_EVENT_QUEUED. 
*/ - const auto param_value_int = static_cast(ParamValue); + const auto param_value_int = static_cast(ParamValue); if (*param_value_int == PI_EVENT_QUEUED) { *param_value_int = PI_EVENT_SUBMITTED; } From b5dca2a213e967493fcd44eec6955b689653ddf8 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Wed, 5 Jul 2023 15:47:13 +0000 Subject: [PATCH 17/36] [SYCL][OpenCL] Port piextGetDeviceFunctionPointer and piextDeviceSelectBinary to UR --- sycl/plugins/opencl/pi_opencl.cpp | 193 +----------------- sycl/plugins/unified_runtime/CMakeLists.txt | 14 +- .../ur/adapters/opencl/device.cpp | 70 +++++++ .../ur/adapters/opencl/program.cpp | 84 ++++++++ .../adapters/opencl/ur_interface_loader.cpp | 4 +- 5 files changed, 166 insertions(+), 199 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b80251d26b308..b2d2a45214a83 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -33,33 +33,6 @@ #include #include -#include "../unified_runtime/ur/adapters/opencl/common.hpp" -#include "../unified_runtime/ur/adapters/opencl/device.hpp" -#include "../unified_runtime/ur/adapters/opencl/platform.hpp" - -#define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \ - if (err != CL_SUCCESS) { \ - if (ptr != nullptr) \ - *ptr = nullptr; \ - return cast(reterr); \ - } - -// TODO(ur) remove when other endpoints have been ported -pi_result map_ur_error(ur_result_t result) { - switch (result) { - case UR_RESULT_SUCCESS: - return PI_SUCCESS; - case UR_RESULT_ERROR_OUT_OF_HOST_MEMORY: - return PI_ERROR_OUT_OF_HOST_MEMORY; - case UR_RESULT_ERROR_INVALID_VALUE: - return PI_ERROR_INVALID_VALUE; - case UR_RESULT_ERROR_INVALID_PLATFORM: - return PI_ERROR_INVALID_PLATFORM; - default: - return PI_ERROR_UNKNOWN; - } -} - // Want all the needed casts be explicit, do not define conversion operators. template To cast(From value) { // TODO: see if more sanity checks are possible. 
@@ -69,168 +42,6 @@ template To cast(From value) { extern "C" { -pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, - pi_uint32 num_images, - pi_uint32 *selected_image_ind) { - - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Choose the binary target for the provided device - const char *image_target = nullptr; - // Get the type of the device - cl_device_type device_type; - constexpr pi_uint32 invalid_ind = std::numeric_limits::max(); - cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, - sizeof(cl_device_type), &device_type, nullptr); - if (ret_err != CL_SUCCESS) { - *selected_image_ind = invalid_ind; - return cast(ret_err); - } - - switch (device_type) { - // TODO: Factor out vendor specifics into a separate source - // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? 
- - // We'll attempt to find an image that was AOT-compiled - // from a SPIR-V image into an image specific for: - - case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64; - break; - case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN; - break; - case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA; - break; - default: - // Otherwise, we'll attempt to find and JIT-compile - // a device-independent SPIR-V image - image_target = __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64; - break; - } - - // Find the appropriate device image, fallback to spirv if not found - pi_uint32 fallback = invalid_ind; - for (pi_uint32 i = 0; i < num_images; ++i) { - if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) { - *selected_image_ind = i; - return PI_SUCCESS; - } - if (strcmp(images[i]->DeviceTargetSpec, - __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) - fallback = i; - } - // Points to a spirv image, if such indeed was found - if ((*selected_image_ind = fallback) != invalid_ind) - return PI_SUCCESS; - // No image can be loaded for the given device - return PI_ERROR_INVALID_BINARY; -} - -// Function gets characters between delimeter's in str -// then checks if they are equal to the sub_str. 
-// returns true if there is at least one instance -// returns false if there are no instances of the name -static bool is_in_separated_string(const std::string &str, char delimiter, - const std::string &sub_str) { - size_t beg = 0; - size_t length = 0; - for (const auto &x : str) { - if (x == delimiter) { - if (str.substr(beg, length) == sub_str) - return true; - - beg += length + 1; - length = 0; - continue; - } - length++; - } - if (length != 0) - if (str.substr(beg, length) == sub_str) - return true; - - return false; -} - -pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, - const char *func_name, - pi_uint64 *function_pointer_ret) { - - cl_context CLContext = nullptr; - cl_int ret_err = - clGetProgramInfo(cast(program), CL_PROGRAM_CONTEXT, - sizeof(CLContext), &CLContext, nullptr); - - if (ret_err != CL_SUCCESS) - return cast(ret_err); - - cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; - ret_err = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - cl_ext::clGetDeviceFunctionPointerName, &FuncT); - - pi_result pi_ret_err = PI_SUCCESS; - - // Check if kernel name exists, to prevent opencl runtime throwing exception - // with cpu runtime - // TODO: Use fallback search method if extension does not exist once CPU - // runtime no longer throws exceptions and prints messages when given - // unavailable functions. 
- *function_pointer_ret = 0; - size_t Size; - cl_int Res = - clGetProgramInfo(cast(program), PI_PROGRAM_INFO_KERNEL_NAMES, - 0, nullptr, &Size); - if (Res != CL_SUCCESS) - return cast(Res); - - std::string ClResult(Size, ' '); - Res = - clGetProgramInfo(cast(program), PI_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr); - if (Res != CL_SUCCESS) - return cast(Res); - - // Get rid of the null terminator and search for kernel_name - // If function cannot be found return error code to indicate it - // exists - ClResult.pop_back(); - if (!is_in_separated_string(ClResult, ';', func_name)) - return PI_ERROR_INVALID_KERNEL_NAME; - - pi_ret_err = PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - - // If clGetDeviceFunctionPointer is in list of extensions - if (FuncT) { - pi_ret_err = cast(FuncT(cast(device), - cast(program), func_name, - function_pointer_ret)); - // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address - // cannot be found even if kernel exits. 
As the kernel does exist return - // that the address is not available - if (pi_ret_err == CL_INVALID_ARG_VALUE) { - *function_pointer_ret = 0; - return PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE; - } - } - return pi_ret_err; -} - pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) { std::ignore = ptr; std::ignore = size; @@ -271,8 +82,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piDevicePartition, pi2ur::piDevicePartition) _PI_CL(piDeviceRetain, pi2ur::piDeviceRetain) _PI_CL(piDeviceRelease, pi2ur::piDeviceRelease) - _PI_CL(piextDeviceSelectBinary, piextDeviceSelectBinary) - _PI_CL(piextGetDeviceFunctionPointer, piextGetDeviceFunctionPointer) + _PI_CL(piextDeviceSelectBinary, pi2ur::piextDeviceSelectBinary) + _PI_CL(piextGetDeviceFunctionPointer, pi2ur::piextGetDeviceFunctionPointer) _PI_CL(piextDeviceGetNativeHandle, pi2ur::piextDeviceGetNativeHandle) _PI_CL(piextDeviceCreateWithNativeHandle, pi2ur::piextDeviceCreateWithNativeHandle) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0a66f5cf60d6e..7823501547ea0 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -250,9 +250,10 @@ if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) endif() endif() -# Build OpenCL adapter -add_sycl_library("ur_adapter_opencl" SHARED - SOURCES +if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) + # Build OpenCL adapter + add_sycl_library("ur_adapter_opencl" SHARED + SOURCES "ur/ur.hpp" "ur/ur.cpp" "ur/adapters/opencl/common.cpp" @@ -274,13 +275,14 @@ add_sycl_library("ur_adapter_opencl" SHARED "ur/adapters/opencl/usm.cpp" "ur/adapters/opencl/command_buffer.hpp" "ur/adapters/opencl/command_buffer.cpp" - INCLUDE_DIRS + INCLUDE_DIRS ${sycl_inc_dir} - LIBRARIES + LIBRARIES UnifiedRuntime-Headers Threads::Threads OpenCL-ICD -) + ) +endif() if (TARGET UnifiedRuntimeLoader) set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 
EXCLUDE_FROM_DEFAULT_BUILD 1) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 2fc200fbec92f..7d6ff8447e5da 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -1126,3 +1126,73 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( + ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, uint32_t *pSelectedBinary) { + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + // Choose the binary target for the provided device + const char *image_target = nullptr; + // Get the type of the device + cl_device_type device_type; + constexpr uint32_t invalid_ind = std::numeric_limits::max(); + cl_int ret_err = + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &device_type, nullptr); + if (ret_err != CL_SUCCESS) { + *pSelectedBinary = invalid_ind; + CL_RETURN_ON_FAILURE(ret_err); + } + + switch (device_type) { + // TODO: Factor out vendor specifics into a separate source + // E.g. 
sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? + + // We'll attempt to find an image that was AOT-compiled + // from a SPIR-V image into an image specific for: + + case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU + image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + break; + case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU + image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + break; + case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA + image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + break; + default: + // Otherwise, we'll attempt to find and JIT-compile + // a device-independent SPIR-V image + image_target = UR_DEVICE_BINARY_TARGET_SPIRV64; + break; + } + + // Find the appropriate device image, fallback to spirv if not found + uint32_t fallback = invalid_ind; + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(pBinaries[i].pDeviceTargetSpec, image_target) == 0) { + *pSelectedBinary = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(pBinaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + fallback = i; + } + // Points to a spirv image, if such indeed was found + if ((*pSelectedBinary = fallback) != invalid_ind) + return UR_RESULT_SUCCESS; + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index 7779c7d9eafae..2dc52f27651a9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -349,3 +349,87 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( return UR_RESULT_SUCCESS; } + +// Function gets characters between delimeter's in str +// then checks if they are equal to the sub_str. 
+// returns true if there is at least one instance +// returns false if there are no instances of the name +static bool is_in_separated_string(const std::string &str, char delimiter, + const std::string &sub_str) { + size_t beg = 0; + size_t length = 0; + for (const auto &x : str) { + if (x == delimiter) { + if (str.substr(beg, length) == sub_str) + return true; + + beg += length + 1; + length = 0; + continue; + } + length++; + } + if (length != 0) + if (str.substr(beg, length) == sub_str) + return true; + + return false; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pFunctionName, void **ppFunctionPointer) { + cl_context CLContext = nullptr; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_PROGRAM_CONTEXT, sizeof(CLContext), + &CLContext, nullptr)); + + cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; + + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, + cl_ext::clGetDeviceFunctionPointerName, &FuncT)); + + // Check if kernel name exists, to prevent opencl runtime throwing exception + // with cpu runtime + // TODO: Use fallback search method if extension does not exist once CPU + // runtime no longer throws exceptions and prints messages when given + // unavailable functions. 
+ *ppFunctionPointer = 0; + size_t Size; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + PI_PROGRAM_INFO_KERNEL_NAMES, 0, + nullptr, &Size)); + + std::string ClResult(Size, ' '); + + CL_RETURN_ON_FAILURE(clGetProgramInfo( + cl_adapter::cast(hProgram), PI_PROGRAM_INFO_KERNEL_NAMES, + ClResult.size(), &ClResult[0], nullptr)); + + // Get rid of the null terminator and search for kernel_name + // If function cannot be found return error code to indicate it + // exists + ClResult.pop_back(); + if (!is_in_separated_string(ClResult, ';', pFunctionName)) + return UR_RESULT_ERROR_INVALID_KERNEL_NAME; + + ur_result_t ur_result = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + + // If clGetDeviceFunctionPointer is in list of extensions + if (FuncT) { + cl_int cl_result = + FuncT(cl_adapter::cast(hDevice), + cl_adapter::cast(hProgram), pFunctionName, + reinterpret_cast(ppFunctionPointer)); + // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address + // cannot be found even if kernel exits. 
As the kernel does exist return + // that the address is not available + if (cl_result == CL_INVALID_ARG_VALUE) { + *ppFunctionPointer = 0; + return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + } + } + return ur_result; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 24ffd0c50e1cf..680c5bd240747 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -91,7 +91,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; - pDdiTable->pfnGetFunctionPointer = nullptr; + pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; pDdiTable->pfnGetInfo = urProgramGetInfo; pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; pDdiTable->pfnLink = urProgramLink; @@ -251,7 +251,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( pDdiTable->pfnPartition = urDevicePartition; pDdiTable->pfnRelease = urDeviceRelease; pDdiTable->pfnRetain = urDeviceRetain; - // pDdiTable->pfnSelectBinary = nullptr; + pDdiTable->pfnSelectBinary = urDeviceSelectBinary; return UR_RESULT_SUCCESS; } From 71200198af451d331265c381ad0927c2393955fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 6 Jul 2023 16:35:36 +0100 Subject: [PATCH 18/36] [SYCL][OpenCL] Cleanup error messages and remove urGetLastResult --- .../ur/adapters/opencl/common.cpp | 18 +++++------------- .../ur/adapters/opencl/common.hpp | 7 +++---- .../ur/adapters/opencl/context.cpp | 4 +--- .../ur/adapters/opencl/device.cpp | 3 --- .../ur/adapters/opencl/platform.cpp | 8 +++++--- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 5845cda72da2e..5b090a38adae0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -9,26 +9,18 @@ #include "common.hpp" namespace cl_adapter { -// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR -thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; -thread_local char ErrorMessage[cl_adapter::MaxMessageSize]; -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *message, - ur_result_t error_code) { +/* Global variables for urPlatformGetLastError() */ +thread_local int32_t ErrorMessageCode = 0; +thread_local char ErrorMessage[MaxMessageSize]; + +[[maybe_unused]] void setErrorMessage(const char *message, int32_t error_code) { assert(strlen(message) <= cl_adapter::MaxMessageSize); strcpy(cl_adapter::ErrorMessage, message); ErrorMessageCode = error_code; } } // namespace cl_adapter -// Returns plugin specific error and warning messages; common implementation -// that can be shared between adapters -ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) { - *ppMessage = &cl_adapter::ErrorMessage[0]; - return cl_adapter::ErrorMessageCode; -} - ur_result_t map_cl_error_to_ur(cl_int result) { switch (result) { case CL_SUCCESS: diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index e4d1d7c64513d..8bee23d515106 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -137,13 +137,15 @@ inline const OpenCLVersion V3_0(3, 0); namespace cl_adapter { constexpr size_t MaxMessageSize = 256; -extern thread_local ur_result_t ErrorMessageCode; +extern thread_local int32_t ErrorMessageCode; extern thread_local char 
ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, ur_result_t error_code); +[[noreturn]] void die(const char *Message); + template To cast(From value) { if constexpr (std::is_pointer_v) { @@ -157,9 +159,6 @@ template To cast(From value) { return static_cast(value); } } - -[[noreturn]] void die(const char *Message); - } // namespace cl_adapter namespace cl_ext { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 577cef3d56a9b..590e6a03a4b77 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -88,9 +88,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* These queries should be dealt with in context_impl.cpp by calling the * queries of each device separately and building the intersection set. 
*/ - cl_adapter::setErrorMessage("These queries should have never come here.", - UR_RESULT_ERROR_INVALID_ARGUMENT); - return UR_RESULT_ERROR_INVALID_ENUMERATION; + return UR_RESULT_ERROR_INVALID_ARGUMENT; } case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 7d6ff8447e5da..e679f063346ca 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -1108,9 +1108,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ret_err = cl_adapter::getPlatformVersion(platform, platVer); if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { - cl_adapter::setErrorMessage( - "OpenCL version for device and/or platform is less than 2.1", - UR_RESULT_ERROR_INVALID_OPERATION); return UR_RESULT_ERROR_INVALID_OPERATION; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index ebea289cb8bd4..ce85efdd2e050 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -186,8 +186,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetLastError( ur_platform_handle_t hPlatform, const char **ppMessage, int32_t *pError) { + std::ignore = hPlatform; - std::ignore = ppMessage; - std::ignore = pError; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + *ppMessage = cl_adapter::ErrorMessage; + *pError = cl_adapter::ErrorMessageCode; + + return UR_RESULT_SUCCESS; } From 381356cade8bfdfdeac8b3a4ccd5b9b424c036a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 11 Jul 2023 12:55:45 +0000 Subject: [PATCH 19/36] [SYCL][OpenCL] Change variable naming to match LLVM Coding Style guidelines --- 
sycl/plugins/unified_runtime/pi2ur.hpp | 25 +- .../ur/adapters/opencl/common.cpp | 22 +- .../ur/adapters/opencl/common.hpp | 140 ++-- .../ur/adapters/opencl/context.cpp | 46 +- .../ur/adapters/opencl/context.hpp | 2 +- .../ur/adapters/opencl/device.cpp | 763 ++++++++---------- .../ur/adapters/opencl/device.hpp | 8 +- .../ur/adapters/opencl/enqueue.cpp | 50 +- .../ur/adapters/opencl/event.cpp | 55 +- .../ur/adapters/opencl/kernel.cpp | 172 ++-- .../ur/adapters/opencl/memory.cpp | 291 ++++--- .../ur/adapters/opencl/platform.cpp | 54 +- .../ur/adapters/opencl/platform.hpp | 4 +- .../ur/adapters/opencl/program.cpp | 231 +++--- .../ur/adapters/opencl/queue.cpp | 102 ++- .../ur/adapters/opencl/sampler.cpp | 72 +- .../adapters/opencl/ur_interface_loader.cpp | 108 +-- .../ur/adapters/opencl/usm.cpp | 34 +- 18 files changed, 1000 insertions(+), 1179 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 925dece418d36..c64237466ed49 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -343,7 +343,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, /* Helper function to perform conversions in-place */ ConvertHelper Value(ParamValueSize, ParamValue, ParamValueSizeRet); - pi_result error = PI_SUCCESS; + pi_result Error = PI_SUCCESS; if (ParamName == UR_DEVICE_INFO_TYPE) { auto ConvertFunc = [](ur_device_type_t UrValue) { switch (UrValue) { @@ -435,25 +435,24 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, sizeof(ur_device_partition_property_t), PI_ERROR_UNKNOWN); - const uint32_t ur_number_elements = + const uint32_t UrNumberElements = *ParamValueSizeRet / sizeof(ur_device_partition_property_t); if (ParamValue) { - auto param_value_copy = - std::make_unique( - ur_number_elements); - std::memcpy(param_value_copy.get(), ParamValue, - ur_number_elements * sizeof(ur_device_partition_property_t)); + auto ParamValueCopy = + 
std::make_unique(UrNumberElements); + std::memcpy(ParamValueCopy.get(), ParamValue, + UrNumberElements * sizeof(ur_device_partition_property_t)); pi_device_partition_property *pValuePI = reinterpret_cast(ParamValue); ur_device_partition_property_t *pValueUR = reinterpret_cast( - param_value_copy.get()); - const ur_device_partition_t type = pValueUR->type; - *pValuePI = ConvertFunc(type); + ParamValueCopy.get()); + const ur_device_partition_t Type = pValueUR->type; + *pValuePI = ConvertFunc(Type); ++pValuePI; - for (uint32_t i = 0; i < ur_number_elements; ++i) { + for (uint32_t i = 0; i < UrNumberElements; ++i) { switch (pValueUR->type) { case UR_DEVICE_PARTITION_EQUALLY: { *pValuePI = pValueUR->value.equally; @@ -480,7 +479,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, /* Add 2 extra elements to the return value (one for the type at the * beginning and another to terminate the array with a 0 */ *ParamValueSizeRet = - (ur_number_elements + 2) * sizeof(pi_device_partition_property); + (UrNumberElements + 2) * sizeof(pi_device_partition_property); } } @@ -610,7 +609,7 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, (int)ParamValueSize, (int)*ParamValueSizeRet); die("ur2piDeviceInfoValue: size mismatch"); } - return error; + return Error; } inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 5b090a38adae0..b9b7fc1a820d1 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -14,15 +14,15 @@ namespace cl_adapter { thread_local int32_t ErrorMessageCode = 0; thread_local char ErrorMessage[MaxMessageSize]; -[[maybe_unused]] void setErrorMessage(const char *message, int32_t error_code) { - assert(strlen(message) <= cl_adapter::MaxMessageSize); - strcpy(cl_adapter::ErrorMessage, message); - 
ErrorMessageCode = error_code; +[[maybe_unused]] void setErrorMessage(const char *Message, int32_t ErrorCode) { + assert(strlen(Message) <= cl_adapter::MaxMessageSize); + strcpy(cl_adapter::ErrorMessage, Message); + ErrorMessageCode = ErrorCode; } } // namespace cl_adapter -ur_result_t map_cl_error_to_ur(cl_int result) { - switch (result) { +ur_result_t mapCLErrorToUR(cl_int Result) { + switch (Result) { case CL_SUCCESS: return UR_RESULT_SUCCESS; case CL_OUT_OF_HOST_MEMORY: @@ -70,13 +70,13 @@ void cl_adapter::die(const char *Message) { /// Common API for getting the native handle of a UR object /// -/// \param urObj is the UR object to get the native handle of -/// \param nativeHandle is a pointer to be set to the native handle +/// \param URObj is the UR object to get the native handle of +/// \param NativeHandle is a pointer to be set to the native handle /// /// UR_RESULT_SUCCESS -ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle) { - UR_ASSERT(nativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) - *nativeHandle = reinterpret_cast(urObj); +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { + UR_ASSERT(NativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) + *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 8bee23d515106..35e346b7c5cd3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include /** @@ -20,7 +20,7 @@ */ #define CL_RETURN_ON_FAILURE(clCall) \ if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ - return map_cl_error_to_ur(cl_result_macro); \ + return mapCLErrorToUR(cl_result_macro); \ } /** @@ -43,10 +43,10 @@ if (outPtr != nullptr) { \ *outPtr = nullptr; \ } \ - 
return map_cl_error_to_ur(cl_result_macro); \ + return mapCLErrorToUR(cl_result_macro); \ } -namespace OCLV { +namespace oclv { class OpenCLVersion { protected: unsigned int ocl_major; @@ -55,28 +55,28 @@ class OpenCLVersion { public: OpenCLVersion() : ocl_major(0), ocl_minor(0) {} - OpenCLVersion(unsigned int ocl_major, unsigned int ocl_minor) - : ocl_major(ocl_major), ocl_minor(ocl_minor) { + OpenCLVersion(unsigned int OclMajor, unsigned int OclMinor) + : ocl_major(OclMajor), ocl_minor(OclMinor) { if (!isValid()) { - ocl_major = ocl_minor = 0; + OclMajor = OclMinor = 0; } } - OpenCLVersion(const char *version) : OpenCLVersion(std::string(version)) {} + OpenCLVersion(const char *Version) : OpenCLVersion(std::string(Version)) {} - OpenCLVersion(const std::string &version) : ocl_major(0), ocl_minor(0) { + OpenCLVersion(const std::string &Version) : ocl_major(0), ocl_minor(0) { /* The OpenCL specification defines the full version string as * 'OpenCL' for platforms and as * 'OpenCL' for devices. 
*/ - std::regex rx("OpenCL ([0-9]+)\\.([0-9]+)"); - std::smatch match; + std::regex Rx("OpenCL ([0-9]+)\\.([0-9]+)"); + std::smatch Match; - if (std::regex_search(version, match, rx) && (match.size() == 3)) { - ocl_major = strtoul(match[1].str().c_str(), nullptr, 10); - ocl_minor = strtoul(match[2].str().c_str(), nullptr, 10); + if (std::regex_search(Version, Match, Rx) && (Match.size() == 3)) { + ocl_major = strtoul(Match[1].str().c_str(), nullptr, 10); + ocl_minor = strtoul(Match[2].str().c_str(), nullptr, 10); if (!isValid()) { ocl_major = ocl_minor = 0; @@ -84,27 +84,27 @@ class OpenCLVersion { } } - bool operator==(const OpenCLVersion &v) const { - return ocl_major == v.ocl_major && ocl_minor == v.ocl_minor; + bool operator==(const OpenCLVersion &V) const { + return ocl_major == V.ocl_major && ocl_minor == V.ocl_minor; } - bool operator!=(const OpenCLVersion &v) const { return !(*this == v); } + bool operator!=(const OpenCLVersion &V) const { return !(*this == V); } - bool operator<(const OpenCLVersion &v) const { - if (ocl_major == v.ocl_major) - return ocl_minor < v.ocl_minor; + bool operator<(const OpenCLVersion &V) const { + if (ocl_major == V.ocl_major) + return ocl_minor < V.ocl_minor; - return ocl_major < v.ocl_major; + return ocl_major < V.ocl_major; } - bool operator>(const OpenCLVersion &v) const { return v < *this; } + bool operator>(const OpenCLVersion &V) const { return V < *this; } - bool operator<=(const OpenCLVersion &v) const { - return (*this < v) || (*this == v); + bool operator<=(const OpenCLVersion &V) const { + return (*this < V) || (*this == V); } - bool operator>=(const OpenCLVersion &v) const { - return (*this > v) || (*this == v); + bool operator>=(const OpenCLVersion &V) const { + return (*this > V) || (*this == V); } bool isValid() const { @@ -133,7 +133,7 @@ inline const OpenCLVersion V2_1(2, 1); inline const OpenCLVersion V2_2(2, 2); inline const OpenCLVersion V3_0(3, 0); -} // namespace OCLV +} // namespace oclv namespace 
cl_adapter { constexpr size_t MaxMessageSize = 256; @@ -141,22 +141,22 @@ extern thread_local int32_t ErrorMessageCode; extern thread_local char ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *message, - ur_result_t error_code); +[[maybe_unused]] void setErrorMessage(const char *Message, + ur_result_t ErrorCode); [[noreturn]] void die(const char *Message); -template To cast(From value) { +template To cast(From Value) { if constexpr (std::is_pointer_v) { static_assert(std::is_pointer_v == std::is_pointer_v, "Cast failed pointer check"); - return reinterpret_cast(value); + return reinterpret_cast(Value); } else { static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); static_assert(std::is_signed_v == std::is_signed_v, "Cast failed sign check"); - return static_cast(value); + return static_cast(Value); } } } // namespace cl_adapter @@ -170,27 +170,27 @@ namespace cl_ext { #endif // Names of USM functions that are queried from OpenCL -CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL"; -CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL"; -CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL"; -CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL"; -CONSTFIX char clCreateBufferWithPropertiesName[] = +CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; +CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; +CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; +CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char CreateBufferWithPropertiesName[] = "clCreateBufferWithPropertiesINTEL"; -CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; -CONSTFIX char clEnqueueMemFillName[] = "clEnqueueMemFillINTEL"; -CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; -CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; -CONSTFIX char 
clSetProgramSpecializationConstantName[] = +CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; +CONSTFIX char EnqueueMemFillName[] = "clEnqueueMemFillINTEL"; +CONSTFIX char EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; +CONSTFIX char SetProgramSpecializationConstantName[] = "clSetProgramSpecializationConstant"; -CONSTFIX char clGetDeviceFunctionPointerName[] = +CONSTFIX char GetDeviceFunctionPointerName[] = "clGetDeviceFunctionPointerINTEL"; -CONSTFIX char clEnqueueWriteGlobalVariableName[] = +CONSTFIX char EnqueueWriteGlobalVariableName[] = "clEnqueueWriteGlobalVariableINTEL"; -CONSTFIX char clEnqueueReadGlobalVariableName[] = +CONSTFIX char EnqueueReadGlobalVariableName[] = "clEnqueueReadGlobalVariableINTEL"; // Names of host pipe functions queried from OpenCL -CONSTFIX char clEnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; -CONSTFIX char clEnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; +CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; +CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; #undef CONSTFIX @@ -263,64 +263,64 @@ inline ExtFuncPtrCacheT *ExtFuncPtrCache; // USM helper function to get an extension function pointer template -static ur_result_t getExtFuncFromContext(cl_context context, +static ur_result_t getExtFuncFromContext(cl_context Context, FuncPtrCache &FPtrCache, - const char *FuncName, T *fptr) { + const char *FuncName, T *Fptr) { // TODO // Potentially redo caching as UR interface changes. // if cached, return cached FuncPtr std::lock_guard CacheLock{FPtrCache.Mutex}; std::map &FPtrMap = FPtrCache.Map; - auto It = FPtrMap.find(context); + auto It = FPtrMap.find(Context); if (It != FPtrMap.end()) { auto F = It->second; // if cached that extension is not available return nullptr and // UR_RESULT_ERROR_INVALID_VALUE - *fptr = F; + *Fptr = F; return F ? 
UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; } - cl_uint deviceCount; - cl_int ret_err = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); + cl_uint DeviceCount; + cl_int RetErr = clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &DeviceCount, nullptr); - if (ret_err != CL_SUCCESS || deviceCount < 1) { + if (RetErr != CL_SUCCESS || DeviceCount < 1) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - std::vector devicesInCtx(deviceCount); - ret_err = clGetContextInfo(context, CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); + std::vector DevicesInCtx(DeviceCount); + RetErr = clGetContextInfo(Context, CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), + DevicesInCtx.data(), nullptr); - if (ret_err != CL_SUCCESS) { + if (RetErr != CL_SUCCESS) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - cl_platform_id curPlatform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr); + cl_platform_id CurPlatform; + RetErr = clGetDeviceInfo(DevicesInCtx[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr); - if (ret_err != CL_SUCCESS) { + if (RetErr != CL_SUCCESS) { return UR_RESULT_ERROR_INVALID_CONTEXT; } T FuncPtr = - (T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName); + (T)clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName); if (!FuncPtr) { // Cache that the extension is not available - FPtrMap[context] = nullptr; + FPtrMap[Context] = nullptr; return UR_RESULT_ERROR_INVALID_VALUE; } - *fptr = FuncPtr; - FPtrMap[context] = FuncPtr; + *Fptr = FuncPtr; + FPtrMap[Context] = FuncPtr; return UR_RESULT_SUCCESS; } } // namespace cl_ext -ur_result_t map_cl_error_to_ur(cl_int result); +ur_result_t mapCLErrorToUR(cl_int Result); -ur_result_t urGetNativeHandle(void *urObj, ur_native_handle_t *nativeHandle); +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t 
*NativeHandle); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 590e6a03a4b77..8021f6cb45cfb 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -12,22 +12,22 @@ ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, - std::unique_ptr> &devicesInCtx) { + std::unique_ptr> &DevicesInCtx) { - cl_uint deviceCount; + cl_uint DeviceCount; CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), - &deviceCount, nullptr)); + &DeviceCount, nullptr)); - if (deviceCount < 1) { + if (DeviceCount < 1) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - devicesInCtx = std::make_unique>(deviceCount); + DevicesInCtx = std::make_unique>(DeviceCount); CL_RETURN_ON_FAILURE(clGetContextInfo( cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), (*devicesInCtx).data(), nullptr)); + DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); return UR_RESULT_SUCCESS; } @@ -39,33 +39,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( UR_ASSERT(phDevices, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(phContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int ret; + cl_int Ret; *phContext = cl_adapter::cast( clCreateContext(nullptr, cl_adapter::cast(DeviceCount), cl_adapter::cast(phDevices), - nullptr, nullptr, cl_adapter::cast(&ret))); + nullptr, nullptr, cl_adapter::cast(&Ret))); - return map_cl_error_to_ur(ret); + return mapCLErrorToUR(Ret); } -static cl_int map_ur_context_info_to_cl(ur_context_info_t urPropName) { +static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { - cl_int cl_propName; - switch (urPropName) { + cl_int CLPropName; + switch (URPropName) { case UR_CONTEXT_INFO_NUM_DEVICES: - cl_propName = CL_CONTEXT_NUM_DEVICES; + CLPropName = CL_CONTEXT_NUM_DEVICES; break; 
case UR_CONTEXT_INFO_DEVICES: - cl_propName = CL_CONTEXT_DEVICES; + CLPropName = CL_CONTEXT_DEVICES; break; case UR_CONTEXT_INFO_REFERENCE_COUNT: - cl_propName = CL_CONTEXT_REFERENCE_COUNT; + CLPropName = CL_CONTEXT_REFERENCE_COUNT; break; default: - cl_propName = -1; + CLPropName = -1; } - return cl_propName; + return CLPropName; } UR_APIEXPORT ur_result_t UR_APICALL @@ -74,7 +74,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int cl_propName = map_ur_context_info_to_cl(propName); + const cl_int CLPropName = mapURContextInfoToCL(propName); switch (static_cast(propName)) { /* 2D USM memops are not supported. */ @@ -95,7 +95,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_REFERENCE_COUNT: { CL_RETURN_ON_FAILURE( - clGetContextInfo(cl_adapter::cast(hContext), cl_propName, + clGetContextInfo(cl_adapter::cast(hContext), CLPropName, propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -108,16 +108,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret = clReleaseContext(cl_adapter::cast(hContext)); - return map_cl_error_to_ur(ret); + cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret = clRetainContext(cl_adapter::cast(hContext)); - return map_cl_error_to_ur(ret); + cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp index 1ce55c81e748b..2964ca20e7268 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp @@ -15,5 +15,5 @@ namespace cl_adapter { ur_result_t getDevicesFromContext(ur_context_handle_t hContext, - std::unique_ptr> &devicesInCtx); + std::unique_ptr> &DevicesInCtx); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index e679f063346ca..f623696c7f532 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -13,19 +13,19 @@ #include #include -ur_result_t cl_adapter::getDeviceVersion(cl_device_id dev, - OCLV::OpenCLVersion &version) { +ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, + oclv::OpenCLVersion &Version) { - size_t devVerSize = 0; + size_t DevVerSize = 0; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, nullptr, &devVerSize)); + clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); - std::string devVer(devVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(dev, CL_DEVICE_VERSION, devVerSize, - devVer.data(), nullptr)); + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, + DevVer.data(), nullptr)); - version = OCLV::OpenCLVersion(devVer); - if (!version.isValid()) { + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { return UR_RESULT_ERROR_INVALID_DEVICE; } @@ -33,19 +33,19 @@ ur_result_t cl_adapter::getDeviceVersion(cl_device_id dev, } ur_result_t cl_adapter::checkDeviceExtensions( - cl_device_id dev, const std::vector &exts, bool &supported) { - size_t extSize = 0; + cl_device_id Dev, const std::vector &Exts, bool &Supported) { + size_t ExtSize = 0; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, nullptr, 
&extSize)); + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); - std::string extStr(extSize, '\0'); + std::string ExtStr(ExtSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, extSize, - extStr.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); - supported = true; - for (const std::string &ext : exts) { - if (!(supported = (extStr.find(ext) != std::string::npos))) { + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { break; } } @@ -59,341 +59,253 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, ur_device_handle_t *phDevices, uint32_t *pNumDevices) { - cl_device_type type; + cl_device_type Type; switch (DeviceType) { case UR_DEVICE_TYPE_ALL: - type = CL_DEVICE_TYPE_ALL; + Type = CL_DEVICE_TYPE_ALL; break; case UR_DEVICE_TYPE_GPU: - type = CL_DEVICE_TYPE_GPU; + Type = CL_DEVICE_TYPE_GPU; break; case UR_DEVICE_TYPE_CPU: - type = CL_DEVICE_TYPE_CPU; + Type = CL_DEVICE_TYPE_CPU; break; case UR_DEVICE_TYPE_FPGA: case UR_DEVICE_TYPE_MCA: case UR_DEVICE_TYPE_VPU: - type = CL_DEVICE_TYPE_ACCELERATOR; + Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - type = UR_DEVICE_TYPE_DEFAULT; + Type = UR_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - cl_int result = clGetDeviceIDs(cl_adapter::cast(hPlatform), - type, cl_adapter::cast(NumEntries), + cl_int Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), + Type, cl_adapter::cast(NumEntries), cl_adapter::cast(phDevices), cl_adapter::cast(pNumDevices)); // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (result == CL_DEVICE_NOT_FOUND) { - result = CL_SUCCESS; + if (Result == CL_DEVICE_NOT_FOUND) { + Result = CL_SUCCESS; if (pNumDevices) { *pNumDevices = 0; } } - return map_cl_error_to_ur(result); + return mapCLErrorToUR(Result); } 
static ur_device_fp_capability_flags_t -map_ur_cl_device_fp_config_to_ur(cl_device_fp_config cl_value) { +mapCLDeviceFpConfigToUR(cl_device_fp_config CLValue) { - ur_device_fp_capability_flags_t ur_value = 0; - if (cl_value & CL_FP_DENORM) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; + ur_device_fp_capability_flags_t URValue = 0; + if (CLValue & CL_FP_DENORM) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; } - if (cl_value & CL_FP_INF_NAN) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; + if (CLValue & CL_FP_INF_NAN) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; } - if (cl_value & CL_FP_ROUND_TO_NEAREST) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + if (CLValue & CL_FP_ROUND_TO_NEAREST) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; } - if (cl_value & CL_FP_ROUND_TO_ZERO) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + if (CLValue & CL_FP_ROUND_TO_ZERO) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; } - if (cl_value & CL_FP_ROUND_TO_INF) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; + if (CLValue & CL_FP_ROUND_TO_INF) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; } - if (cl_value & CL_FP_FMA) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + if (CLValue & CL_FP_FMA) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_FMA; } - if (cl_value & CL_FP_SOFT_FLOAT) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; + if (CLValue & CL_FP_SOFT_FLOAT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; } - if (cl_value & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { - ur_value |= UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + if (CLValue & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; } - return ur_value; + return URValue; } -static cl_int map_ur_device_info_to_cl(ur_device_info_t urPropName) { +static cl_int mapURDeviceInfoToCL(ur_device_info_t URPropName) { - cl_int cl_propName; - switch 
(static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_DEVICE_INFO_TYPE: - cl_propName = CL_DEVICE_TYPE; - break; + return CL_DEVICE_TYPE; case UR_DEVICE_INFO_PARENT_DEVICE: - cl_propName = CL_DEVICE_PARENT_DEVICE; - break; + return CL_DEVICE_PARENT_DEVICE; case UR_DEVICE_INFO_PLATFORM: - cl_propName = CL_DEVICE_PLATFORM; - break; + return CL_DEVICE_PLATFORM; case UR_DEVICE_INFO_VENDOR_ID: - cl_propName = CL_DEVICE_VENDOR_ID; - break; + return CL_DEVICE_VENDOR_ID; case UR_DEVICE_INFO_EXTENSIONS: - cl_propName = CL_DEVICE_EXTENSIONS; - break; + return CL_DEVICE_EXTENSIONS; case UR_DEVICE_INFO_NAME: - cl_propName = CL_DEVICE_NAME; - break; + return CL_DEVICE_NAME; case UR_DEVICE_INFO_COMPILER_AVAILABLE: - cl_propName = CL_DEVICE_COMPILER_AVAILABLE; - break; + return CL_DEVICE_COMPILER_AVAILABLE; case UR_DEVICE_INFO_LINKER_AVAILABLE: - cl_propName = CL_DEVICE_LINKER_AVAILABLE; - break; + return CL_DEVICE_LINKER_AVAILABLE; case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: - cl_propName = CL_DEVICE_MAX_COMPUTE_UNITS; - break; + return CL_DEVICE_MAX_COMPUTE_UNITS; case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: - cl_propName = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS; - break; + return CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS; case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: - cl_propName = CL_DEVICE_MAX_WORK_GROUP_SIZE; - break; + return CL_DEVICE_MAX_WORK_GROUP_SIZE; case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - cl_propName = CL_DEVICE_MAX_WORK_ITEM_SIZES; - break; + return CL_DEVICE_MAX_WORK_ITEM_SIZES; case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: - cl_propName = CL_DEVICE_MAX_CLOCK_FREQUENCY; - break; + return CL_DEVICE_MAX_CLOCK_FREQUENCY; case UR_DEVICE_INFO_ADDRESS_BITS: - cl_propName = CL_DEVICE_ADDRESS_BITS; - break; + return CL_DEVICE_ADDRESS_BITS; case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - cl_propName = CL_DEVICE_MAX_MEM_ALLOC_SIZE; - break; + return CL_DEVICE_MAX_MEM_ALLOC_SIZE; case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: - cl_propName = CL_DEVICE_GLOBAL_MEM_SIZE; - break; + return 
CL_DEVICE_GLOBAL_MEM_SIZE; case UR_DEVICE_INFO_LOCAL_MEM_SIZE: - cl_propName = CL_DEVICE_LOCAL_MEM_SIZE; - break; + return CL_DEVICE_LOCAL_MEM_SIZE; case UR_DEVICE_INFO_IMAGE_SUPPORTED: - cl_propName = CL_DEVICE_IMAGE_SUPPORT; - break; + return CL_DEVICE_IMAGE_SUPPORT; case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: - cl_propName = CL_DEVICE_HOST_UNIFIED_MEMORY; - break; + return CL_DEVICE_HOST_UNIFIED_MEMORY; case UR_DEVICE_INFO_AVAILABLE: - cl_propName = CL_DEVICE_AVAILABLE; - break; + return CL_DEVICE_AVAILABLE; case UR_DEVICE_INFO_VENDOR: - cl_propName = CL_DEVICE_VENDOR; - break; + return CL_DEVICE_VENDOR; case UR_DEVICE_INFO_DRIVER_VERSION: - cl_propName = CL_DRIVER_VERSION; - break; + return CL_DRIVER_VERSION; case UR_DEVICE_INFO_VERSION: - cl_propName = CL_DEVICE_VERSION; - break; + return CL_DEVICE_VERSION; case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: - cl_propName = CL_DEVICE_PARTITION_MAX_SUB_DEVICES; - break; + return CL_DEVICE_PARTITION_MAX_SUB_DEVICES; case UR_DEVICE_INFO_REFERENCE_COUNT: - cl_propName = CL_DEVICE_REFERENCE_COUNT; - break; + return CL_DEVICE_REFERENCE_COUNT; case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: - cl_propName = CL_DEVICE_PARTITION_PROPERTIES; - break; + return CL_DEVICE_PARTITION_PROPERTIES; case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - cl_propName = CL_DEVICE_PARTITION_AFFINITY_DOMAIN; - break; + return CL_DEVICE_PARTITION_AFFINITY_DOMAIN; case UR_DEVICE_INFO_PARTITION_TYPE: - cl_propName = CL_DEVICE_PARTITION_TYPE; - break; + return CL_DEVICE_PARTITION_TYPE; case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: - cl_propName = CL_DEVICE_OPENCL_C_VERSION; - break; + return CL_DEVICE_OPENCL_C_VERSION; case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: - cl_propName = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC; - break; + return CL_DEVICE_PREFERRED_INTEROP_USER_SYNC; case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: - cl_propName = CL_DEVICE_PRINTF_BUFFER_SIZE; - break; + return CL_DEVICE_PRINTF_BUFFER_SIZE; case UR_DEVICE_INFO_PROFILE: - cl_propName = 
CL_DEVICE_PROFILE; - break; + return CL_DEVICE_PROFILE; case UR_DEVICE_INFO_BUILT_IN_KERNELS: - cl_propName = CL_DEVICE_BUILT_IN_KERNELS; - break; + return CL_DEVICE_BUILT_IN_KERNELS; case UR_DEVICE_INFO_QUEUE_PROPERTIES: - cl_propName = CL_DEVICE_QUEUE_PROPERTIES; - break; + return CL_DEVICE_QUEUE_PROPERTIES; case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: - cl_propName = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES; - break; + return CL_DEVICE_QUEUE_ON_HOST_PROPERTIES; case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: - cl_propName = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES; - break; + return CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES; case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: - cl_propName = CL_DEVICE_EXECUTION_CAPABILITIES; - break; + return CL_DEVICE_EXECUTION_CAPABILITIES; case UR_DEVICE_INFO_ENDIAN_LITTLE: - cl_propName = CL_DEVICE_ENDIAN_LITTLE; - break; + return CL_DEVICE_ENDIAN_LITTLE; case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: - cl_propName = CL_DEVICE_ERROR_CORRECTION_SUPPORT; - break; + return CL_DEVICE_ERROR_CORRECTION_SUPPORT; case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: - cl_propName = CL_DEVICE_PROFILING_TIMER_RESOLUTION; - break; + return CL_DEVICE_PROFILING_TIMER_RESOLUTION; case UR_DEVICE_INFO_LOCAL_MEM_TYPE: - cl_propName = CL_DEVICE_LOCAL_MEM_TYPE; - break; + return CL_DEVICE_LOCAL_MEM_TYPE; case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: - cl_propName = CL_DEVICE_MAX_CONSTANT_ARGS; - break; + return CL_DEVICE_MAX_CONSTANT_ARGS; case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: - cl_propName = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; - break; + return CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: - cl_propName = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE; - break; + return CL_DEVICE_GLOBAL_MEM_CACHE_TYPE; case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: - cl_propName = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; - break; + return CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: - cl_propName = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE; - 
break; + return CL_DEVICE_GLOBAL_MEM_CACHE_SIZE; case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: - cl_propName = CL_DEVICE_MAX_PARAMETER_SIZE; - break; + return CL_DEVICE_MAX_PARAMETER_SIZE; case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: - cl_propName = CL_DEVICE_MEM_BASE_ADDR_ALIGN; - break; + return CL_DEVICE_MEM_BASE_ADDR_ALIGN; case UR_DEVICE_INFO_MAX_SAMPLERS: - cl_propName = CL_DEVICE_MAX_SAMPLERS; - break; + return CL_DEVICE_MAX_SAMPLERS; case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: - cl_propName = CL_DEVICE_MAX_READ_IMAGE_ARGS; - break; + return CL_DEVICE_MAX_READ_IMAGE_ARGS; case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: - cl_propName = CL_DEVICE_MAX_WRITE_IMAGE_ARGS; - break; + return CL_DEVICE_MAX_WRITE_IMAGE_ARGS; case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: - cl_propName = CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS; - break; + return CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS; case UR_DEVICE_INFO_SINGLE_FP_CONFIG: - cl_propName = CL_DEVICE_SINGLE_FP_CONFIG; - break; + return CL_DEVICE_SINGLE_FP_CONFIG; case UR_DEVICE_INFO_HALF_FP_CONFIG: - cl_propName = CL_DEVICE_HALF_FP_CONFIG; - break; + return CL_DEVICE_HALF_FP_CONFIG; case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: - cl_propName = CL_DEVICE_DOUBLE_FP_CONFIG; - break; + return CL_DEVICE_DOUBLE_FP_CONFIG; case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: - cl_propName = CL_DEVICE_IMAGE2D_MAX_WIDTH; - break; + return CL_DEVICE_IMAGE2D_MAX_WIDTH; case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: - cl_propName = CL_DEVICE_IMAGE2D_MAX_HEIGHT; - break; + return CL_DEVICE_IMAGE2D_MAX_HEIGHT; case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: - cl_propName = CL_DEVICE_IMAGE3D_MAX_WIDTH; - break; + return CL_DEVICE_IMAGE3D_MAX_WIDTH; case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: - cl_propName = CL_DEVICE_IMAGE3D_MAX_HEIGHT; - break; + return CL_DEVICE_IMAGE3D_MAX_HEIGHT; case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: - cl_propName = CL_DEVICE_IMAGE3D_MAX_DEPTH; - break; + return CL_DEVICE_IMAGE3D_MAX_DEPTH; case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: - cl_propName = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE; 
- break; + return CL_DEVICE_IMAGE_MAX_BUFFER_SIZE; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_INT; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: - cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE; case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: - 
cl_propName = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF; - break; + return CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF; case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: - cl_propName = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF; - break; + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF; case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: - cl_propName = CL_DEVICE_MAX_NUM_SUB_GROUPS; - break; + return CL_DEVICE_MAX_NUM_SUB_GROUPS; case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: - cl_propName = CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; - break; + return CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: - cl_propName = CL_DEVICE_SUB_GROUP_SIZES_INTEL; - break; + return CL_DEVICE_SUB_GROUP_SIZES_INTEL; case UR_DEVICE_INFO_IL_VERSION: - cl_propName = CL_DEVICE_IL_VERSION; - break; + return CL_DEVICE_IL_VERSION; case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: - cl_propName = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE; - break; + return CL_DEVICE_IMAGE_MAX_ARRAY_SIZE; case UR_DEVICE_INFO_USM_HOST_SUPPORT: - cl_propName = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL; - break; + return CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL; case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: - cl_propName = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL; - break; + return CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL; case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - cl_propName = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; - break; + return CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - cl_propName = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; - break; + return CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: - cl_propName = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; - break; + return CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, 
@@ -405,112 +317,111 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_device_info cl_propName = map_ur_device_info_to_cl(propName); + const cl_device_info CLPropName = mapURDeviceInfoToCL(propName); /* TODO UR: Casting to uint32_t to silence warnings due to some values not * being part of the enum. Can be removed once all UR_EXT enums are promoted * to UR */ switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { - cl_device_type cl_type; + cl_device_type CLType; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - sizeof(cl_device_type), &cl_type, nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_type), &CLType, nullptr)); /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it * is. Assuming FPGA for now */ /* TODO UR: In OpenCL, a device can have multiple types (e.g. CPU and GPU). 
* We are potentially losing information by returning only one type */ - ur_device_type_t ur_device_type = UR_DEVICE_TYPE_DEFAULT; - if (cl_type & CL_DEVICE_TYPE_CPU) { - ur_device_type = UR_DEVICE_TYPE_CPU; - } else if (cl_type & CL_DEVICE_TYPE_GPU) { - ur_device_type = UR_DEVICE_TYPE_GPU; - } else if (cl_type & CL_DEVICE_TYPE_ACCELERATOR) { - ur_device_type = UR_DEVICE_TYPE_FPGA; + ur_device_type_t URDeviceType = UR_DEVICE_TYPE_DEFAULT; + if (CLType & CL_DEVICE_TYPE_CPU) { + URDeviceType = UR_DEVICE_TYPE_CPU; + } else if (CLType & CL_DEVICE_TYPE_GPU) { + URDeviceType = UR_DEVICE_TYPE_GPU; + } else if (CLType & CL_DEVICE_TYPE_ACCELERATOR) { + URDeviceType = UR_DEVICE_TYPE_FPGA; } - return ReturnValue(ur_device_type); + return ReturnValue(URDeviceType); } case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { - OCLV::OpenCLVersion version; + oclv::OpenCLVersion Version; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), version)); + cl_adapter::cast(hDevice), Version)); - const std::string results = std::to_string(version.getMajor()) + "." + - std::to_string(version.getMinor()); - return ReturnValue(results.c_str(), results.size() + 1); + const std::string Results = std::to_string(Version.getMajor()) + "." 
+ + std::to_string(Version.getMinor()); + return ReturnValue(Results.c_str(), Results.size() + 1); } case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { - size_t cl_size; + size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, 0, - nullptr, &cl_size)); - const size_t n_properties = cl_size / sizeof(cl_device_partition_property); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); - std::vector cl_value(n_properties); + std::vector CLValue(NProperties); CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - cl_size, cl_value.data(), nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue.data(), nullptr)); /* The OpenCL implementation returns a value of 0 if no properties are * supported. UR will return a size of 0 for now. */ - if (pPropSizeRet && cl_value[0] == 0) { + if (pPropSizeRet && CLValue[0] == 0) { *pPropSizeRet = 0; return UR_RESULT_SUCCESS; } - std::vector ur_value{}; - for (size_t i = 0; i < n_properties; ++i) { - if (cl_value[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL && - cl_value[i] != 0) { - ur_value.push_back(static_cast(cl_value[i])); + std::vector URValue{}; + for (size_t i = 0; i < NProperties; ++i) { + if (CLValue[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL && CLValue[i] != 0) { + URValue.push_back(static_cast(CLValue[i])); } } - return ReturnValue(ur_value.data(), ur_value.size()); + return ReturnValue(URValue.data(), URValue.size()); } case UR_DEVICE_INFO_PARTITION_TYPE: { - size_t cl_size; + size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, 0, - nullptr, &cl_size)); - const size_t n_properties = cl_size / sizeof(cl_device_partition_property); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); /* The 
OpenCL implementation returns either a size of 0 or a value of 0 if * the device is not a sub-device. UR will return a size of 0 for now. * TODO Ideally, this could become an error once PI is removed from SYCL RT */ - if (pPropSizeRet && (cl_size == 0 || n_properties == 1)) { + if (pPropSizeRet && (CLSize == 0 || NProperties == 1)) { *pPropSizeRet = 0; return UR_RESULT_SUCCESS; } - auto cl_value = - reinterpret_cast(alloca(cl_size)); + auto CLValue = + reinterpret_cast(alloca(CLSize)); CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - cl_size, cl_value, nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue, nullptr)); - std::vector ur_value(n_properties - 1); + std::vector URValue(NProperties - 1); /* OpenCL will always return exactly one partition type followed by one or * more values. */ - for (uint32_t i = 0; i < ur_value.size(); ++i) { - ur_value[i].type = static_cast(cl_value[0]); - switch (ur_value[i].type) { + for (uint32_t i = 0; i < URValue.size(); ++i) { + URValue[i].type = static_cast(CLValue[0]); + switch (URValue[i].type) { case UR_DEVICE_PARTITION_EQUALLY: { - ur_value[i].value.equally = cl_value[i + 1]; + URValue[i].value.equally = CLValue[i + 1]; break; } case UR_DEVICE_PARTITION_BY_COUNTS: { - ur_value[i].value.count = cl_value[i + 1]; + URValue[i].value.count = CLValue[i + 1]; break; } case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { - ur_value[i].value.affinity_domain = cl_value[i + 1]; + URValue[i].value.affinity_domain = CLValue[i + 1]; break; } default: { @@ -519,23 +430,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } } - return ReturnValue(ur_value.data(), ur_value.size()); + return ReturnValue(URValue.data(), URValue.size()); } case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { /* Returns the maximum sizes of a work group for each dimension one could * use to submit a kernel. There is no such query defined in OpenCL. 
So * we'll return the maximum value. */ - static constexpr uint32_t max_work_item_dimensions = 3u; + static constexpr uint32_t MaxWorkItemDimensions = 3u; static constexpr size_t Max = (std::numeric_limits::max)(); struct { - size_t sizes[max_work_item_dimensions]; - } return_sizes; + size_t sizes[MaxWorkItemDimensions]; + } ReturnSizes; - return_sizes.sizes[0] = Max; - return_sizes.sizes[1] = Max; - return_sizes.sizes[2] = Max; - return ReturnValue(return_sizes); + ReturnSizes.sizes[0] = Max; + ReturnSizes.sizes[1] = Max; + ReturnSizes.sizes[2] = Max; + return ReturnValue(ReturnSizes); } case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { return ReturnValue(static_cast(1u)); @@ -543,22 +454,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ - OCLV::OpenCLVersion devVer; + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), devVer)); + cl_adapter::cast(hDevice), DevVer)); - if (devVer >= OCLV::V2_1) { - cl_uint cl_value; + if (DevVer >= oclv::V2_1) { + cl_uint CLValue; CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, - sizeof(cl_uint), &cl_value, nullptr)); + sizeof(cl_uint), &CLValue, nullptr)); - if (cl_value == 0u) { + if (CLValue == 0u) { /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 * spec says that minimum possible value is 1. 
*/ return ReturnValue(1u); } else { - return ReturnValue(static_cast(cl_value)); + return ReturnValue(static_cast(CLValue)); } } else { /* Otherwise, we can't query anything, because even cl_khr_subgroups @@ -573,73 +484,73 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_device_fp_config * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { - bool supported; + bool Supported; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_fp16"}, supported)); + cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); - if (!supported) { + if (!Supported) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } } - cl_device_fp_config cl_value; + cl_device_fp_config CLValue; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - sizeof(cl_device_fp_config), &cl_value, nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_fp_config), &CLValue, nullptr)); - return ReturnValue(map_ur_cl_device_fp_config_to_ur(cl_value)); + return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); } case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ - OCLV::OpenCLVersion devVer; + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), devVer)); + cl_adapter::cast(hDevice), DevVer)); /* Minimum required capability to be returned. 
For OpenCL 1.2, this is all * that is required */ - ur_memory_order_capability_flags_t ur_capabilities = + ur_memory_order_capability_flags_t URCapabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; - if (devVer >= OCLV::V3_0) { + if (DevVer >= oclv::V3_0) { /* For OpenCL >=3.0, the query should be implemented */ - cl_device_atomic_capabilities cl_capabilities; + cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ - const cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + const cl_int Mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST; - cl_capabilities &= mask; + CLCapabilities &= Mask; /* The memory order capabilities are hierarchical, if one is implied, all * preceding capabilities are implied as well. Especially in the case of * ACQ_REL. */ - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { - ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; } - } else if (devVer >= OCLV::V2_0) { + } else if (DevVer >= oclv::V2_0) { /* For OpenCL 2.x, return all capabilities. 
* (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) */ - ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | - UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } /* cl_device_atomic_capabilities is uint64_t and * ur_memory_order_capability_flags_t is uint32_t */ return ReturnValue( - static_cast(ur_capabilities)); + static_cast(URCapabilities)); } case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to @@ -647,23 +558,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * all narrower scopes. At a minimum, each device must support WORK_ITEM, * SUB_GROUP and WORK_GROUP. 
* (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ - ur_memory_scope_capability_flags_t ur_capabilities = + ur_memory_scope_capability_flags_t URCapabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; - OCLV::OpenCLVersion devVer; + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), devVer)); + cl_adapter::cast(hDevice), DevVer)); - cl_device_atomic_capabilities cl_capabilities; - if (devVer >= OCLV::V3_0) { + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); - assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); /* Because scopes are hierarchical, wider scopes support all narrower @@ -671,77 +582,77 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We * already initialized to these minimum mandated capabilities. Just check * wider scopes. */ - if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; } - if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } else { /* This info is only available in OpenCL version >= 3.0. 
Just return * minimum mandated capabilities for older versions. OpenCL 1.x minimum * mandated capabilities are WORK_GROUP, we already initialized using it. */ - if (devVer >= OCLV::V2_0) { + if (DevVer >= oclv::V2_0) { /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | * ALL_DEVICES */ - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } /* cl_device_atomic_capabilities is uint64_t and * ur_memory_scope_capability_flags_t is uint32_t */ return ReturnValue( - static_cast(ur_capabilities)); + static_cast(URCapabilities)); } case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to * SYCL2020 4.6.3.2 */ - ur_memory_order_capability_flags_t ur_capabilities = + ur_memory_order_capability_flags_t URCapabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; - OCLV::OpenCLVersion devVer; + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), devVer)); + cl_adapter::cast(hDevice), DevVer)); - cl_device_atomic_capabilities cl_capabilities; - if (devVer >= OCLV::V3_0) { + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); - assert((cl_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && "Violates minimum mandated guarantee"); - assert((cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) 
&& "Violates minimum mandated guarantee"); /* We already initialized to minimum mandated capabilities. Just check * stronger orders. */ - if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } } else { /* This info is only available in OpenCL version >= 3.0. Just return * minimum mandated capabilities for older versions. OpenCL 1.x minimum * mandated capabilities are RELAXED | ACQ_REL, we already initialized * using these. */ - if (devVer >= OCLV::V2_0) { + if (DevVer >= oclv::V2_0) { /* OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | * SEQ_CST */ - ur_capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; } } /* cl_device_atomic_capabilities is uint64_t and * ur_memory_order_capability_flags_t is uint32_t */ return ReturnValue( - static_cast(ur_capabilities)); + static_cast(URCapabilities)); } case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to @@ -749,23 +660,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * all narrower scopes. At a minimum, each device must support WORK_ITEM, * SUB_GROUP and WORK_GROUP. 
* (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ - ur_memory_scope_capability_flags_t ur_capabilities = + ur_memory_scope_capability_flags_t URCapabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; - OCLV::OpenCLVersion devVer; + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), devVer)); + cl_adapter::cast(hDevice), DevVer)); - cl_device_atomic_capabilities cl_capabilities; - if (devVer >= OCLV::V3_0) { + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, - sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr)); + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); - assert((cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); /* Because scopes are hierarchical, wider scopes support all narrower @@ -773,30 +684,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We * already initialized to these minimum mandated capabilities. Just check * wider scopes. */ - if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; } - if (cl_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } else { /* This info is only available in OpenCL version >= 3.0. 
Just return * minimum mandated capabilities for older versions. OpenCL 1.x minimum * mandated capabilities are WORK_GROUP, we already initialized using it. */ - if (devVer >= OCLV::V2_0) { + if (DevVer >= oclv::V2_0) { /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | * ALL_DEVICES */ - ur_capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; } } /* cl_device_atomic_capabilities is uint64_t and * ur_memory_scope_capability_flags_t is uint32_t */ return ReturnValue( - static_cast(ur_capabilities)); + static_cast(URCapabilities)); } case UR_DEVICE_INFO_IMAGE_SRGB: { @@ -807,30 +718,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); } case UR_DEVICE_INFO_ATOMIC_64: { - bool supported = false; + bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( cl_adapter::cast(hDevice), {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, - supported)); + Supported)); - return ReturnValue(supported); + return ReturnValue(Supported); } case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { - cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; CL_RETURN_ON_FAILURE( clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &devType, nullptr)); + sizeof(cl_device_type), &DevType, nullptr)); - return ReturnValue(devType == CL_DEVICE_TYPE_GPU); + return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { - bool supported = false; + bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( cl_adapter::cast(hDevice), - {"cl_intel_mem_channel_property"}, supported)); + {"cl_intel_mem_channel_property"}, Supported)); - return ReturnValue(supported); + return ReturnValue(Supported); } case 
UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: @@ -847,15 +758,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ - cl_bitfield cl_value; + cl_bitfield CLValue; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - sizeof(cl_bitfield), &cl_value, nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bitfield), &CLValue, nullptr)); /* We can just static_cast the output because OpenCL and UR bitfields * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t * types are uint32_t */ - return ReturnValue(static_cast(cl_value)); + return ReturnValue(static_cast(CLValue)); } case UR_DEVICE_INFO_IMAGE_SUPPORTED: case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: @@ -870,13 +781,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bool * UR type: ur_bool_t */ - cl_bool cl_value; + cl_bool CLValue; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, - sizeof(cl_bool), &cl_value, nullptr)); + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ - return ReturnValue(static_cast(cl_value)); + return ReturnValue(static_cast(CLValue)); } case UR_DEVICE_INFO_VENDOR_ID: case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: @@ -947,7 +858,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, */ CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), cl_propName, + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; @@ -991,29 +902,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pProperties, UR_RESULT_ERROR_INVALID_NULL_POINTER); - std::vector 
cl_properties( + std::vector CLProperties( pProperties->PropCount + 2); /* The type must be the same for all properties since OpenCL doesn't support * property lists with multiple types */ - cl_properties[0] = + CLProperties[0] = static_cast(pProperties->pProperties->type); for (uint32_t i = 0; i < pProperties->PropCount; ++i) { - cl_device_partition_property cl_property; + cl_device_partition_property CLProperty; switch (pProperties->pProperties->type) { case UR_DEVICE_PARTITION_EQUALLY: { - cl_property = static_cast( + CLProperty = static_cast( pProperties->pProperties->value.equally); break; } case UR_DEVICE_PARTITION_BY_COUNTS: { - cl_property = static_cast( + CLProperty = static_cast( pProperties->pProperties->value.count); break; } case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { - cl_property = static_cast( + CLProperty = static_cast( pProperties->pProperties->value.affinity_domain); break; } @@ -1021,30 +932,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_ERROR_INVALID_ENUMERATION; } } - cl_properties[i + 1] = cl_property; + CLProperties[i + 1] = CLProperty; } /* Terminate the list with 0 */ - cl_properties[cl_properties.size() - 1] = 0; + CLProperties[CLProperties.size() - 1] = 0; - cl_uint cl_num_devices_ret; - CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), cl_properties.data(), 0, nullptr, - &cl_num_devices_ret)); + cl_uint CLNumDevicesRet; + CL_RETURN_ON_FAILURE( + clCreateSubDevices(cl_adapter::cast(hDevice), + CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); if (pNumDevicesRet) { - *pNumDevicesRet = cl_num_devices_ret; + *pNumDevicesRet = CLNumDevicesRet; } /*If NumDevices is less than the number of sub-devices available, then the * function shall only retrieve that number of sub-devices. 
*/ if (phSubDevices) { - std::vector cl_sub_devices(cl_num_devices_ret); + std::vector CLSubDevices(CLNumDevicesRet); CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), cl_properties.data(), - cl_num_devices_ret, cl_sub_devices.data(), nullptr)); + cl_adapter::cast(hDevice), CLProperties.data(), + CLNumDevicesRet, CLSubDevices.data(), nullptr)); - std::memcpy(phSubDevices, cl_sub_devices.data(), + std::memcpy(phSubDevices, CLSubDevices.data(), sizeof(cl_device_id) * NumDevices); } @@ -1055,9 +966,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int result = clRetainDevice(cl_adapter::cast(hDevice)); + cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); - return map_cl_error_to_ur(result); + return mapCLErrorToUR(Result); } UR_APIEXPORT ur_result_t UR_APICALL @@ -1065,9 +976,9 @@ urDeviceRelease(ur_device_handle_t hDevice) { UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int result = clReleaseDevice(cl_adapter::cast(hDevice)); + cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); - return map_cl_error_to_ur(result); + return mapCLErrorToUR(Result); } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( @@ -1093,32 +1004,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { - OCLV::OpenCLVersion devVer, platVer; - cl_platform_id platform; - cl_device_id deviceID = cl_adapter::cast(hDevice); + oclv::OpenCLVersion DevVer, PlatVer; + cl_platform_id Platform; + cl_device_id DeviceId = cl_adapter::cast(hDevice); // TODO: Cache OpenCL version for each device and platform - auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - CL_RETURN_ON_FAILURE(ret_err); + auto RetErr = clGetDeviceInfo(DeviceId, 
CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &Platform, nullptr); + CL_RETURN_ON_FAILURE(RetErr); - ret_err = cl_adapter::getDeviceVersion(deviceID, devVer); - CL_RETURN_ON_FAILURE(ret_err); + RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + CL_RETURN_ON_FAILURE(RetErr); - ret_err = cl_adapter::getPlatformVersion(platform, platVer); + RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); - if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { + if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { return UR_RESULT_ERROR_INVALID_OPERATION; } if (pDeviceTimestamp) { - uint64_t dummy; - clGetDeviceAndHostTimer(deviceID, pDeviceTimestamp, - pHostTimestamp == nullptr ? &dummy + uint64_t Dummy; + clGetDeviceAndHostTimer(DeviceId, pDeviceTimestamp, + pHostTimestamp == nullptr ? &Dummy : pHostTimestamp); } else if (pHostTimestamp) { - clGetHostTimer(deviceID, pHostTimestamp); + clGetHostTimer(DeviceId, pHostTimestamp); } return UR_RESULT_SUCCESS; @@ -1141,19 +1052,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // plugin for platform/device the ctx was created for. // Choose the binary target for the provided device - const char *image_target = nullptr; + const char *ImageTarget = nullptr; // Get the type of the device - cl_device_type device_type; - constexpr uint32_t invalid_ind = std::numeric_limits::max(); - cl_int ret_err = + cl_device_type DeviceType; + constexpr uint32_t InvalidInd = std::numeric_limits::max(); + cl_int RetErr = clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &device_type, nullptr); - if (ret_err != CL_SUCCESS) { - *pSelectedBinary = invalid_ind; - CL_RETURN_ON_FAILURE(ret_err); + sizeof(cl_device_type), &DeviceType, nullptr); + if (RetErr != CL_SUCCESS) { + *pSelectedBinary = InvalidInd; + CL_RETURN_ON_FAILURE(RetErr); } - switch (device_type) { + switch (DeviceType) { // TODO: Factor out vendor specifics into a separate source // E.g. 
sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? @@ -1161,34 +1072,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // from a SPIR-V image into an image specific for: case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU - image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; break; case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU - image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; break; case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA - image_target = UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; break; default: // Otherwise, we'll attempt to find and JIT-compile // a device-independent SPIR-V image - image_target = UR_DEVICE_BINARY_TARGET_SPIRV64; + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64; break; } // Find the appropriate device image, fallback to spirv if not found - uint32_t fallback = invalid_ind; + uint32_t Fallback = InvalidInd; for (uint32_t i = 0; i < NumBinaries; ++i) { - if (strcmp(pBinaries[i].pDeviceTargetSpec, image_target) == 0) { + if (strcmp(pBinaries[i].pDeviceTargetSpec, ImageTarget) == 0) { *pSelectedBinary = i; return UR_RESULT_SUCCESS; } if (strcmp(pBinaries[i].pDeviceTargetSpec, UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) - fallback = i; + Fallback = i; } // Points to a spirv image, if such indeed was found - if ((*pSelectedBinary = fallback) != invalid_ind) + if ((*pSelectedBinary = Fallback) != InvalidInd) return UR_RESULT_SUCCESS; // No image can be loaded for the given device return UR_RESULT_ERROR_INVALID_BINARY; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp index 14890f8ce8090..98ff0426a32da 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -13,9 +13,9 @@ #include namespace cl_adapter { 
-ur_result_t getDeviceVersion(cl_device_id dev, OCLV::OpenCLVersion &version); +ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); -ur_result_t checkDeviceExtensions(cl_device_id dev, - const std::vector &exts, - bool &supported); +ur_result_t checkDeviceExtensions(cl_device_id Dev, + const std::vector &Exts, + bool &Supported); } // namespace cl_adapter diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp index 437bd5dc8418d..9d6cd14d6cee9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp @@ -8,19 +8,19 @@ #include "common.hpp" -cl_map_flags convert_ur_map_flags_to_cl(ur_map_flags_t ur_flags) { - cl_map_flags cl_flags = 0; - if (ur_flags & UR_MAP_FLAG_READ) { - cl_flags |= CL_MAP_READ; +cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MAP_FLAG_READ) { + CLFlags |= CL_MAP_READ; } - if (ur_flags & UR_MAP_FLAG_WRITE) { - cl_flags |= CL_MAP_WRITE; + if (URFlags & UR_MAP_FLAG_WRITE) { + CLFlags |= CL_MAP_WRITE; } - if (ur_flags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { - cl_flags |= CL_MAP_WRITE_INVALIDATE_REGION; + if (URFlags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { + CLFlags |= CL_MAP_WRITE_INVALIDATE_REGION; } - return cl_flags; + return CLFlags; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( @@ -339,15 +339,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - cl_int err; + cl_int Err; *ppRetMap = clEnqueueMapBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingMap, - convert_ur_map_flags_to_cl(mapFlags), offset, size, numEventsInWaitList, + convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - 
cl_adapter::cast(phEvent), &err); + cl_adapter::cast(phEvent), &Err); - CL_RETURN_ON_FAILURE(err); + CL_RETURN_ON_FAILURE(Err); return UR_RESULT_SUCCESS; } @@ -391,12 +391,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); if (Res != CL_SUCCESS) - return map_cl_error_to_ur(Res); + return mapCLErrorToUR(Res); cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; Res = cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - cl_ext::clEnqueueWriteGlobalVariableName, &F); + cl_ext::EnqueueWriteGlobalVariableName, &F); if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; @@ -407,7 +407,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); - return map_cl_error_to_ur(Res); + return mapCLErrorToUR(Res); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( @@ -429,12 +429,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); if (Res != CL_SUCCESS) - return map_cl_error_to_ur(Res); + return mapCLErrorToUR(Res); cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; Res = cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - cl_ext::clEnqueueReadGlobalVariableName, &F); + cl_ext::EnqueueReadGlobalVariableName, &F); if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; @@ -445,7 +445,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); - return map_cl_error_to_ur(Res); + return mapCLErrorToUR(Res); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( @@ -467,17 +467,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, sizeof(cl_context), &CLContext, nullptr); if (CLErr 
!= CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); + return mapCLErrorToUR(CLErr); } clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - cl_ext::clEnqueueReadHostPipeName, &FuncPtr); + cl_ext::EnqueueReadHostPipeName, &FuncPtr); if (FuncPtr) { - RetVal = map_cl_error_to_ur( + RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), pipe_symbol, blocking, pDst, size, numEventsInWaitList, @@ -507,17 +507,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, sizeof(cl_context), &CLContext, nullptr); if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); + return mapCLErrorToUR(CLErr); } clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - cl_ext::clEnqueueWriteHostPipeName, &FuncPtr); + cl_ext::EnqueueWriteHostPipeName, &FuncPtr); if (FuncPtr) { - RetVal = map_cl_error_to_ur( + RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), pipe_symbol, blocking, pSrc, size, numEventsInWaitList, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp index 406b5788e51c9..723c863ec8831 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -10,8 +10,8 @@ #include -cl_event_info convert_ur_event_info_to_cl(const ur_event_info_t propName) { - switch (propName) { +cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { + switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: return CL_EVENT_COMMAND_QUEUE; break; @@ -34,45 +34,34 @@ cl_event_info convert_ur_event_info_to_cl(const ur_event_info_t propName) { } cl_profiling_info 
-convert_ur_profiling_info_to_cl(const ur_profiling_info_t propName) { - switch (propName) { +convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { + switch (PropName) { case UR_PROFILING_INFO_COMMAND_QUEUED: return CL_PROFILING_COMMAND_QUEUED; - break; case UR_PROFILING_INFO_COMMAND_SUBMIT: return CL_PROFILING_COMMAND_SUBMIT; - break; case UR_PROFILING_INFO_COMMAND_START: return CL_PROFILING_COMMAND_START; - break; // TODO(ur) add UR_PROFILING_INFO_COMMAND_COMPLETE once spec has been updated case UR_PROFILING_INFO_COMMAND_END: return CL_PROFILING_COMMAND_END; - break; default: return -1; - break; } } -cl_int -convert_ur_profiling_info_to_cl(const ur_execution_info_t executionInfo) { - switch (executionInfo) { +cl_int convertURProfilingInfoToCL(const ur_execution_info_t ExecutionInfo) { + switch (ExecutionInfo) { case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: return CL_COMPLETE; - break; case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: return CL_RUNNING; - break; case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: return CL_SUBMITTED; - break; case UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: return CL_QUEUED; - break; default: return -1; - break; } } @@ -89,29 +78,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - return urGetNativeHandle(hEvent, phNativeEvent); + return getNativeHandle(hEvent, phNativeEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = clReleaseEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(ret_err); + cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = 
clRetainEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(ret_err); + cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { UR_ASSERT(phEventWaitList, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int ret_err = clWaitForEvents( + cl_int RetErr = clWaitForEvents( numEvents, cl_adapter::cast(phEventWaitList)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -121,11 +110,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_event_info clEventInfo = convert_ur_event_info_to_cl(propName); - cl_int ret_err = - clGetEventInfo(cl_adapter::cast(hEvent), clEventInfo, propSize, + cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + cl_int RetErr = + clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, pPropValue, pPropSizeRet); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -133,11 +122,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_profiling_info clProfilingInfo = convert_ur_profiling_info_to_cl(propName); - cl_int ret_err = clGetEventProfilingInfo(cl_adapter::cast(hEvent), - clProfilingInfo, propSize, - pPropValue, pPropSizeRet); - CL_RETURN_ON_FAILURE(ret_err); + cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); + cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), + CLProfilingInfo, propSize, pPropValue, + pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 112d0998ea1ed..bc68f1d68bb41 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -11,10 +11,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { - cl_int cl_result; + cl_int CLResult; *phKernel = cl_adapter::cast(clCreateKernel( - cl_adapter::cast(hProgram), pKernelName, &cl_result)); - CL_RETURN_ON_FAILURE(cl_result); + cl_adapter::cast(hProgram), pKernelName, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } @@ -29,36 +29,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return UR_RESULT_SUCCESS; } -static cl_int map_ur_kernel_info_to_cl(ur_kernel_info_t urPropName) { +static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) { - cl_int cl_propName; - switch (static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_KERNEL_INFO_FUNCTION_NAME: - cl_propName = CL_KERNEL_FUNCTION_NAME; - break; + return CL_KERNEL_FUNCTION_NAME; case UR_KERNEL_INFO_NUM_ARGS: - cl_propName = CL_KERNEL_NUM_ARGS; - break; + return CL_KERNEL_NUM_ARGS; case UR_KERNEL_INFO_REFERENCE_COUNT: - cl_propName = CL_KERNEL_REFERENCE_COUNT; - break; + return CL_KERNEL_REFERENCE_COUNT; case UR_KERNEL_INFO_CONTEXT: - cl_propName = CL_KERNEL_CONTEXT; - break; + return CL_KERNEL_CONTEXT; case UR_KERNEL_INFO_PROGRAM: - cl_propName = CL_KERNEL_PROGRAM; - break; + return CL_KERNEL_PROGRAM; case UR_KERNEL_INFO_ATTRIBUTES: - cl_propName = CL_KERNEL_ATTRIBUTES; - break; + return CL_KERNEL_ATTRIBUTES; case UR_KERNEL_INFO_NUM_REGS: - cl_propName = CL_KERNEL_NUM_ARGS; - break; + return CL_KERNEL_NUM_ARGS; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, 
@@ -70,40 +60,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - map_ur_kernel_info_to_cl(propName), - propSize, pPropValue, pPropSizeRet)); + mapURKernelInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } -static cl_int -map_ur_kernel_group_info_to_cl(ur_kernel_group_info_t urPropName) { +static cl_int mapURKernelGroupInfoToCL(ur_kernel_group_info_t URPropName) { - cl_int cl_propName; - switch (static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: - cl_propName = CL_KERNEL_GLOBAL_WORK_SIZE; - break; + return CL_KERNEL_GLOBAL_WORK_SIZE; case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: - cl_propName = CL_KERNEL_WORK_GROUP_SIZE; - break; + return CL_KERNEL_WORK_GROUP_SIZE; case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: - cl_propName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; - break; + return CL_KERNEL_COMPILE_WORK_GROUP_SIZE; case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: - cl_propName = CL_KERNEL_LOCAL_MEM_SIZE; - break; + return CL_KERNEL_LOCAL_MEM_SIZE; case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: - cl_propName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; - break; + return CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: - cl_propName = CL_KERNEL_PRIVATE_MEM_SIZE; - break; + return CL_KERNEL_PRIVATE_MEM_SIZE; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL @@ -114,37 +94,29 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE( - clGetKernelWorkGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - map_ur_kernel_group_info_to_cl(propName), - 
propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( + cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } static cl_int -map_ur_kernel_sub_group_info_to_cl(ur_kernel_sub_group_info_t urPropName) { +mapURKernelSubGroupInfoToCL(ur_kernel_sub_group_info_t URPropName) { - cl_int cl_propName; - switch (static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: - cl_propName = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE; - break; + return CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE; case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: - cl_propName = CL_KERNEL_MAX_NUM_SUB_GROUPS; - break; + return CL_KERNEL_MAX_NUM_SUB_GROUPS; case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: - cl_propName = CL_KERNEL_COMPILE_NUM_SUB_GROUPS; - break; + return CL_KERNEL_COMPILE_NUM_SUB_GROUPS; case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: - cl_propName = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL; - break; + return CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL @@ -164,27 +136,27 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // value is given we use the max work item size of the device in the first // dimention to avoid truncation of max sub-group size. 
uint32_t MaxDims = 0; - ur_result_t UrRet = + ur_result_t URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, sizeof(uint32_t), &MaxDims, nullptr); - if (UrRet != UR_RESULT_SUCCESS) - return UrRet; - std::shared_ptr WGSizes{new size_t[MaxDims]}; - UrRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - MaxDims * sizeof(size_t), WGSizes.get(), nullptr); - if (UrRet != UR_RESULT_SUCCESS) - return UrRet; + if (URRet != UR_RESULT_SUCCESS) + return URRet; + std::shared_ptr WgSizes{new size_t[MaxDims]}; + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + MaxDims * sizeof(size_t), WgSizes.get(), nullptr); + if (URRet != UR_RESULT_SUCCESS) + return URRet; for (size_t i = 1; i < MaxDims; ++i) - WGSizes.get()[i] = 1; - InputValue = std::move(WGSizes); + WgSizes.get()[i] = 1; + InputValue = std::move(WgSizes); InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo( - cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - map_ur_kernel_sub_group_info_to_cl(propName), InputValueSize, - InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); + cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does @@ -197,24 +169,24 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, Ret = CL_SUCCESS; } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { // Return the maximum sub group size for the device - size_t result_size = 0; + size_t ResultSize = 0; // Two calls to urDeviceGetInfo are needed: the first determines the size // required to store the result, and the second returns the actual size // values. 
- ur_result_t UrRet = + ur_result_t URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, - nullptr, &result_size); - if (UrRet != UR_RESULT_SUCCESS) { - return UrRet; + nullptr, &ResultSize); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; } - assert(result_size % sizeof(size_t) == 0); - std::vector result(result_size / sizeof(size_t)); - UrRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, - result_size, result.data(), nullptr); - if (UrRet != UR_RESULT_SUCCESS) { - return UrRet; + assert(ResultSize % sizeof(size_t) == 0); + std::vector Result(ResultSize / sizeof(size_t)); + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, + ResultSize, Result.data(), nullptr); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; } - RetVal = *std::max_element(result.begin(), result.end()); + RetVal = *std::max_element(Result.begin(), Result.end()); Ret = CL_SUCCESS; } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { RetVal = 0; // Not specified by kernel @@ -250,7 +222,7 @@ urKernelRelease(ur_kernel_handle_t hKernel) { * Enables indirect access of pointers in kernels. Necessary to avoid telling CL * about every pointer that might be used. 
*/ -static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { +static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_bool TrueVal = CL_TRUE; clHostMemAllocINTEL_fn HFunc = nullptr; @@ -266,7 +238,7 @@ static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::clHostMemAllocName, &HFunc)); + cl_ext::HostMemAllocName, &HFunc)); if (HFunc) { CL_RETURN_ON_FAILURE( @@ -277,7 +249,7 @@ static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::clDeviceMemAllocName, &DFunc)); + cl_ext::DeviceMemAllocName, &DFunc)); if (DFunc) { CL_RETURN_ON_FAILURE( @@ -288,7 +260,7 @@ static ur_result_t USMSetIndirectAccess(ur_kernel_handle_t hKernel) { UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::clSharedMemAllocName, &SFunc)); + cl_ext::SharedMemAllocName, &SFunc)); if (SFunc) { CL_RETURN_ON_FAILURE( @@ -306,7 +278,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( switch (propName) { case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { if (*(static_cast(pPropValue)) == true) { - CL_RETURN_ON_FAILURE(USMSetIndirectAccess(hKernel)); + CL_RETURN_ON_FAILURE(usmSetIndirectAccess(hKernel)); } return UR_RESULT_SUCCESS; } @@ -339,7 +311,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, - cl_ext::clSetKernelArgMemPointerName, &FuncPtr)); + cl_ext::SetKernelArgMemPointerName, &FuncPtr)); if (FuncPtr) { /* OpenCL passes pointers by value not by reference. 
This means we need to @@ -379,10 +351,10 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, ur_mem_handle_t hArgValue) { UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = clSetKernelArg( + cl_int RetErr = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), sizeof(hArgValue), cl_adapter::cast(hArgValue)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -390,9 +362,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - cl_int ret_err = clSetKernelArg( + cl_int RetErr = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), sizeof(hArgValue), cl_adapter::cast(&hArgValue)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index 1531ff4ab0153..3a0392da14de9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -10,229 +10,215 @@ #include -cl_image_format -map_ur_image_format_to_cl(const ur_image_format_t *pImageFormat) { - cl_image_format clImageFormat; - switch (pImageFormat->channelOrder) { +cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { + cl_image_format CLImageFormat; + switch (PImageFormat->channelOrder) { case UR_IMAGE_CHANNEL_ORDER_A: - clImageFormat.image_channel_order = CL_A; + CLImageFormat.image_channel_order = CL_A; break; case UR_IMAGE_CHANNEL_ORDER_R: - clImageFormat.image_channel_order = CL_R; + CLImageFormat.image_channel_order = CL_R; break; case UR_IMAGE_CHANNEL_ORDER_RG: - clImageFormat.image_channel_order = CL_RG; + CLImageFormat.image_channel_order = CL_RG; break; case UR_IMAGE_CHANNEL_ORDER_RA: - 
clImageFormat.image_channel_order = CL_RA; + CLImageFormat.image_channel_order = CL_RA; break; case UR_IMAGE_CHANNEL_ORDER_RGB: - clImageFormat.image_channel_order = CL_RGB; + CLImageFormat.image_channel_order = CL_RGB; break; case UR_IMAGE_CHANNEL_ORDER_RGBA: - clImageFormat.image_channel_order = CL_RGBA; + CLImageFormat.image_channel_order = CL_RGBA; break; case UR_IMAGE_CHANNEL_ORDER_BGRA: - clImageFormat.image_channel_order = CL_BGRA; + CLImageFormat.image_channel_order = CL_BGRA; break; case UR_IMAGE_CHANNEL_ORDER_ARGB: - clImageFormat.image_channel_order = CL_ARGB; + CLImageFormat.image_channel_order = CL_ARGB; break; case UR_IMAGE_CHANNEL_ORDER_ABGR: - clImageFormat.image_channel_order = CL_ABGR; + CLImageFormat.image_channel_order = CL_ABGR; break; case UR_IMAGE_CHANNEL_ORDER_INTENSITY: - clImageFormat.image_channel_order = CL_INTENSITY; + CLImageFormat.image_channel_order = CL_INTENSITY; break; case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: - clImageFormat.image_channel_order = CL_LUMINANCE; + CLImageFormat.image_channel_order = CL_LUMINANCE; break; case UR_IMAGE_CHANNEL_ORDER_RX: - clImageFormat.image_channel_order = CL_Rx; + CLImageFormat.image_channel_order = CL_Rx; break; case UR_IMAGE_CHANNEL_ORDER_RGX: - clImageFormat.image_channel_order = CL_RGx; + CLImageFormat.image_channel_order = CL_RGx; break; case UR_IMAGE_CHANNEL_ORDER_RGBX: - clImageFormat.image_channel_order = CL_RGBx; + CLImageFormat.image_channel_order = CL_RGBx; break; case UR_IMAGE_CHANNEL_ORDER_SRGBA: - clImageFormat.image_channel_order = CL_sRGBA; + CLImageFormat.image_channel_order = CL_sRGBA; break; default: - clImageFormat.image_channel_order = -1; + CLImageFormat.image_channel_order = -1; break; } - switch (pImageFormat->channelType) { + switch (PImageFormat->channelType) { case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: - clImageFormat.image_channel_data_type = CL_SNORM_INT8; + CLImageFormat.image_channel_data_type = CL_SNORM_INT8; break; case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: - 
clImageFormat.image_channel_data_type = CL_SNORM_INT16; + CLImageFormat.image_channel_data_type = CL_SNORM_INT16; break; case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: - clImageFormat.image_channel_data_type = CL_UNORM_INT8; + CLImageFormat.image_channel_data_type = CL_UNORM_INT8; break; case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: - clImageFormat.image_channel_data_type = CL_UNORM_INT16; + CLImageFormat.image_channel_data_type = CL_UNORM_INT16; break; case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: - clImageFormat.image_channel_data_type = CL_UNORM_SHORT_565; + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_565; break; case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: - clImageFormat.image_channel_data_type = CL_UNORM_SHORT_555; + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_555; break; case UR_IMAGE_CHANNEL_TYPE_INT_101010: - clImageFormat.image_channel_data_type = CL_UNORM_INT_101010; + CLImageFormat.image_channel_data_type = CL_UNORM_INT_101010; break; case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: - clImageFormat.image_channel_data_type = CL_SIGNED_INT8; + CLImageFormat.image_channel_data_type = CL_SIGNED_INT8; break; case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: - clImageFormat.image_channel_data_type = CL_SIGNED_INT16; + CLImageFormat.image_channel_data_type = CL_SIGNED_INT16; break; case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: - clImageFormat.image_channel_data_type = CL_SIGNED_INT32; + CLImageFormat.image_channel_data_type = CL_SIGNED_INT32; break; case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: - clImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; break; case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: - clImageFormat.image_channel_data_type = CL_UNSIGNED_INT16; + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT16; break; case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: - clImageFormat.image_channel_data_type = CL_UNSIGNED_INT32; + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT32; break; case 
UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: - clImageFormat.image_channel_data_type = CL_HALF_FLOAT; + CLImageFormat.image_channel_data_type = CL_HALF_FLOAT; break; case UR_IMAGE_CHANNEL_TYPE_FLOAT: - clImageFormat.image_channel_data_type = CL_FLOAT; + CLImageFormat.image_channel_data_type = CL_FLOAT; break; default: - clImageFormat.image_channel_data_type = -1; + CLImageFormat.image_channel_data_type = -1; break; } - return clImageFormat; + return CLImageFormat; } -cl_image_desc map_ur_image_desc_to_cl(const ur_image_desc_t *pImageDesc) { - cl_image_desc clImageDesc; - clImageDesc.image_type = - cl_adapter::cast(pImageDesc->type); +cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { + cl_image_desc CLImageDesc; + CLImageDesc.image_type = + cl_adapter::cast(PImageDesc->type); - switch (pImageDesc->type) { + switch (PImageDesc->type) { case UR_MEM_TYPE_BUFFER: - clImageDesc.image_type = CL_MEM_OBJECT_BUFFER; + CLImageDesc.image_type = CL_MEM_OBJECT_BUFFER; break; case UR_MEM_TYPE_IMAGE2D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; break; case UR_MEM_TYPE_IMAGE3D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; break; case UR_MEM_TYPE_IMAGE2D_ARRAY: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break; case UR_MEM_TYPE_IMAGE1D: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; break; case UR_MEM_TYPE_IMAGE1D_ARRAY: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; case UR_MEM_TYPE_IMAGE1D_BUFFER: - clImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; break; default: - clImageDesc.image_type = -1; + CLImageDesc.image_type = -1; break; } - clImageDesc.image_width = pImageDesc->width; - clImageDesc.image_height = 
pImageDesc->height; - clImageDesc.image_depth = pImageDesc->depth; - clImageDesc.image_array_size = pImageDesc->arraySize; - clImageDesc.image_row_pitch = pImageDesc->rowPitch; - clImageDesc.image_slice_pitch = pImageDesc->slicePitch; - clImageDesc.num_mip_levels = pImageDesc->numMipLevel; - clImageDesc.num_samples = pImageDesc->numSamples; - clImageDesc.buffer = nullptr; - clImageDesc.mem_object = nullptr; - - return clImageDesc; + CLImageDesc.image_width = PImageDesc->width; + CLImageDesc.image_height = PImageDesc->height; + CLImageDesc.image_depth = PImageDesc->depth; + CLImageDesc.image_array_size = PImageDesc->arraySize; + CLImageDesc.image_row_pitch = PImageDesc->rowPitch; + CLImageDesc.image_slice_pitch = PImageDesc->slicePitch; + CLImageDesc.num_mip_levels = PImageDesc->numMipLevel; + CLImageDesc.num_samples = PImageDesc->numSamples; + CLImageDesc.buffer = nullptr; + CLImageDesc.mem_object = nullptr; + + return CLImageDesc; } -cl_int map_ur_mem_image_info_to_cl(ur_image_info_t urPropName) { - cl_int clPropName; - switch (urPropName) { +cl_int mapURMemImageInfoToCL(ur_image_info_t URPropName) { + + switch (URPropName) { case UR_IMAGE_INFO_FORMAT: - clPropName = CL_IMAGE_FORMAT; - break; + return CL_IMAGE_FORMAT; case UR_IMAGE_INFO_ELEMENT_SIZE: - clPropName = CL_IMAGE_ELEMENT_SIZE; - break; + return CL_IMAGE_ELEMENT_SIZE; case UR_IMAGE_INFO_ROW_PITCH: - clPropName = CL_IMAGE_ROW_PITCH; - break; + return CL_IMAGE_ROW_PITCH; case UR_IMAGE_INFO_SLICE_PITCH: - clPropName = CL_IMAGE_SLICE_PITCH; - break; + return CL_IMAGE_SLICE_PITCH; case UR_IMAGE_INFO_WIDTH: - clPropName = CL_IMAGE_WIDTH; - break; + return CL_IMAGE_WIDTH; case UR_IMAGE_INFO_HEIGHT: - clPropName = CL_IMAGE_HEIGHT; - break; + return CL_IMAGE_HEIGHT; case UR_IMAGE_INFO_DEPTH: - clPropName = CL_IMAGE_DEPTH; - break; + return CL_IMAGE_DEPTH; default: - clPropName = -1; + return -1; } - - return clPropName; } -cl_int map_ur_mem_info_to_cl(ur_mem_info_t urPropName) { - cl_int clPropName; - switch 
(urPropName) { +cl_int mapURMemInfoToCL(ur_mem_info_t URPropName) { + + switch (URPropName) { case UR_MEM_INFO_SIZE: - clPropName = CL_MEM_SIZE; - break; + return CL_MEM_SIZE; case UR_MEM_INFO_CONTEXT: - clPropName = CL_MEM_CONTEXT; - break; + return CL_MEM_CONTEXT; default: - clPropName = -1; + return -1; } - - return clPropName; } -cl_map_flags convert_ur_mem_flags_to_cl(ur_mem_flags_t ur_flags) { - cl_map_flags cl_flags = 0; - if (ur_flags & UR_MEM_FLAG_READ_WRITE) { - cl_flags |= CL_MEM_READ_WRITE; +cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MEM_FLAG_READ_WRITE) { + CLFlags |= CL_MEM_READ_WRITE; } - if (ur_flags & UR_MEM_FLAG_WRITE_ONLY) { - cl_flags |= CL_MEM_WRITE_ONLY; + if (URFlags & UR_MEM_FLAG_WRITE_ONLY) { + CLFlags |= CL_MEM_WRITE_ONLY; } - if (ur_flags & UR_MEM_FLAG_READ_ONLY) { - cl_flags |= CL_MEM_READ_ONLY; + if (URFlags & UR_MEM_FLAG_READ_ONLY) { + CLFlags |= CL_MEM_READ_ONLY; } - if (ur_flags & UR_MEM_FLAG_USE_HOST_POINTER) { - cl_flags |= CL_MEM_USE_HOST_PTR; + if (URFlags & UR_MEM_FLAG_USE_HOST_POINTER) { + CLFlags |= CL_MEM_USE_HOST_PTR; } - if (ur_flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { - cl_flags |= CL_MEM_ALLOC_HOST_PTR; + if (URFlags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { + CLFlags |= CL_MEM_ALLOC_HOST_PTR; } - if (ur_flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { - cl_flags |= CL_MEM_COPY_HOST_PTR; + if (URFlags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + CLFlags |= CL_MEM_COPY_HOST_PTR; } - return cl_flags; + return CLFlags; } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( @@ -241,53 +227,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int ret_err = CL_INVALID_OPERATION; + cl_int RetErr = CL_INVALID_OPERATION; if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported 
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); // First we need to look up the function pointer - ret_err = + RetErr = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - cl_ext::clCreateBufferWithPropertiesName, &FuncPtr); + cl_ext::CreateBufferWithPropertiesName, &FuncPtr); if (FuncPtr) { - std::vector propertiesIntel; - auto prop = static_cast(pProperties->pNext); - while (prop) { - switch (prop->stype) { + std::vector PropertiesIntel; + auto Prop = static_cast(pProperties->pNext); + while (Prop) { + switch (Prop->stype) { case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: { - auto bufferChannelProperty = - reinterpret_cast(prop); - propertiesIntel.push_back(CL_MEM_CHANNEL_INTEL); - propertiesIntel.push_back(bufferChannelProperty->channel); + auto BufferChannelProperty = + reinterpret_cast(Prop); + PropertiesIntel.push_back(CL_MEM_CHANNEL_INTEL); + PropertiesIntel.push_back(BufferChannelProperty->channel); } break; case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: { - auto bufferLocationProperty = - reinterpret_cast(prop); - propertiesIntel.push_back(CL_MEM_ALLOC_FLAGS_INTEL); - propertiesIntel.push_back(bufferLocationProperty->location); + auto BufferLocationProperty = + reinterpret_cast(Prop); + PropertiesIntel.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + PropertiesIntel.push_back(BufferLocationProperty->location); } break; default: break; } - prop = static_cast(prop->pNext); + Prop = static_cast(Prop->pNext); } - propertiesIntel.push_back(0); + PropertiesIntel.push_back(0); *phBuffer = reinterpret_cast(FuncPtr( - CLContext, propertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&ret_err))); - CL_RETURN_ON_FAILURE(ret_err); + CLContext, PropertiesIntel.data(), static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); } } *phBuffer = 
reinterpret_cast(clCreateBuffer( cl_adapter::cast(hContext), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&ret_err))); - CL_RETURN_ON_FAILURE(ret_err); + size, pProperties->pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -299,16 +285,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int ret_err = CL_INVALID_OPERATION; + cl_int RetErr = CL_INVALID_OPERATION; - cl_image_format image_format = map_ur_image_format_to_cl(pImageFormat); - cl_image_desc image_desc = map_ur_image_desc_to_cl(pImageDesc); - cl_map_flags map_flags = convert_ur_mem_flags_to_cl(flags); + cl_image_format ImageFormat = mapURImageFormatToCL(pImageFormat); + cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); + cl_map_flags MapFlags = convertURMemFlagsToCL(flags); *phMem = reinterpret_cast(clCreateImage( - cl_adapter::cast(hContext), map_flags, &image_format, - &image_desc, pHost, cl_adapter::cast(&ret_err))); - CL_RETURN_ON_FAILURE(ret_err); + cl_adapter::cast(hContext), MapFlags, &ImageFormat, + &ImageDesc, pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -320,33 +306,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int ret_err = CL_INVALID_OPERATION; + cl_int RetErr = CL_INVALID_OPERATION; - cl_buffer_create_type buffer_create_type; + cl_buffer_create_type BufferCreateType; switch (bufferCreateType) { case UR_BUFFER_CREATE_TYPE_REGION: - buffer_create_type = CL_BUFFER_CREATE_TYPE_REGION; + BufferCreateType = CL_BUFFER_CREATE_TYPE_REGION; break; default: break; } - _cl_buffer_region buffer_region; - buffer_region.origin = pRegion->origin; - buffer_region.size = pRegion->size; + _cl_buffer_region BufferRegion; + 
BufferRegion.origin = pRegion->origin; + BufferRegion.size = pRegion->size; - *phMem = reinterpret_cast( - clCreateSubBuffer(cl_adapter::cast(hBuffer), - static_cast(flags), buffer_create_type, - &buffer_region, cl_adapter::cast(&ret_err))); - CL_RETURN_ON_FAILURE(ret_err); + *phMem = reinterpret_cast(clCreateSubBuffer( + cl_adapter::cast(hBuffer), static_cast(flags), + BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { - return urGetNativeHandle(hMem, phNativeMem); + return getNativeHandle(hMem, phNativeMem); } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( @@ -384,10 +369,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int clPropName = map_ur_mem_info_to_cl(propName); + const cl_int CLPropName = mapURMemInfoToCL(propName); CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), - clPropName, propSize, pPropValue, + CLPropName, propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -400,10 +385,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int clPropName = map_ur_mem_image_info_to_cl(propName); + const cl_int CLPropName = mapURMemImageInfoToCL(propName); CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), - clPropName, propSize, pPropValue, + CLPropName, propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index ce85efdd2e050..dcf63127067d8 100644 
--- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -10,49 +10,41 @@ #include -ur_result_t cl_adapter::getPlatformVersion(cl_platform_id plat, - OCLV::OpenCLVersion &version) { +ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, + oclv::OpenCLVersion &Version) { - size_t platVerSize = 0; + size_t PlatVerSize = 0; CL_RETURN_ON_FAILURE( - clGetPlatformInfo(plat, CL_PLATFORM_VERSION, 0, nullptr, &platVerSize)); + clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); - std::string platVer(platVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetPlatformInfo(plat, CL_PLATFORM_VERSION, platVerSize, - platVer.data(), nullptr)); + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, + PlatVer.data(), nullptr)); - version = OCLV::OpenCLVersion(platVer); - if (!version.isValid()) { + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { return UR_RESULT_ERROR_INVALID_PLATFORM; } return UR_RESULT_SUCCESS; } -static cl_int map_ur_platform_info_to_cl(ur_platform_info_t urPropName) { +static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { - cl_int cl_propName; - switch (urPropName) { + switch (URPropName) { case UR_PLATFORM_INFO_NAME: - cl_propName = CL_PLATFORM_NAME; - break; + return CL_PLATFORM_NAME; case UR_PLATFORM_INFO_VENDOR_NAME: - cl_propName = CL_PLATFORM_VENDOR; - break; + return CL_PLATFORM_VENDOR; case UR_PLATFORM_INFO_VERSION: - cl_propName = CL_PLATFORM_VERSION; - break; + return CL_PLATFORM_VERSION; case UR_PLATFORM_INFO_EXTENSIONS: - cl_propName = CL_PLATFORM_EXTENSIONS; - break; + return CL_PLATFORM_EXTENSIONS; case UR_PLATFORM_INFO_PROFILE: - cl_propName = CL_PLATFORM_PROFILE; - break; + return CL_PLATFORM_PROFILE; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_DLLEXPORT ur_result_t UR_APICALL @@ -61,7 +53,7 @@ 
urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); - const cl_int cl_propName = map_ur_platform_info_to_cl(propName); + const cl_int CLPropName = mapURPlatformInfoToCL(propName); switch (static_cast(propName)) { case UR_PLATFORM_INFO_BACKEND: @@ -73,7 +65,7 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_PROFILE: { CL_RETURN_ON_FAILURE( clGetPlatformInfo(cl_adapter::cast(hPlatform), - cl_propName, propSize, pPropValue, pSizeRet)); + CLPropName, propSize, pPropValue, pSizeRet)); return UR_RESULT_SUCCESS; } default: @@ -97,20 +89,20 @@ urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms, UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_SIZE); - cl_int result = + cl_int Result = clGetPlatformIDs(cl_adapter::cast(NumEntries), cl_adapter::cast(phPlatforms), cl_adapter::cast(pNumPlatforms)); /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ - if (result == CL_PLATFORM_NOT_FOUND_KHR) { - result = CL_SUCCESS; + if (Result == CL_PLATFORM_NOT_FOUND_KHR) { + Result = CL_SUCCESS; if (pNumPlatforms) { *pNumPlatforms = 0; } } - return map_cl_error_to_ur(result); + return mapCLErrorToUR(Result); } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp index 6f69a04030da1..340955fe7c39c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.hpp @@ -10,6 +10,6 @@ #include "common.hpp" namespace cl_adapter { -ur_result_t getPlatformVersion(cl_platform_id plat, - OCLV::OpenCLVersion &version); +ur_result_t 
getPlatformVersion(cl_platform_id Plat, + oclv::OpenCLVersion &Version); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index 2dc52f27651a9..0d7f76dc2212f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -13,22 +13,22 @@ static ur_result_t getDevicesFromProgram( ur_program_handle_t hProgram, - std::unique_ptr> &devicesInProgram) { + std::unique_ptr> &DevicesInProgram) { - cl_uint deviceCount; + cl_uint DeviceCount; CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), - &deviceCount, nullptr)); + &DeviceCount, nullptr)); - if (deviceCount < 1) { + if (DeviceCount < 1) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - devicesInProgram = std::make_unique>(deviceCount); + DevicesInProgram = std::make_unique>(DeviceCount); CL_RETURN_ON_FAILURE(clGetProgramInfo( cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, - deviceCount * sizeof(cl_device_id), (*devicesInProgram).data(), nullptr)); + DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr)); return UR_RESULT_SUCCESS; } @@ -41,77 +41,77 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( UR_ASSERT(pIL, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - std::unique_ptr> devicesInCtx; + std::unique_ptr> DevicesInCtx; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDevicesFromContext(hContext, devicesInCtx), phProgram); + cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); - cl_platform_id curPlatform; + cl_platform_id CurPlatform; CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo((*devicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &curPlatform, nullptr), + clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr), phProgram); - 
OCLV::OpenCLVersion platVer; + oclv::OpenCLVersion PlatVer; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(curPlatform, platVer), phProgram); + cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram); - cl_int err = CL_SUCCESS; - if (platVer >= OCLV::V2_1) { + cl_int Err = CL_SUCCESS; + if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ - for (cl_device_id dev : *devicesInCtx) { - OCLV::OpenCLVersion devVer; + for (cl_device_id Dev : *DevicesInCtx) { + oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDeviceVersion(dev, devVer), phProgram); + cl_adapter::getDeviceVersion(Dev, DevVer), phProgram); /* If the device does not support CL 2.1 or greater, we need to make sure * it supports the cl_khr_il_program extension. */ - if (devVer < OCLV::V2_1) { - bool supported = false; + if (DevVer < oclv::V2_1) { + bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, - supported), + cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, + Supported), phProgram); - if (!supported) { + if (!Supported) { return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; } } } *phProgram = cl_adapter::cast(clCreateProgramWithIL( - cl_adapter::cast(hContext), pIL, length, &err)); - CL_RETURN_ON_FAILURE(err); + cl_adapter::cast(hContext), pIL, length, &Err)); + CL_RETURN_ON_FAILURE(Err); } else { /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. 
*/ - for (cl_device_id dev : *devicesInCtx) { - bool supported = false; + for (cl_device_id Dev : *DevicesInCtx) { + bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(dev, {"cl_khr_il_program"}, - supported), + cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, + Supported), phProgram); - if (!supported) { + if (!Supported) { return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; } } - using apiFuncT = + using ApiFuncT = cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); - apiFuncT funcPtr = - reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - curPlatform, "clCreateProgramWithILKHR")); + ApiFuncT FuncPtr = + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( + CurPlatform, "clCreateProgramWithILKHR")); - assert(funcPtr != nullptr); + assert(FuncPtr != nullptr); *phProgram = cl_adapter::cast( - funcPtr(cl_adapter::cast(hContext), pIL, length, &err)); - CL_RETURN_ON_FAILURE(err); + FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + CL_RETURN_ON_FAILURE(Err); } return UR_RESULT_SUCCESS; @@ -127,14 +127,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( UR_ASSERT(pBinary, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int binary_status; - cl_int cl_result; + cl_int BinaryStatus; + cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( cl_adapter::cast(hContext), cl_adapter::cast(1u), cl_adapter::cast(&hDevice), &size, &pBinary, - &binary_status, &cl_result)); - CL_RETURN_ON_FAILURE(binary_status); - CL_RETURN_ON_FAILURE(cl_result); + &BinaryStatus, &CLResult)); + CL_RETURN_ON_FAILURE(BinaryStatus); + CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } @@ -146,53 +146,41 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - 
std::unique_ptr> devicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, devicesInProgram)); + std::unique_ptr> DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), - devicesInProgram->size(), - devicesInProgram->data(), pOptions, 0, + DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, 0, nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; } -static cl_int map_ur_program_info_to_cl(ur_program_info_t urPropName) { +static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { - cl_int cl_propName; - switch (static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_PROGRAM_INFO_REFERENCE_COUNT: - cl_propName = CL_PROGRAM_REFERENCE_COUNT; - break; + return CL_PROGRAM_REFERENCE_COUNT; case UR_PROGRAM_INFO_CONTEXT: - cl_propName = CL_PROGRAM_CONTEXT; - break; + return CL_PROGRAM_CONTEXT; case UR_PROGRAM_INFO_NUM_DEVICES: - cl_propName = CL_PROGRAM_NUM_DEVICES; - break; + return CL_PROGRAM_NUM_DEVICES; case UR_PROGRAM_INFO_DEVICES: - cl_propName = CL_PROGRAM_DEVICES; - break; + return CL_PROGRAM_DEVICES; case UR_PROGRAM_INFO_SOURCE: - cl_propName = CL_PROGRAM_SOURCE; - break; + return CL_PROGRAM_SOURCE; case UR_PROGRAM_INFO_BINARY_SIZES: - cl_propName = CL_PROGRAM_BINARY_SIZES; - break; + return CL_PROGRAM_BINARY_SIZES; case UR_PROGRAM_INFO_BINARIES: - cl_propName = CL_PROGRAM_BINARIES; - break; + return CL_PROGRAM_BINARIES; case UR_PROGRAM_INFO_NUM_KERNELS: - cl_propName = CL_PROGRAM_NUM_KERNELS; - break; + return CL_PROGRAM_NUM_KERNELS; case UR_PROGRAM_INFO_KERNEL_NAMES: - cl_propName = CL_PROGRAM_KERNEL_NAMES; - break; + return CL_PROGRAM_KERNEL_NAMES; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL @@ -202,7 +190,7 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, UR_ASSERT(hProgram, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - map_ur_program_info_to_cl(propName), + mapURProgramInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; @@ -215,12 +203,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - std::unique_ptr> devicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, devicesInProgram)); + std::unique_ptr> DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); CL_RETURN_ON_FAILURE(clBuildProgram( - cl_adapter::cast(hProgram), devicesInProgram->size(), - devicesInProgram->data(), pOptions, nullptr, nullptr)); + cl_adapter::cast(hProgram), DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -233,39 +221,31 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, UR_ASSERT(phPrograms, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int cl_result; + cl_int CLResult; *phProgram = cl_adapter::cast( clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, pOptions, cl_adapter::cast(count), cl_adapter::cast(phPrograms), nullptr, - nullptr, &cl_result)); - CL_RETURN_ON_FAILURE(cl_result); + nullptr, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } -static cl_int -map_ur_program_build_info_to_cl(ur_program_build_info_t urPropName) { +static cl_int mapURProgramBuildInfoToCL(ur_program_build_info_t URPropName) { - cl_int cl_propName; - switch (static_cast(urPropName)) { + switch (static_cast(URPropName)) { case UR_PROGRAM_BUILD_INFO_STATUS: - cl_propName = CL_PROGRAM_BUILD_STATUS; - break; + return CL_PROGRAM_BUILD_STATUS; case UR_PROGRAM_BUILD_INFO_OPTIONS: - cl_propName = CL_PROGRAM_BUILD_OPTIONS; - break; + return 
CL_PROGRAM_BUILD_OPTIONS; case UR_PROGRAM_BUILD_INFO_LOG: - cl_propName = CL_PROGRAM_BUILD_LOG; - break; + return CL_PROGRAM_BUILD_LOG; case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: - cl_propName = CL_PROGRAM_BINARY_TYPE; - break; + return CL_PROGRAM_BINARY_TYPE; default: - cl_propName = -1; + return -1; } - - return cl_propName; } UR_APIEXPORT ur_result_t UR_APICALL @@ -276,11 +256,10 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE( - clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - map_ur_program_build_info_to_cl(propName), propSize, - pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( + cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -326,24 +305,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( ur_program_handle_t hProgram, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { - cl_program clProg = cl_adapter::cast(hProgram); + cl_program CLProg = cl_adapter::cast(hProgram); cl_context Ctx = nullptr; size_t RetSize = 0; - CL_RETURN_ON_FAILURE(clGetProgramInfo(clProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), + CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), &Ctx, &RetSize)); cl_ext::clSetProgramSpecializationConstant_fn F = nullptr; - const ur_result_t ur_result = cl_ext::getExtFuncFromContext( + const ur_result_t URResult = cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, - cl_ext::clSetProgramSpecializationConstantName, &F); + cl_ext::SetProgramSpecializationConstantName, &F); - if (ur_result != UR_RESULT_SUCCESS) { - return ur_result; + if (URResult != UR_RESULT_SUCCESS) { + return URResult; } for 
(uint32_t i = 0; i < count; ++i) { - CL_RETURN_ON_FAILURE(F(clProg, pSpecConstants[i].id, pSpecConstants[i].size, + CL_RETURN_ON_FAILURE(F(CLProg, pSpecConstants[i].id, pSpecConstants[i].size, pSpecConstants[i].pValue)); } @@ -354,23 +333,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( // then checks if they are equal to the sub_str. // returns true if there is at least one instance // returns false if there are no instances of the name -static bool is_in_separated_string(const std::string &str, char delimiter, - const std::string &sub_str) { - size_t beg = 0; - size_t length = 0; - for (const auto &x : str) { - if (x == delimiter) { - if (str.substr(beg, length) == sub_str) +static bool isInSeparatedString(const std::string &Str, char Delimiter, + const std::string &SubStr) { + size_t Beg = 0; + size_t Length = 0; + for (const auto &x : Str) { + if (x == Delimiter) { + if (Str.substr(Beg, Length) == SubStr) return true; - beg += length + 1; - length = 0; + Beg += Length + 1; + Length = 0; continue; } - length++; + Length++; } - if (length != 0) - if (str.substr(beg, length) == sub_str) + if (Length != 0) + if (Str.substr(Beg, Length) == SubStr) return true; return false; @@ -389,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - cl_ext::clGetDeviceFunctionPointerName, &FuncT)); + cl_ext::GetDeviceFunctionPointerName, &FuncT)); // Check if kernel name exists, to prevent opencl runtime throwing exception // with cpu runtime @@ -402,34 +381,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); - std::string ClResult(Size, ' '); + std::string CLResult(Size, ' '); CL_RETURN_ON_FAILURE(clGetProgramInfo( cl_adapter::cast(hProgram), PI_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr)); + CLResult.size(), 
&CLResult[0], nullptr)); // Get rid of the null terminator and search for kernel_name // If function cannot be found return error code to indicate it // exists - ClResult.pop_back(); - if (!is_in_separated_string(ClResult, ';', pFunctionName)) + CLResult.pop_back(); + if (!isInSeparatedString(CLResult, ';', pFunctionName)) return UR_RESULT_ERROR_INVALID_KERNEL_NAME; - ur_result_t ur_result = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + ur_result_t URResult = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; // If clGetDeviceFunctionPointer is in list of extensions if (FuncT) { - cl_int cl_result = + cl_int CLResult = FuncT(cl_adapter::cast(hDevice), cl_adapter::cast(hProgram), pFunctionName, reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address // cannot be found even if kernel exits. As the kernel does exist return // that the address is not available - if (cl_result == CL_INVALID_ARG_VALUE) { + if (CLResult == CL_INVALID_ARG_VALUE) { *ppFunctionPointer = 0; return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; } } - return ur_result; + return URResult; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index 4c45365e3df2c..c60180eccf234 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -11,56 +11,50 @@ #include -cl_command_queue_info map_ur_queue_info_to_cl(const ur_queue_info_t propName) { - switch (propName) { +cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { + + switch (PropName) { case UR_QUEUE_INFO_CONTEXT: return CL_QUEUE_CONTEXT; - break; case UR_QUEUE_INFO_DEVICE: return CL_QUEUE_DEVICE; - break; case UR_QUEUE_INFO_DEVICE_DEFAULT: return CL_QUEUE_DEVICE_DEFAULT; - break; case UR_QUEUE_INFO_FLAGS: return CL_QUEUE_PROPERTIES_ARRAY; - break; case UR_QUEUE_INFO_REFERENCE_COUNT: return CL_QUEUE_REFERENCE_COUNT; - break; case 
UR_QUEUE_INFO_SIZE: return CL_QUEUE_SIZE; - break; default: return -1; - break; } } -cl_command_queue_properties convert_ur_queue_properties_to_cl( - const ur_queue_properties_t *urQueueProperties) { - cl_command_queue_properties clCommandQueueProperties = 0; +cl_command_queue_properties +convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { + cl_command_queue_properties CLCommandQueueProperties = 0; - if (urQueueProperties->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - clCommandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + if (URQueueProperties->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + CLCommandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } - if (urQueueProperties->flags & UR_QUEUE_FLAG_PROFILING_ENABLE) { - clCommandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; + if (URQueueProperties->flags & UR_QUEUE_FLAG_PROFILING_ENABLE) { + CLCommandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; } - if (urQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE) { - clCommandQueueProperties |= CL_QUEUE_ON_DEVICE; + if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE) { + CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE; } - if (urQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { - clCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT; + if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { + CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT; } - if (urQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { - clCommandQueueProperties |= CL_QUEUE_PRIORITY_LOW_KHR; + if (URQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { + CLCommandQueueProperties |= CL_QUEUE_PRIORITY_LOW_KHR; } - if (urQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - clCommandQueueProperties |= CL_QUEUE_PRIORITY_HIGH_KHR; + if (URQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH) { + CLCommandQueueProperties |= CL_QUEUE_PRIORITY_HIGH_KHR; } - return clCommandQueueProperties; + return 
CLCommandQueueProperties; } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( @@ -69,18 +63,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_platform_id curPlatform; + cl_platform_id CurPlatform; CL_RETURN_ON_FAILURE_AND_SET_NULL( clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &curPlatform, + CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform, nullptr), phQueue); - cl_command_queue_properties clProperties = - convert_ur_queue_properties_to_cl(pProperties); + cl_command_queue_properties CLProperties = + convertURQueuePropertiesToCL(pProperties); // Check that unexpected bits are not set. - assert(!(clProperties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + assert(!(CLProperties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT))); @@ -89,29 +83,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - OCLV::OpenCLVersion version; + oclv::OpenCLVersion Version; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(curPlatform, version), phQueue); + cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); - cl_int ret_err = CL_INVALID_OPERATION; + cl_int RetErr = CL_INVALID_OPERATION; - if (version >= OCLV::V2_0) { + if (Version >= oclv::V2_0) { *phQueue = cl_adapter::cast( clCreateCommandQueue(cl_adapter::cast(hContext), cl_adapter::cast(hDevice), - clProperties & SupportByOpenCL, &ret_err)); - CL_RETURN_ON_FAILURE(ret_err); + CLProperties & SupportByOpenCL, &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } cl_queue_properties CreationFlagProperties[] = { - CL_QUEUE_PROPERTIES, clProperties & SupportByOpenCL, 0}; + CL_QUEUE_PROPERTIES, CLProperties & 
SupportByOpenCL, 0}; *phQueue = cl_adapter::cast(clCreateCommandQueueWithProperties( cl_adapter::cast(hContext), cl_adapter::cast(hDevice), CreationFlagProperties, - &ret_err)); - CL_RETURN_ON_FAILURE(ret_err); + &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -127,19 +121,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, return UR_RESULT_ERROR_INVALID_VALUE; } - cl_command_queue_info clCommandQueueInfo = map_ur_queue_info_to_cl(propName); + cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); - cl_int ret_err = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), clCommandQueueInfo, propSize, + cl_int RetErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, propSize, pPropValue, pPropSizeRet); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { - return urGetNativeHandle(hQueue, phNativeQueue); + return getNativeHandle(hQueue, phNativeQueue); } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( @@ -152,38 +146,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( (void)hDevice; (void)pProperties; *phQueue = reinterpret_cast(hNativeQueue); - cl_int ret_err = + cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = clFinish(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(ret_err); + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { UR_ASSERT(hQueue, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = clFinish(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(ret_err); + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = + cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int ret_err = + cl_int RetErr = clReleaseCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(ret_err); + CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 056d61d572672..159deead203e4 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -4,8 +4,8 @@ namespace { -cl_sampler_info ur2clSamplerInfo(ur_sampler_info_t ur_info) { - switch (ur_info) { +cl_sampler_info ur2CLSamplerInfo(ur_sampler_info_t URInfo) { + switch (URInfo) { #define CASE(UR_INFO, CL_INFO) \ case UR_INFO: \ return CL_INFO; @@ -23,8 +23,8 @@ cl_sampler_info ur2clSamplerInfo(ur_sampler_info_t ur_info) { } } -cl_addressing_mode ur2clAddressingMode(ur_sampler_addressing_mode_t mode) { - switch (mode) { +cl_addressing_mode ur2CLAddressingMode(ur_sampler_addressing_mode_t Mode) { + switch (Mode) { #define CASE(UR_MODE, CL_MODE) \ case UR_MODE: \ @@ -45,8 +45,8 @@ cl_addressing_mode ur2clAddressingMode(ur_sampler_addressing_mode_t mode) { } } -cl_filter_mode ur2clFilterMode(ur_sampler_filter_mode_t mode) { - switch (mode) { +cl_filter_mode ur2CLFilterMode(ur_sampler_filter_mode_t 
Mode) { + switch (Mode) { #define CASE(UR_MODE, CL_MODE) \ case UR_MODE: \ @@ -63,8 +63,8 @@ cl_filter_mode ur2clFilterMode(ur_sampler_filter_mode_t mode) { } } -ur_sampler_addressing_mode_t cl2urAddressingMode(cl_addressing_mode mode) { - switch (mode) { +ur_sampler_addressing_mode_t cl2URAddressingMode(cl_addressing_mode Mode) { + switch (Mode) { #define CASE(CL_MODE, UR_MODE) \ case CL_MODE: \ @@ -80,14 +80,14 @@ ur_sampler_addressing_mode_t cl2urAddressingMode(cl_addressing_mode mode) { #undef CASE default: - std::cout << mode << std::endl; + std::cout << Mode << std::endl; assert(0 && "Unhandled: cl_addressing_mode"); break; } } -ur_sampler_filter_mode_t cl2urFilterMode(cl_filter_mode mode) { - switch (mode) { +ur_sampler_filter_mode_t cl2URFilterMode(cl_filter_mode Mode) { + switch (Mode) { #define CASE(CL_MODE, UR_MODE) \ case CL_MODE: \ return UR_MODE; @@ -103,23 +103,23 @@ ur_sampler_filter_mode_t cl2urFilterMode(cl_filter_mode mode) { } } -void cl2urSamplerInfoValue(cl_sampler_info info, size_t infoSize, - void *infoValue) { - if (!infoValue) { +void cl2URSamplerInfoValue(cl_sampler_info Info, size_t InfoSize, + void *InfoValue) { + if (!InfoValue) { return; } - switch (info) { + switch (Info) { case CL_SAMPLER_ADDRESSING_MODE: { - cl_addressing_mode clValue = - *reinterpret_cast(infoValue); - *reinterpret_cast(infoValue) = - cl2urAddressingMode(clValue); + cl_addressing_mode CLValue = + *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URAddressingMode(CLValue); break; } case CL_SAMPLER_FILTER_MODE: { - cl_filter_mode clMode = *reinterpret_cast(infoValue); - *reinterpret_cast(infoValue) = - cl2urFilterMode(clMode); + cl_filter_mode CLMode = *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URFilterMode(CLMode); break; } @@ -138,18 +138,18 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); // Initialize properties according to OpenCL 2.1 spec. 
- ur_result_t error_code; - cl_addressing_mode addressingMode = - ur2clAddressingMode(pDesc->addressingMode); - cl_filter_mode filterMode = ur2clFilterMode(pDesc->filterMode); + ur_result_t ErrorCode; + cl_addressing_mode AddressingMode = + ur2CLAddressingMode(pDesc->addressingMode); + cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); // Always call OpenCL 1.0 API *phSampler = cl_adapter::cast(clCreateSampler( cl_adapter::cast(hContext), - static_cast(pDesc->normalizedCoords), addressingMode, filterMode, - cl_adapter::cast(&error_code))); + static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, + cl_adapter::cast(&ErrorCode))); - return map_cl_error_to_ur(error_code); + return mapCLErrorToUR(ErrorCode); } UR_APIEXPORT ur_result_t UR_APICALL @@ -158,17 +158,17 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pPropValue || pPropSizeRet, UR_RESULT_ERROR_INVALID_VALUE); - cl_sampler_info sampler_info = ur2clSamplerInfo(propName); + cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); - if (ur_result_t err = map_cl_error_to_ur( - clGetSamplerInfo(cl_adapter::cast(hSampler), sampler_info, + if (ur_result_t Err = mapCLErrorToUR( + clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, propSize, pPropValue, pPropSizeRet))) { - return err; + return Err; } // Convert OpenCL returns to UR - cl2urSamplerInfoValue(sampler_info, propSize, pPropValue); + cl2URSamplerInfoValue(SamplerInfo, propSize, pPropValue); return UR_RESULT_SUCCESS; } @@ -176,14 +176,14 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - return map_cl_error_to_ur( + return mapCLErrorToUR( clRetainSampler(cl_adapter::cast(hSampler))); 
} UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - return map_cl_error_to_ur( + return mapCLErrorToUR( clReleaseSampler(cl_adapter::cast(hSampler))); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 680c5bd240747..ea0c83d8ffb31 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -13,13 +13,13 @@ namespace { // TODO - this is a duplicate of what is in the L0 plugin // We should move this to somewhere common -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { +ur_result_t validateProcInputs(ur_api_version_t Version, void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } // Pre 1.0 we enforce loader and adapter must have same version. // Post 1.0 only major version match should be required. 
- if (version != UR_API_VERSION_CURRENT) { + if (Version != UR_API_VERSION_CURRENT) { return UR_RESULT_ERROR_UNSUPPORTED_VERSION; } return UR_RESULT_SUCCESS; @@ -31,10 +31,10 @@ extern "C" { #endif UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_platform_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; pDdiTable->pfnGet = urPlatformGet; @@ -47,10 +47,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ur_context_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_context_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreate = urContextCreate; pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; @@ -63,10 +63,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ur_event_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_event_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; pDdiTable->pfnGetInfo = urEventGetInfo; @@ -80,10 +80,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetEventProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ur_program_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_program_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnBuild = urProgramBuild; pDdiTable->pfnCompile = urProgramCompile; @@ -103,10 +103,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_kernel_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreate = urKernelCreate; pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; @@ -127,10 +127,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_sampler_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreate = urSamplerCreate; // pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; @@ -142,10 +142,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL -urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, 
pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; +urGetMemProcAddrTable(ur_api_version_t Version, ur_mem_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnBufferCreate = urMemBufferCreate; pDdiTable->pfnBufferPartition = urMemBufferPartition; @@ -161,10 +161,10 @@ urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { } UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_enqueue_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; @@ -193,10 +193,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ur_global_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_global_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnInit = urInit; pDdiTable->pfnTearDown = urTearDown; @@ -204,10 +204,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_queue_dditable_t *pDdiTable) { + auto Result = 
validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreate = urQueueCreate; pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; @@ -221,15 +221,15 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( } UR_DLLEXPORT ur_result_t UR_APICALL -urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; +urGetUSMProcAddrTable(ur_api_version_t Version, ur_usm_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnHostAlloc = urUSMHostAlloc; + pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; + pDdiTable->pfnFree = urUSMFree; + pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; + pDdiTable->pfnHostAlloc = urUSMHostAlloc; // pDdiTable->pfnPoolCreate = nullptr; // pDdiTable->pfnPoolDestroy = nullptr; // pDdiTable->pfnPoolDestroy = nullptr; @@ -238,10 +238,10 @@ urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { } UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ur_device_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; + ur_api_version_t Version, ur_device_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; } pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; pDdiTable->pfnGet = urDeviceGet; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp index 309794477aabc..ca5f992d7f743 100644 --- 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp @@ -42,7 +42,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, cl_context CLContext = cl_adapter::cast(hContext); RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::clHostMemAllocName, &FuncPtr); + cl_ext::HostMemAllocName, &FuncPtr); if (FuncPtr) { Ptr = FuncPtr(CLContext, Properties, size, Alignment, @@ -97,7 +97,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, cl_context CLContext = cl_adapter::cast(hContext); RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::clDeviceMemAllocName, &FuncPtr); + cl_ext::DeviceMemAllocName, &FuncPtr); if (FuncPtr) { Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), @@ -163,7 +163,7 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, cl_context CLContext = cl_adapter::cast(hContext); RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::clSharedMemAllocName, &FuncPtr); + cl_ext::SharedMemAllocName, &FuncPtr); if (FuncPtr) { Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), @@ -192,10 +192,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::clMemBlockingFreeName, &FuncPtr); + cl_ext::MemBlockingFreeName, &FuncPtr); if (FuncPtr) { - RetVal = map_cl_error_to_ur(FuncPtr(CLContext, pMem)); + RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); } return RetVal; @@ -212,16 +212,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, sizeof(cl_context), &CLContext, nullptr); if (CLErr != CL_SUCCESS) { - 
return map_cl_error_to_ur(CLErr); + return mapCLErrorToUR(CLErr); } clEnqueueMemFillINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, - cl_ext::clEnqueueMemFillName, &FuncPtr); + cl_ext::EnqueueMemFillName, &FuncPtr); if (FuncPtr) { - RetVal = map_cl_error_to_ur( + RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, patternSize, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), @@ -246,16 +246,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, sizeof(cl_context), &CLContext, nullptr); if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); + return mapCLErrorToUR(CLErr); } clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::clEnqueueMemcpyName, &FuncPtr); + cl_ext::EnqueueMemcpyName, &FuncPtr); if (FuncPtr) { - RetVal = map_cl_error_to_ur( + RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, pSrc, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), @@ -279,7 +279,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( if (flags != 0) return UR_RESULT_ERROR_INVALID_VALUE; - return map_cl_error_to_ur(clEnqueueMarkerWithWaitList( + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -323,7 +323,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - return map_cl_error_to_ur(clEnqueueMarkerWithWaitList( + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), 0, nullptr, reinterpret_cast(phEvent))); @@ -404,7 +404,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t 
hContext, const void *pMem, cl_context CLContext = cl_adapter::cast(hContext); ur_result_t RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - cl_ext::clGetMemAllocInfoName, &FuncPtr); + cl_ext::GetMemAllocInfoName, &FuncPtr); cl_mem_info_intel PropNameCL; switch (propName) { @@ -425,9 +425,9 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } if (FuncPtr) { - RetVal = map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hContext), - pMem, PropNameCL, propSize, pPropValue, - pPropSizeRet)); + RetVal = + mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), pMem, + PropNameCL, propSize, pPropValue, pPropSizeRet)); if (RetVal == UR_RESULT_SUCCESS && pPropValue && propName == UR_USM_ALLOC_INFO_TYPE) { auto *AllocTypeCL = From 59857f168e19118806fb230030c3a16509742c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 17 Jul 2023 14:58:13 +0000 Subject: [PATCH 20/36] [SYCL][OpenCL] Cleanup code and address common review feedback --- sycl/plugins/unified_runtime/CMakeLists.txt | 16 +- sycl/plugins/unified_runtime/pi2ur.hpp | 1 - .../ur/adapters/opencl/command_buffer.cpp | 243 +++++++----------- .../ur/adapters/opencl/common.cpp | 4 +- .../ur/adapters/opencl/common.hpp | 40 +-- .../ur/adapters/opencl/context.cpp | 7 + .../ur/adapters/opencl/event.cpp | 19 +- .../ur/adapters/opencl/kernel.cpp | 11 +- .../ur/adapters/opencl/memory.cpp | 2 +- .../ur/adapters/opencl/platform.cpp | 10 +- .../ur/adapters/opencl/program.cpp | 59 +++-- .../ur/adapters/opencl/queue.cpp | 2 +- .../ur/adapters/opencl/sampler.cpp | 14 +- .../adapters/opencl/ur_interface_loader.cpp | 40 ++- .../ur/adapters/opencl/usm.cpp | 49 ++-- 15 files changed, 233 insertions(+), 284 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 7823501547ea0..bb58e4a288736 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ 
b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -256,6 +256,8 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) SOURCES "ur/ur.hpp" "ur/ur.cpp" + "ur/adapters/opencl/command_buffer.hpp" + "ur/adapters/opencl/command_buffer.cpp" "ur/adapters/opencl/common.cpp" "ur/adapters/opencl/common.hpp" "ur/adapters/opencl/context.cpp" @@ -263,18 +265,17 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) "ur/adapters/opencl/device.cpp" "ur/adapters/opencl/device.hpp" "ur/adapters/opencl/enqueue.cpp" + "ur/adapters/opencl/event.cpp" "ur/adapters/opencl/kernel.cpp" + "ur/adapters/opencl/memory.cpp" "ur/adapters/opencl/platform.cpp" "ur/adapters/opencl/platform.hpp" "ur/adapters/opencl/program.cpp" - "ur/adapters/opencl/sampler.cpp" - "ur/adapters/opencl/memory.cpp" - "ur/adapters/opencl/event.cpp" "ur/adapters/opencl/queue.cpp" + "ur/adapters/opencl/sampler.cpp" "ur/adapters/opencl/ur_interface_loader.cpp" "ur/adapters/opencl/usm.cpp" - "ur/adapters/opencl/command_buffer.hpp" - "ur/adapters/opencl/command_buffer.cpp" + INCLUDE_DIRS ${sycl_inc_dir} LIBRARIES @@ -282,6 +283,11 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) Threads::Threads OpenCL-ICD ) + + set_target_properties("ur_adapter_opencl" PROPERTIES + VERSION "0.0.0" + SOVERSION "0" + ) endif() if (TARGET UnifiedRuntimeLoader) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index c64237466ed49..635f609c3b1e7 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -2351,7 +2351,6 @@ inline pi_result piProgramRelease(pi_program Program) { inline pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, size_t, const void *ArgValue) { ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS(urKernelSetArgPointer(UrKernel, ArgIndex, nullptr, ArgValue)); return PI_SUCCESS; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp 
index 5b09b47bb63a0..6a942823fcc05 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp @@ -12,39 +12,34 @@ /// Stub implementations of UR experimental feature command-buffers UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { - (void)hContext; - (void)hDevice; - (void)pCommandBufferDesc; - (void)phCommandBuffer; + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; +UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferFinalizeExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -52,21 +47,16 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, - uint32_t workDim, const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hKernel; - (void)workDim; - (void)pGlobalWorkOffset; - (void)pGlobalWorkSize; - (void)pLocalWorkSize; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_kernel_handle_t hKernel, + [[maybe_unused]] uint32_t workDim, + [[maybe_unused]] const size_t *pGlobalWorkOffset, + [[maybe_unused]] const size_t *pGlobalWorkSize, + [[maybe_unused]] const size_t *pLocalWorkSize, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -74,17 +64,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( - ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, - size_t size, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)pDst; - (void)pSrc; - 
(void)size; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -92,20 +78,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hSrcMem; - (void)hDstMem; - (void)srcOffset; - (void)dstOffset; - (void)size; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, + [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -113,26 +93,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( - ur_exp_command_buffer_handle_t 
hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hSrcMem; - (void)hDstMem; - (void)srcOrigin; - (void)dstOrigin; - (void)region; - (void)srcRowPitch; - (void)srcSlicePitch; - (void)dstRowPitch; - (void)dstSlicePitch; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, + [[maybe_unused]] ur_rect_offset_t srcOrigin, + [[maybe_unused]] ur_rect_offset_t dstOrigin, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch, + [[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -141,19 +113,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, const void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)offset; - (void)size; - (void)pSrc; - (void)numSyncPointsInWaitList; - 
(void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -162,18 +128,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)offset; - (void)size; - (void)pDst; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -182,26 +143,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t 
bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)bufferOffset; - (void)hostOffset; - (void)region; - (void)bufferRowPitch; - (void)bufferSlicePitch; - (void)hostRowPitch; - (void)hostSlicePitch; - (void)pSrc; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + [[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -210,27 +164,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)bufferOffset; - (void)hostOffset; - (void)region; - 
(void)bufferRowPitch; - (void)bufferSlicePitch; - (void)hostRowPitch; - (void)hostSlicePitch; - (void)pDst; - - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + [[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -238,14 +184,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - (void)hCommandBuffer; - (void)hQueue; - (void)numEventsInWaitList; - (void)phEventWaitList; - (void)phEvent; + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index b9b7fc1a820d1..4d9623be53f4e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -64,7 +64,7 @@ ur_result_t mapCLErrorToUR(cl_int Result) { } void cl_adapter::die(const char *Message) { - std::cerr << "ur_die: " << Message << std::endl; + std::cerr << "ur_die: " << Message << "\n"; std::terminate(); } @@ -79,5 +79,3 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } - -cl_ext::ExtFuncPtrCacheT *ExtFuncPtrCache; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 35e346b7c5cd3..1ad5c7115a8b2 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -49,14 +49,14 @@ namespace oclv { class OpenCLVersion { protected: - unsigned int ocl_major; - unsigned int ocl_minor; + unsigned int OCLMajor; + unsigned int OCLMinor; public: - OpenCLVersion() : ocl_major(0), ocl_minor(0) {} + OpenCLVersion() : OCLMajor(0), OCLMinor(0) {} OpenCLVersion(unsigned int OclMajor, unsigned int OclMinor) - : ocl_major(OclMajor), ocl_minor(OclMinor) { + : OCLMajor(OclMajor), OCLMinor(OclMinor) { if (!isValid()) { OclMajor = OclMinor = 0; } @@ -64,7 +64,7 @@ class OpenCLVersion { OpenCLVersion(const char *Version) : OpenCLVersion(std::string(Version)) {} - OpenCLVersion(const std::string &Version) : ocl_major(0), ocl_minor(0) { + OpenCLVersion(const std::string &Version) : OCLMajor(0), OCLMinor(0) { /* The OpenCL specification defines the full version string as * 'OpenCL' for platforms and as @@ -75,26 +75,26 @@ class OpenCLVersion { std::smatch Match; if (std::regex_search(Version, Match, Rx) && (Match.size() == 3)) { - ocl_major = strtoul(Match[1].str().c_str(), nullptr, 10); - ocl_minor = strtoul(Match[2].str().c_str(), nullptr, 10); + OCLMajor = strtoul(Match[1].str().c_str(), nullptr, 10); + OCLMinor = strtoul(Match[2].str().c_str(), nullptr, 
10); if (!isValid()) { - ocl_major = ocl_minor = 0; + OCLMajor = OCLMinor = 0; } } } bool operator==(const OpenCLVersion &V) const { - return ocl_major == V.ocl_major && ocl_minor == V.ocl_minor; + return OCLMajor == V.OCLMajor && OCLMinor == V.OCLMinor; } bool operator!=(const OpenCLVersion &V) const { return !(*this == V); } bool operator<(const OpenCLVersion &V) const { - if (ocl_major == V.ocl_major) - return ocl_minor < V.ocl_minor; + if (OCLMajor == V.OCLMajor) + return OCLMinor < V.OCLMinor; - return ocl_major < V.ocl_major; + return OCLMajor < V.OCLMajor; } bool operator>(const OpenCLVersion &V) const { return V < *this; } @@ -108,21 +108,21 @@ class OpenCLVersion { } bool isValid() const { - switch (ocl_major) { + switch (OCLMajor) { case 0: return false; case 1: case 2: - return ocl_minor <= 2; + return OCLMinor <= 2; case UINT_MAX: return false; default: - return ocl_minor != UINT_MAX; + return OCLMinor != UINT_MAX; } } - int getMajor() const { return ocl_major; } - int getMinor() const { return ocl_minor; } + unsigned int getMajor() const { return OCLMajor; } + unsigned int getMinor() const { return OCLMinor; } }; inline const OpenCLVersion V1_0(1, 0); @@ -232,7 +232,7 @@ template struct FuncPtrCache { // FIXME: There's currently no mechanism for cleaning up this cache, meaning // that it is invalidated whenever a context is destroyed. This could lead to -// reusing an invalid function pointer if another context happends to have the +// reusing an invalid function pointer if another context happens to have the // same native handle. 
struct ExtFuncPtrCacheT { FuncPtrCache clHostMemAllocINTELCache; @@ -305,8 +305,8 @@ static ur_result_t getExtFuncFromContext(cl_context Context, return UR_RESULT_ERROR_INVALID_CONTEXT; } - T FuncPtr = - (T)clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName); + T FuncPtr = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName)); if (!FuncPtr) { // Cache that the extension is not available diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 8021f6cb45cfb..7b58f912543c3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -139,3 +139,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( *phContext = reinterpret_cast(hNativeContext); return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_context_extended_deleter_t pfnDeleter, + [[maybe_unused]] void *pUserData) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp index 723c863ec8831..3f5a4e7fa7d3b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -50,21 +50,6 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } -cl_int convertURProfilingInfoToCL(const ur_execution_info_t ExecutionInfo) { - switch (ExecutionInfo) { - case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: - return CL_COMPLETE; - case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: - return CL_RUNNING; - case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: - return CL_SUBMITTED; - case UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: - return CL_QUEUED; - default: - return -1; - } -} - UR_APIEXPORT 
ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, const ur_event_native_properties_t *pProperties, @@ -133,5 +118,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, ur_event_callback_t pfnNotify, void *pUserData) { + std::ignore = hEvent; + std::ignore = execStatus; + std::ignore = pfnNotify; + std::ignore = pUserData; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index bc68f1d68bb41..77ed6055315e5 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -121,7 +121,7 @@ mapURKernelSubGroupInfoToCL(ur_kernel_sub_group_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, + ur_kernel_sub_group_info_t propName, size_t, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -134,7 +134,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no // value is given we use the max work item size of the device in the first - // dimention to avoid truncation of max sub-group size. + // dimension to avoid truncation of max sub-group size. 
uint32_t MaxDims = 0; ur_result_t URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, @@ -345,10 +345,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *pProperties, - ur_mem_handle_t hArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clSetKernelArg( diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index 3a0392da14de9..13be5e20956ef 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -314,7 +314,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( BufferCreateType = CL_BUFFER_CREATE_TYPE_REGION; break; default: - break; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } _cl_buffer_region BufferRegion; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index dcf63127067d8..590aa4d4e9e5e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -149,9 +149,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-cl-opt-disable' for pFrontendOption = -O0 and '' for others. 
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t hPlatform, const char *pFrontendOption, - const char **ppPlatformOption) { +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, + const char **ppPlatformOption) { using namespace std::literals; if (pFrontendOption == nullptr) return UR_RESULT_SUCCESS; @@ -177,9 +177,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetLastError( - ur_platform_handle_t hPlatform, const char **ppMessage, int32_t *pError) { - - std::ignore = hPlatform; + ur_platform_handle_t, const char **ppMessage, int32_t *pError) { *ppMessage = cl_adapter::ErrorMessage; *pError = cl_adapter::ErrorMessageCode; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index 0d7f76dc2212f..6e32230082e8a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -358,6 +358,7 @@ static bool isInSeparatedString(const std::string &Str, char Delimiter, UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { + cl_context CLContext = nullptr; CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), CL_PROGRAM_CONTEXT, sizeof(CLContext), @@ -370,45 +371,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, cl_ext::GetDeviceFunctionPointerName, &FuncT)); - // Check if kernel name exists, to prevent opencl runtime throwing exception - // with cpu runtime - // TODO: Use fallback search method if extension does not exist once CPU - // runtime no longer throws exceptions and prints messages when given - // unavailable functions. 
+ if (!FuncT) { + return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + } + + // Check if the kernel name exists to prevent the OpenCL runtime from throwing + // an exception with the cpu runtime. + // TODO: Use fallback search method if the clGetDeviceFunctionPointerINTEL + // extension does not exist. Can only be done once the CPU runtime no longer + // throws exceptions. *ppFunctionPointer = 0; size_t Size; CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); - std::string CLResult(Size, ' '); + std::string KernelNames(Size, ' '); CL_RETURN_ON_FAILURE(clGetProgramInfo( cl_adapter::cast(hProgram), PI_PROGRAM_INFO_KERNEL_NAMES, - CLResult.size(), &CLResult[0], nullptr)); + KernelNames.size(), &KernelNames[0], nullptr)); - // Get rid of the null terminator and search for kernel_name - // If function cannot be found return error code to indicate it - // exists - CLResult.pop_back(); - if (!isInSeparatedString(CLResult, ';', pFunctionName)) + // Get rid of the null terminator and search for the kernel name. If the + // function cannot be found, return an error code to indicate it exists. + KernelNames.pop_back(); + if (!isInSeparatedString(KernelNames, ';', pFunctionName)) { return UR_RESULT_ERROR_INVALID_KERNEL_NAME; + } - ur_result_t URResult = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; - - // If clGetDeviceFunctionPointer is in list of extensions - if (FuncT) { - cl_int CLResult = - FuncT(cl_adapter::cast(hDevice), - cl_adapter::cast(hProgram), pFunctionName, - reinterpret_cast(ppFunctionPointer)); - // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address - // cannot be found even if kernel exits. 
As the kernel does exist return - // that the address is not available - if (CLResult == CL_INVALID_ARG_VALUE) { - *ppFunctionPointer = 0; - return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; - } + const cl_int CLResult = + FuncT(cl_adapter::cast(hDevice), + cl_adapter::cast(hProgram), pFunctionName, + reinterpret_cast(ppFunctionPointer)); + // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address + // cannot be found but the kernel exists. As the kernel does exist, return + // that the function name is invalid. + if (CLResult == CL_INVALID_ARG_VALUE) { + *ppFunctionPointer = 0; + return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; } - return URResult; + + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index c60180eccf234..decce6b448842 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -131,7 +131,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, } UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, +urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { return getNativeHandle(hQueue, phNativeQueue); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 159deead203e4..99397f6dbdeb2 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -80,7 +80,6 @@ ur_sampler_addressing_mode_t cl2URAddressingMode(cl_addressing_mode Mode) { #undef CASE default: - std::cout << Mode << std::endl; assert(0 && "Unhandled: cl_addressing_mode"); break; } @@ -103,8 +102,7 @@ ur_sampler_filter_mode_t 
cl2URFilterMode(cl_filter_mode Mode) { } } -void cl2URSamplerInfoValue(cl_sampler_info Info, size_t InfoSize, - void *InfoValue) { +void cl2URSamplerInfoValue(cl_sampler_info Info, void *InfoValue) { if (!InfoValue) { return; } @@ -168,7 +166,7 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, return Err; } // Convert OpenCL returns to UR - cl2URSamplerInfoValue(SamplerInfo, propSize, pPropValue); + cl2URSamplerInfoValue(SamplerInfo, pPropValue); return UR_RESULT_SUCCESS; } @@ -198,15 +196,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( - ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, - const ur_sampler_native_properties_t *pProperties, - ur_sampler_handle_t *phSampler) { + ur_native_handle_t hNativeSampler, ur_context_handle_t, + const ur_sampler_native_properties_t *, ur_sampler_handle_t *phSampler) { UR_ASSERT(hNativeSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); - std::ignore = hContext; - std::ignore = pProperties; *phSampler = reinterpret_cast( cl_adapter::cast(hNativeSampler)); return UR_RESULT_SUCCESS; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index ea0c83d8ffb31..ba334dd946140 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -58,7 +58,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; pDdiTable->pfnRelease = urContextRelease; pDdiTable->pfnRetain = urContextRetain; - // pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; 
return UR_RESULT_SUCCESS; } @@ -133,7 +133,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return Result; } pDdiTable->pfnCreate = urSamplerCreate; - // pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; pDdiTable->pfnGetInfo = urSamplerGetInfo; pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; pDdiTable->pfnRelease = urSamplerRelease; @@ -230,10 +230,11 @@ urGetUSMProcAddrTable(ur_api_version_t Version, ur_usm_dditable_t *pDdiTable) { pDdiTable->pfnFree = urUSMFree; pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; pDdiTable->pfnHostAlloc = urUSMHostAlloc; - // pDdiTable->pfnPoolCreate = nullptr; - // pDdiTable->pfnPoolDestroy = nullptr; - // pDdiTable->pfnPoolDestroy = nullptr; - // pDdiTable->pfnSharedAlloc = nullptr; + pDdiTable->pfnPoolCreate = nullptr; + pDdiTable->pfnPoolRetain = nullptr; + pDdiTable->pfnPoolRelease = nullptr; + pDdiTable->pfnPoolGetInfo = nullptr; + pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; return UR_RESULT_SUCCESS; } @@ -255,6 +256,33 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnCreateExp = urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendMembufferCopyRectExp = + 
urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; + pDdiTable->pfnAppendMembufferReadRectExp = + urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMembufferWriteExp = + urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMembufferWriteRectExp = + urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + + return retVal; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp index ca5f992d7f743..c8ccc7acb2519 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp @@ -10,7 +10,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, - ur_usm_pool_handle_t pool, size_t size, void **ppMem) { + ur_usm_pool_handle_t, size_t size, void **ppMem) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); @@ -61,7 +61,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -117,7 +117,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { UR_ASSERT(hContext, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -357,39 +357,24 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = pMem; - std::ignore = pitch; - std::ignore = patternSize; - std::ignore = pPattern; - std::ignore = width; - std::ignore = height; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pMem, + [[maybe_unused]] size_t pitch, [[maybe_unused]] size_t patternSize, + [[maybe_unused]] const void *pPattern, [[maybe_unused]] size_t width, + [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { return UR_RESULT_ERROR_INVALID_OPERATION; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = blocking; - std::ignore = pDst; - std::ignore = dstPitch; - std::ignore = pSrc; - std::ignore = srcPitch; - std::ignore = width; - std::ignore = height; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] bool blocking, + [[maybe_unused]] void *pDst, [[maybe_unused]] size_t dstPitch, + [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t 
srcPitch, + [[maybe_unused]] size_t width, [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { return UR_RESULT_ERROR_INVALID_OPERATION; } From c94338a4d4aa22188b869435c839351607afd2d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 19 Jul 2023 11:52:05 +0000 Subject: [PATCH 21/36] [SYCL][OpenCL] Fix issues after rebase and format cmakefile --- sycl/plugins/opencl/CMakeLists.txt | 1 + sycl/plugins/opencl/pi_opencl.cpp | 36 ++++--------- sycl/plugins/unified_runtime/CMakeLists.txt | 52 +++++++++---------- .../ur/adapters/opencl/queue.cpp | 7 +-- .../adapters/opencl/ur_interface_loader.cpp | 13 +++++ .../ur/adapters/opencl/usm_p2p.cpp | 39 ++++++++++++++ 6 files changed, 91 insertions(+), 57 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 5e7bcdc1283df..9940b717f6596 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -37,6 +37,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/queue.cpp" "../unified_runtime/ur/adapters/opencl/command_buffer.hpp" "../unified_runtime/ur/adapters/opencl/command_buffer.cpp" + "../unified_runtime/ur/adapters/opencl/usm_p2p.cpp" # --- "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b2d2a45214a83..36d0111fd8466 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -16,30 +16,10 @@ #define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Want all the needed casts be explicit, do not define conversion operators. 
-template To cast(From value) { - // TODO: see if more sanity checks are possible. - static_assert(sizeof(From) == sizeof(To), "cast failed size check"); - return (To)(value); -} - extern "C" { pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) { @@ -214,12 +194,14 @@ pi_result piPluginInit(pi_plugin *PluginInit) { pi2ur::piextCommandBufferMemBufferCopy) _PI_CL(piextCommandBufferMemBufferCopyRect, pi2ur::piextCommandBufferMemBufferCopyRect) - _PI_CL(piextCommandBufferMemBufferRead, piextCommandBufferMemBufferRead) + _PI_CL(piextCommandBufferMemBufferRead, + pi2ur::piextCommandBufferMemBufferRead) _PI_CL(piextCommandBufferMemBufferReadRect, - piextCommandBufferMemBufferReadRect) - _PI_CL(piextCommandBufferMemBufferWrite, piextCommandBufferMemBufferWrite) + pi2ur::piextCommandBufferMemBufferReadRect) + _PI_CL(piextCommandBufferMemBufferWrite, + pi2ur::piextCommandBufferMemBufferWrite) _PI_CL(piextCommandBufferMemBufferWriteRect, - piextCommandBufferMemBufferWriteRect) + pi2ur::piextCommandBufferMemBufferWriteRect) _PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer) // Kernel _PI_CL(piextKernelSetArgMemObj, pi2ur::piextKernelSetArgMemObj) @@ -228,6 +210,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piTearDown, pi2ur::piTearDown) _PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer) _PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption) + // Peer to Peer + _PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess) + _PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess) + _PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo) #undef _PI_CL diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index bb58e4a288736..8750afd5e1481 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -254,34 +254,34 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) # Build OpenCL adapter 
add_sycl_library("ur_adapter_opencl" SHARED SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - "ur/adapters/opencl/command_buffer.hpp" - "ur/adapters/opencl/command_buffer.cpp" - "ur/adapters/opencl/common.cpp" - "ur/adapters/opencl/common.hpp" - "ur/adapters/opencl/context.cpp" - "ur/adapters/opencl/context.hpp" - "ur/adapters/opencl/device.cpp" - "ur/adapters/opencl/device.hpp" - "ur/adapters/opencl/enqueue.cpp" - "ur/adapters/opencl/event.cpp" - "ur/adapters/opencl/kernel.cpp" - "ur/adapters/opencl/memory.cpp" - "ur/adapters/opencl/platform.cpp" - "ur/adapters/opencl/platform.hpp" - "ur/adapters/opencl/program.cpp" - "ur/adapters/opencl/queue.cpp" - "ur/adapters/opencl/sampler.cpp" - "ur/adapters/opencl/ur_interface_loader.cpp" - "ur/adapters/opencl/usm.cpp" - + "ur/ur.hpp" + "ur/ur.cpp" + "ur/adapters/opencl/command_buffer.hpp" + "ur/adapters/opencl/command_buffer.cpp" + "ur/adapters/opencl/common.cpp" + "ur/adapters/opencl/common.hpp" + "ur/adapters/opencl/context.cpp" + "ur/adapters/opencl/context.hpp" + "ur/adapters/opencl/device.cpp" + "ur/adapters/opencl/device.hpp" + "ur/adapters/opencl/enqueue.cpp" + "ur/adapters/opencl/event.cpp" + "ur/adapters/opencl/kernel.cpp" + "ur/adapters/opencl/memory.cpp" + "ur/adapters/opencl/platform.cpp" + "ur/adapters/opencl/platform.hpp" + "ur/adapters/opencl/program.cpp" + "ur/adapters/opencl/queue.cpp" + "ur/adapters/opencl/sampler.cpp" + "ur/adapters/opencl/ur_interface_loader.cpp" + "ur/adapters/opencl/usm.cpp" + "ur/adapters/opencl/usm_p2p.cpp" INCLUDE_DIRS - ${sycl_inc_dir} + ${sycl_inc_dir} LIBRARIES - UnifiedRuntime-Headers - Threads::Threads - OpenCL-ICD + UnifiedRuntime-Headers + Threads::Threads + OpenCL-ICD ) set_target_properties("ur_adapter_opencl" PROPERTIES diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index decce6b448842..f99e0df2ce4dd 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -73,13 +73,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_command_queue_properties CLProperties = convertURQueuePropertiesToCL(pProperties); - // Check that unexpected bits are not set. - assert(!(CLProperties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | - CL_QUEUE_ON_DEVICE_DEFAULT))); - // Properties supported by OpenCL backend. - cl_command_queue_properties SupportByOpenCL = + const cl_command_queue_properties SupportByOpenCL = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index ba334dd946140..803bd494b95e5 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -283,6 +283,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; + + return retVal; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp new file mode 100644 index 0000000000000..e7dd9083342a0 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm_p2p.cpp @@ -0,0 +1,39 @@ +//===--------- usm_p2p.cpp - OpenCL 
Adapter-------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( + [[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice, + [[maybe_unused]] ur_exp_peer_info_t propName, + [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, + [[maybe_unused]] size_t *pPropSizeRet) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} From d7669ce23e257854581361b7c153bde1c64fffa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 19 Jul 2023 17:43:54 +0100 Subject: [PATCH 22/36] [SYCL][OpenCL] Remove change to e2e test from the OpenCL port --- sycl/test-e2e/Basic/subdevice_pi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Basic/subdevice_pi.cpp b/sycl/test-e2e/Basic/subdevice_pi.cpp index 127eb9b9fae06..bf63e1da8aa06 100644 --- a/sycl/test-e2e/Basic/subdevice_pi.cpp +++ b/sycl/test-e2e/Basic/subdevice_pi.cpp @@ -195,7 
+195,7 @@ int main(int argc, const char **argv) { std::string test(argv[1]); std::string partition_type(argv[2]); - device dev(cpu_selector_v); + device dev(default_selector_v); std::vector host_mem(1024, 1); buffer buf(&host_mem[0], host_mem.size()); From 6dccfea6993b2a87762c22e52d895935fd1378fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 20 Jul 2023 11:54:12 +0000 Subject: [PATCH 23/36] [SYCL][OpenCL] Port USM import and export entrypoints --- sycl/plugins/opencl/pi_opencl.cpp | 17 ++--------------- .../ur/adapters/opencl/ur_interface_loader.cpp | 12 ++++++++++++ .../unified_runtime/ur/adapters/opencl/usm.cpp | 12 ++++++++++++ 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 36d0111fd8466..4b4d47f7bf167 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -22,19 +22,6 @@ extern "C" { -pi_result piextUSMImport(const void *ptr, size_t size, pi_context context) { - std::ignore = ptr; - std::ignore = size; - std::ignore = context; - return PI_SUCCESS; -} - -pi_result piextUSMRelease(const void *ptr, pi_context context) { - std::ignore = ptr; - std::ignore = context; - return PI_SUCCESS; -} - const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -173,8 +160,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMEnqueueMemset2D, pi2ur::piextUSMEnqueueMemset2D) _PI_CL(piextUSMEnqueueMemcpy2D, pi2ur::piextUSMEnqueueMemcpy2D) _PI_CL(piextUSMGetMemAllocInfo, pi2ur::piextUSMGetMemAllocInfo) - _PI_CL(piextUSMImport, piextUSMImport) - _PI_CL(piextUSMRelease, piextUSMRelease) + _PI_CL(piextUSMImport, pi2ur::piextUSMImport) + _PI_CL(piextUSMRelease, pi2ur::piextUSMRelease) // Device global variable _PI_CL(piextEnqueueDeviceGlobalVariableWrite, pi2ur::piextEnqueueDeviceGlobalVariableWrite) diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
index 803bd494b95e5..5ff8e07dc2841 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp
@@ -238,6 +238,18 @@ urGetUSMProcAddrTable(ur_api_version_t Version, ur_usm_dditable_t *pDdiTable) {
   return UR_RESULT_SUCCESS;
 }
 
+UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable(
+    ur_api_version_t Version, ur_usm_exp_dditable_t *pDdiTable) {
+  auto Result = validateProcInputs(Version, pDdiTable);
+  if (UR_RESULT_SUCCESS != Result) {
+    return Result;
+  }
+
+  pDdiTable->pfnImportExp = urUSMImportExp;
+  pDdiTable->pfnReleaseExp = urUSMReleaseExp;
+  return UR_RESULT_SUCCESS;
+}
+
 UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
     ur_api_version_t Version, ur_device_dditable_t *pDdiTable) {
   auto Result = validateProcInputs(Version, pDdiTable);
diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp
index c8ccc7acb2519..8a1b26bba9eac 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp
@@ -440,3 +440,15 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
 
   return RetVal;
 }
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMImportExp([[maybe_unused]] ur_context_handle_t Context,
+               [[maybe_unused]] void *HostPtr, [[maybe_unused]] size_t Size) {
+  return UR_RESULT_SUCCESS; // No-op success, as in the PI plugin it replaces.
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMReleaseExp([[maybe_unused]] ur_context_handle_t Context,
+                [[maybe_unused]] void *HostPtr) {
+  return UR_RESULT_SUCCESS; // No-op success, as in the PI plugin it replaces.
+}
From da8e170401a4d8af5ffed8f464126c1d1cfd90f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A1bio=20Mestre?=
Date: Fri, 21 Jul 2023 11:16:06 +0000
Subject: [PATCH 24/36]
[SYCL][OpenCL] Fix ABI break and add image entrypoints to the adapter --- sycl/plugins/opencl/CMakeLists.txt | 1 + sycl/plugins/opencl/pi_opencl.cpp | 1272 ++++++++++++++--- sycl/plugins/unified_runtime/CMakeLists.txt | 1 + .../ur/adapters/opencl/image.cpp | 174 +++ .../adapters/opencl/ur_interface_loader.cpp | 33 + sycl/test/abi/pi_opencl_symbol_check.dump | 72 + 6 files changed, 1384 insertions(+), 169 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 9940b717f6596..d292a84ba7683 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -26,6 +26,7 @@ add_sycl_plugin(opencl "../unified_runtime/ur/adapters/opencl/device.cpp" "../unified_runtime/ur/adapters/opencl/device.hpp" "../unified_runtime/ur/adapters/opencl/enqueue.cpp" + "../unified_runtime/ur/adapters/opencl/image.cpp" "../unified_runtime/ur/adapters/opencl/kernel.cpp" "../unified_runtime/ur/adapters/opencl/platform.cpp" "../unified_runtime/ur/adapters/opencl/platform.hpp" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 4b4d47f7bf167..6cc6a325af923 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -24,6 +24,1105 @@ extern "C" { const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; +pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, + pi_uint32 *NumPlatforms) { + return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); +} + +pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piPlatformGetInfo(Platform, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piextPlatformGetNativeHandle(pi_platform Platform, + pi_native_handle *NativeHandle) { + return 
pi2ur::piextPlatformGetNativeHandle(Platform, NativeHandle); +} + +pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_platform *Platform) { + return pi2ur::piextPlatformCreateWithNativeHandle(NativeHandle, Platform); +} + +pi_result piPluginGetLastError(char **message) { + return pi2ur::piPluginGetLastError(message); +} + +pi_result piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, + const char **backend_option) { + return pi2ur::piPluginGetBackendOption(platform, frontend_option, + backend_option); +} + +pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, + pi_uint32 NumEntries, pi_device *Devices, + pi_uint32 *NumDevices) { + return pi2ur::piDevicesGet(Platform, DeviceType, NumEntries, Devices, + NumDevices); +} + +pi_result piDeviceRetain(pi_device Device) { + return pi2ur::piDeviceRetain(Device); +} + +pi_result piDeviceRelease(pi_device Device) { + return pi2ur::piDeviceRelease(Device); +} + +pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piDeviceGetInfo(Device, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piDevicePartition(pi_device Device, + const pi_device_partition_property *Properties, + pi_uint32 NumDevices, pi_device *OutDevices, + pi_uint32 *OutNumDevices) { + return pi2ur::piDevicePartition(Device, Properties, NumDevices, OutDevices, + OutNumDevices); +} + +pi_result piextDeviceSelectBinary(pi_device Device, pi_device_binary *Binaries, + pi_uint32 NumBinaries, + pi_uint32 *SelectedBinaryInd) { + return pi2ur::piextDeviceSelectBinary(Device, Binaries, NumBinaries, + SelectedBinaryInd); +} + +pi_result piextDeviceGetNativeHandle(pi_device Device, + pi_native_handle *NativeHandle) { + return pi2ur::piextDeviceGetNativeHandle(Device, NativeHandle); +} + +pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, + 
pi_platform Platform, + pi_device *Device) { + return pi2ur::piextDeviceCreateWithNativeHandle(NativeHandle, Platform, + Device); +} + +pi_result piContextCreate(const pi_context_properties *Properties, + pi_uint32 NumDevices, const pi_device *Devices, + void (*PFnNotify)(const char *ErrInfo, + const void *PrivateInfo, size_t CB, + void *UserData), + void *UserData, pi_context *RetContext) { + return pi2ur::piContextCreate(Properties, NumDevices, Devices, PFnNotify, + UserData, RetContext); +} + +pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piContextGetInfo(Context, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextContextSetExtendedDeleter(pi_context Context, + pi_context_extended_deleter Function, + void *UserData) { + return pi2ur::piextContextSetExtendedDeleter(Context, Function, UserData); +} + +pi_result piextContextGetNativeHandle(pi_context Context, + pi_native_handle *NativeHandle) { + return pi2ur::piextContextGetNativeHandle(Context, NativeHandle); +} + +pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_uint32 NumDevices, + const pi_device *Devices, + bool OwnNativeHandle, + pi_context *RetContext) { + return pi2ur::piextContextCreateWithNativeHandle( + NativeHandle, NumDevices, Devices, OwnNativeHandle, RetContext); +} + +pi_result piContextRetain(pi_context Context) { + return pi2ur::piContextRetain(Context); +} + +pi_result piContextRelease(pi_context Context) { + return pi2ur::piContextRelease(Context); +} + +pi_result piQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties Flags, pi_queue *Queue) { + pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; + return piextQueueCreate(Context, Device, Properties, Queue); +} + +pi_result piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, pi_queue *Queue) { + 
return pi2ur::piextQueueCreate(Context, Device, Properties, Queue); +} + +pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piQueueGetInfo(Queue, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piQueueRetain(pi_queue Queue) { return pi2ur::piQueueRetain(Queue); } + +pi_result piQueueRelease(pi_queue Queue) { + return pi2ur::piQueueRelease(Queue); +} + +pi_result piQueueFinish(pi_queue Queue) { return pi2ur::piQueueFinish(Queue); } + +pi_result piQueueFlush(pi_queue Queue) { return pi2ur::piQueueFlush(Queue); } + +pi_result piextQueueGetNativeHandle(pi_queue Queue, + pi_native_handle *NativeHandle, + int32_t *NativeHandleDesc) { + return pi2ur::piextQueueGetNativeHandle(Queue, NativeHandle, + NativeHandleDesc); +} + +pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, + int32_t NativeHandleDesc, + pi_context Context, pi_device Device, + bool OwnNativeHandle, + pi_queue_properties *Properties, + pi_queue *Queue) { + return pi2ur::piextQueueCreateWithNativeHandle( + NativeHandle, NativeHandleDesc, Context, Device, OwnNativeHandle, + Properties, Queue); +} + +pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, + void *HostPtr, pi_mem *RetMem, + const pi_mem_properties *properties) { + return pi2ur::piMemBufferCreate(Context, Flags, Size, HostPtr, RetMem, + properties); +} + +pi_result piMemGetInfo(pi_mem Mem, pi_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piMemGetInfo(Mem, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piMemRetain(pi_mem Mem) { return pi2ur::piMemRetain(Mem); } + +pi_result piMemRelease(pi_mem Mem) { return pi2ur::piMemRelease(Mem); } + +pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, + const pi_image_format *ImageFormat, + const pi_image_desc *ImageDesc, 
void *HostPtr, + pi_mem *RetImage) { + return pi2ur::piMemImageCreate(Context, Flags, ImageFormat, ImageDesc, + HostPtr, RetImage); +} + +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +} + +pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool ownNativeHandle, pi_mem *Mem) { + return pi2ur::piextMemCreateWithNativeHandle(NativeHandle, Context, + ownNativeHandle, Mem); +} + +pi_result piProgramCreate(pi_context Context, const void *ILBytes, + size_t Length, pi_program *Program) { + return pi2ur::piProgramCreate(Context, ILBytes, Length, Program); +} + +pi_result piProgramCreateWithBinary( + pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, + const size_t *Lengths, const unsigned char **Binaries, + size_t NumMetadataEntries, const pi_device_binary_property *Metadata, + pi_int32 *BinaryStatus, pi_program *Program) { + return pi2ur::piProgramCreateWithBinary(Context, NumDevices, DeviceList, + Lengths, Binaries, NumMetadataEntries, + Metadata, BinaryStatus, Program); +} + +pi_result piextMemImageCreateWithNativeHandle( + pi_native_handle NativeHandle, pi_context Context, bool OwnNativeHandle, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + pi_mem *Img) { + return pi2ur::piextMemImageCreateWithNativeHandle( + NativeHandle, Context, OwnNativeHandle, ImageFormat, ImageDesc, Img); +} + +pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piProgramGetInfo(Program, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + pi_uint32 NumInputPrograms, + const pi_program *InputPrograms, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData, 
pi_program *RetProgram) { + return pi2ur::piProgramLink(Context, NumDevices, DeviceList, Options, + NumInputPrograms, InputPrograms, PFnNotify, + UserData, RetProgram); +} + +pi_result piProgramCompile( + pi_program Program, pi_uint32 NumDevices, const pi_device *DeviceList, + const char *Options, pi_uint32 NumInputHeaders, + const pi_program *InputHeaders, const char **HeaderIncludeNames, + void (*PFnNotify)(pi_program Program, void *UserData), void *UserData) { + return pi2ur::piProgramCompile(Program, NumDevices, DeviceList, Options, + NumInputHeaders, InputHeaders, + HeaderIncludeNames, PFnNotify, UserData); +} + +pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, + const pi_device *DeviceList, const char *Options, + void (*PFnNotify)(pi_program Program, void *UserData), + void *UserData) { + return pi2ur::piProgramBuild(Program, NumDevices, DeviceList, Options, + PFnNotify, UserData); +} + +pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, + pi_program_build_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piProgramGetBuildInfo(Program, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piProgramRetain(pi_program Program) { + return pi2ur::piProgramRetain(Program); +} + +pi_result piProgramRelease(pi_program Program) { + return pi2ur::piProgramRelease(Program); +} + +pi_result piextProgramGetNativeHandle(pi_program Program, + pi_native_handle *NativeHandle) { + return pi2ur::piextProgramGetNativeHandle(Program, NativeHandle); +} + +pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_program *Program) { + return pi2ur::piextProgramCreateWithNativeHandle(NativeHandle, Context, + OwnNativeHandle, Program); +} + +pi_result piKernelCreate(pi_program Program, const char *KernelName, + pi_kernel *RetKernel) { + return pi2ur::piKernelCreate(Program, KernelName, 
RetKernel); +} + +pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, + const void *ArgValue) { + return pi2ur::piKernelSetArg(Kernel, ArgIndex, ArgSize, ArgValue); +} + +pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, + const pi_mem *ArgValue) { + return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, + ArgValue); +} + +pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_sampler *ArgValue) { + return pi2ur::piextKernelSetArgSampler(Kernel, ArgIndex, ArgValue); +} + +pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piKernelGetInfo(Kernel, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextMemImageAllocate(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemImageAllocate(Context, Device, ImageFormat, ImageDesc, + RetMem); +} + +pi_result piextMemUnsampledImageCreate(pi_context Context, pi_device Device, + pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemUnsampledImageCreate( + Context, Device, ImgMem, ImageFormat, ImageDesc, RetMem, RetHandle); +} + +pi_result piextMemSampledImageCreate(pi_context Context, pi_device Device, + pi_image_mem_handle ImgMem, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_sampler Sampler, pi_mem *RetMem, + pi_image_handle *RetHandle) { + return pi2ur::piextMemSampledImageCreate(Context, Device, ImgMem, ImageFormat, + ImageDesc, Sampler, RetMem, + RetHandle); +} + +pi_result piextBindlessImageSamplerCreate( + pi_context Context, const pi_sampler_properties *SamplerProperties, + float MinMipmapLevelClamp, float MaxMipmapLevelClamp, 
float MaxAnisotropy, + pi_sampler *RetSampler) { + return pi2ur::piextBindlessImageSamplerCreate( + Context, SamplerProperties, MinMipmapLevelClamp, MaxMipmapLevelClamp, + MaxAnisotropy, RetSampler); +} + +pi_result piextMemMipmapGetLevel(pi_context Context, pi_device Device, + pi_image_mem_handle MipMem, unsigned int Level, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMipmapGetLevel(Context, Device, MipMem, Level, RetMem); +} + +pi_result piextMemImageFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemImageFree(Context, Device, MemoryHandle); +} + +pi_result piextMemMipmapFree(pi_context Context, pi_device Device, + pi_image_mem_handle MemoryHandle) { + return pi2ur::piextMemMipmapFree(Context, Device, MemoryHandle); +} + +pi_result +piextMemImageCopy(pi_queue Queue, void *DstPtr, void *SrcPtr, + const pi_image_format *ImageFormat, + const pi_image_desc *ImageDesc, + const pi_image_copy_flags Flags, pi_image_offset SrcOffset, + pi_image_offset DstOffset, pi_image_region CopyExtent, + pi_image_region HostExtent, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextMemImageCopy(Queue, DstPtr, SrcPtr, ImageFormat, ImageDesc, + Flags, SrcOffset, DstOffset, CopyExtent, + HostExtent, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piextMemUnsampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + return pi2ur::piextMemUnsampledImageHandleDestroy(Context, Device, Handle); +} + +pi_result piextMemSampledImageHandleDestroy(pi_context Context, + pi_device Device, + pi_image_handle Handle) { + return pi2ur::piextMemSampledImageHandleDestroy(Context, Device, Handle); +} + +pi_result piextMemImageGetInfo(pi_image_mem_handle MemHandle, + pi_image_info ParamName, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextMemImageGetInfo(MemHandle, ParamName, ParamValue, + ParamValueSizeRet); +} + 
+pi_result piextMemImportOpaqueFD(pi_context Context, pi_device Device, + size_t Size, int FileDescriptor, + pi_interop_mem_handle *RetHandle) { + return pi2ur::piextMemImportOpaqueFD(Context, Device, Size, FileDescriptor, + RetHandle); +} + +pi_result piextMemMapExternalArray(pi_context Context, pi_device Device, + pi_image_format *ImageFormat, + pi_image_desc *ImageDesc, + pi_interop_mem_handle MemHandle, + pi_image_mem_handle *RetMem) { + return pi2ur::piextMemMapExternalArray(Context, Device, ImageFormat, + ImageDesc, MemHandle, RetMem); +} + +pi_result piextMemReleaseInterop(pi_context Context, pi_device Device, + pi_interop_mem_handle ExtMem) { + return pi2ur::piextMemReleaseInterop(Context, Device, ExtMem); +} + +pi_result +piextImportExternalSemaphoreOpaqueFD(pi_context Context, pi_device Device, + int FileDescriptor, + pi_interop_semaphore_handle *RetHandle) { + return pi2ur::piextImportExternalSemaphoreOpaqueFD(Context, Device, + FileDescriptor, RetHandle); +} + +pi_result piextDestroyExternalSemaphore(pi_context Context, pi_device Device, + pi_interop_semaphore_handle SemHandle) { + return pi2ur::piextDestroyExternalSemaphore(Context, Device, SemHandle); +} + +pi_result piextWaitExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextWaitExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piextSignalExternalSemaphore(pi_queue Queue, + pi_interop_semaphore_handle SemHandle, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextSignalExternalSemaphore( + Queue, SemHandle, NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, + pi_kernel_group_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return 
pi2ur::piKernelGetGroupInfo(Kernel, Device, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, + pi_kernel_sub_group_info ParamName, + size_t InputValueSize, const void *InputValue, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piKernelGetSubGroupInfo( + Kernel, Device, ParamName, InputValueSize, InputValue, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piKernelRetain(pi_kernel Kernel) { + return pi2ur::piKernelRetain(Kernel); +} + +pi_result piKernelRelease(pi_kernel Kernel) { + return pi2ur::piKernelRelease(Kernel); +} + +pi_result +piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, + const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *OutEvent) { + return pi2ur::piEnqueueKernelLaunch( + Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, + NumEventsInWaitList, EventWaitList, OutEvent); +} + +pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + pi_program Program, + bool OwnNativeHandle, + pi_kernel *Kernel) { + return pi2ur::piextKernelCreateWithNativeHandle( + NativeHandle, Context, Program, OwnNativeHandle, Kernel); +} + +pi_result piextKernelGetNativeHandle(pi_kernel Kernel, + pi_native_handle *NativeHandle) { + return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle); +} + +pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { + return pi2ur::piEventCreate(Context, RetEvent); +} + +pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piEventGetInfo(Event, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info 
ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piEventGetProfilingInfo(Event, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { + return pi2ur::piEventsWait(NumEvents, EventList); +} + +pi_result piEventSetCallback(pi_event Event, pi_int32 CommandExecCallbackType, + void (*PFnNotify)(pi_event Event, + pi_int32 EventCommandStatus, + void *UserData), + void *UserData) { + return pi2ur::piEventSetCallback(Event, CommandExecCallbackType, PFnNotify, + UserData); +} + +pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { + return pi2ur::piEventSetStatus(Event, ExecutionStatus); +} + +pi_result piEventRetain(pi_event Event) { return pi2ur::piEventRetain(Event); } + +pi_result piEventRelease(pi_event Event) { + return pi2ur::piEventRelease(Event); +} + +pi_result piextEventGetNativeHandle(pi_event Event, + pi_native_handle *NativeHandle) { + return pi2ur::piextEventGetNativeHandle(Event, NativeHandle); +} + +pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle, + pi_context Context, + bool OwnNativeHandle, + pi_event *Event) { + return pi2ur::piextEventCreateWithNativeHandle(NativeHandle, Context, + OwnNativeHandle, Event); +} + +pi_result piSamplerCreate(pi_context Context, + const pi_sampler_properties *SamplerProperties, + pi_sampler *RetSampler) { + return pi2ur::piSamplerCreate(Context, SamplerProperties, RetSampler); +} + +pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piSamplerGetInfo(Sampler, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piSamplerRetain(pi_sampler Sampler) { + return pi2ur::piSamplerRetain(Sampler); +} + +pi_result piSamplerRelease(pi_sampler Sampler) { + return pi2ur::piSamplerRelease(Sampler); +} + +pi_result 
piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piEnqueueEventsWait(Queue, NumEventsInWaitList, EventWaitList, + OutEvent); +} + +pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piEnqueueEventsWaitWithBarrier(Queue, NumEventsInWaitList, + EventWaitList, OutEvent); +} + +pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, + pi_bool BlockingRead, size_t Offset, + size_t Size, void *Dst, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferRead(Queue, Src, BlockingRead, Offset, Size, + Dst, NumEventsInWaitList, EventWaitList, + Event); +} + +pi_result piEnqueueMemBufferReadRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferReadRect( + Queue, Buffer, BlockingRead, BufferOffset, HostOffset, Region, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, + pi_bool BlockingWrite, size_t Offset, + size_t Size, const void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferWrite(Queue, Buffer, BlockingWrite, Offset, + Size, Ptr, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferWriteRect( + pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region 
Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferWriteRect( + Queue, Buffer, BlockingWrite, BufferOffset, HostOffset, Region, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, + size_t SrcOffset, size_t DstOffset, + size_t Size, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferCopy(Queue, SrcMem, DstMem, SrcOffset, + DstOffset, Size, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferCopyRect( + pi_queue Queue, pi_mem SrcMem, pi_mem DstMem, pi_buff_rect_offset SrcOrigin, + pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, + size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, + size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + + return pi2ur::piEnqueueMemBufferCopyRect( + Queue, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, + SrcSlicePitch, DstRowPitch, DstSlicePitch, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, + const void *Pattern, size_t PatternSize, + size_t Offset, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemBufferFill(Queue, Buffer, Pattern, PatternSize, + Offset, Size, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Mem, pi_bool BlockingMap, + pi_map_flags MapFlags, size_t Offset, + size_t Size, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent, void **RetMap) { + return 
pi2ur::piEnqueueMemBufferMap(Queue, Mem, BlockingMap, MapFlags, Offset, + Size, NumEventsInWaitList, EventWaitList, + OutEvent, RetMap); +} + +pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem Mem, void *MappedPtr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *OutEvent) { + return pi2ur::piEnqueueMemUnmap(Queue, Mem, MappedPtr, NumEventsInWaitList, + EventWaitList, OutEvent); +} + +pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piMemImageGetInfo(Image, ParamName, ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, + pi_bool BlockingRead, pi_image_offset Origin, + pi_image_region Region, size_t RowPitch, + size_t SlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageRead( + Queue, Image, BlockingRead, Origin, Region, RowPitch, SlicePitch, Ptr, + NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, + pi_bool BlockingWrite, pi_image_offset Origin, + pi_image_region Region, size_t InputRowPitch, + size_t InputSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageWrite( + Queue, Image, BlockingWrite, Origin, Region, InputRowPitch, + InputSlicePitch, Ptr, NumEventsInWaitList, EventWaitList, Event); +} + +pi_result +piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, + pi_image_offset SrcOrigin, pi_image_offset DstOrigin, + pi_image_region Region, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piEnqueueMemImageCopy(Queue, SrcImage, DstImage, SrcOrigin, + DstOrigin, Region, NumEventsInWaitList, + EventWaitList, Event); +} + +pi_result 
piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, + const void *FillColor, const size_t *Origin, + const size_t *Region, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piEnqueueMemImageFill(Queue, Image, FillColor, Origin, Region, + NumEventsInWaitList, EventWaitList, + Event); +} + +pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, + pi_buffer_create_type BufferCreateType, + void *BufferCreateInfo, pi_mem *RetMem) { + return pi2ur::piMemBufferPartition(Buffer, Flags, BufferCreateType, + BufferCreateInfo, RetMem); +} + +pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, + const char *FunctionName, + pi_uint64 *FunctionPointerRet) { + return pi2ur::piextGetDeviceFunctionPointer(Device, Program, FunctionName, + FunctionPointerRet); +} + +pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMDeviceAlloc(ResultPtr, Context, Device, Properties, + Size, Alignment); +} + +pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, + pi_device Device, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMSharedAlloc(ResultPtr, Context, Device, Properties, + Size, Alignment); +} + +pi_result piextUSMPitchedAlloc(void **ResultPtr, size_t *ResultPitch, + pi_context Context, pi_device Device, + pi_usm_mem_properties *Properties, + size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + return pi2ur::piextUSMPitchedAlloc(ResultPtr, ResultPitch, Context, Device, + Properties, WidthInBytes, Height, + ElementSizeBytes); +} + +pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, + pi_usm_mem_properties *Properties, size_t Size, + pi_uint32 Alignment) { + return pi2ur::piextUSMHostAlloc(ResultPtr, Context, Properties, Size, + Alignment); +} + +pi_result 
piextUSMFree(pi_context Context, void *Ptr) { + return pi2ur::piextUSMFree(Context, Ptr); +} + +pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, + size_t ArgSize, const void *ArgValue) { + return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); +} + +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +} + +pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, + const void *SrcPtr, size_t Size, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemcpy(Queue, Blocking, DstPtr, SrcPtr, Size, + NumEventsInWaitlist, EventsWaitlist, + Event); +} + +pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, + pi_usm_migration_flags Flags, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *OutEvent) { + return pi2ur::piextUSMEnqueuePrefetch( + Queue, Ptr, Size, Flags, NumEventsInWaitList, EventWaitList, OutEvent); +} + +pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, + size_t Length, pi_mem_advice Advice, + pi_event *OutEvent) { + + return pi2ur::piextUSMEnqueueMemAdvise(Queue, Ptr, Length, Advice, OutEvent); +} + +pi_result piextUSMEnqueueFill2D(pi_queue Queue, void *Ptr, size_t Pitch, + size_t PatternSize, const void *Pattern, + size_t Width, size_t Height, + pi_uint32 NumEventsWaitList, + const pi_event *EventsWaitList, + pi_event *Event) { + return pi2ur::piextUSMEnqueueFill2D(Queue, Ptr, Pitch, PatternSize, Pattern, + Width, Height, NumEventsWaitList, + EventsWaitList, Event); +} + +pi_result piextUSMEnqueueMemset2D(pi_queue Queue, void *Ptr, size_t Pitch, + int Value, size_t Width, size_t Height, + pi_uint32 NumEventsWaitList, + 
const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset2D(Queue, Ptr, Pitch, Value, Width, Height, + NumEventsWaitList, EventsWaitlist, + Event); +} + +pi_result piextUSMEnqueueMemcpy2D(pi_queue Queue, pi_bool Blocking, + void *DstPtr, size_t DstPitch, + const void *SrcPtr, size_t SrcPitch, + size_t Width, size_t Height, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemcpy2D( + Queue, Blocking, DstPtr, DstPitch, SrcPtr, SrcPitch, Width, Height, + NumEventsInWaitlist, EventWaitlist, Event); +} + +pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, + pi_mem_alloc_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextUSMGetMemAllocInfo(Context, Ptr, ParamName, ParamValueSize, + ParamValue, ParamValueSizeRet); +} + +pi_result piextUSMImport(const void *HostPtr, size_t Size, pi_context Context) { + return pi2ur::piextUSMImport(HostPtr, Size, Context); +} + +pi_result piextUSMRelease(const void *HostPtr, pi_context Context) { + return pi2ur::piextUSMRelease(HostPtr, Context); +} + +pi_result piextEnqueueDeviceGlobalVariableWrite( + pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingWrite, + size_t Count, size_t Offset, const void *Src, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *Event) { + return pi2ur::piextEnqueueDeviceGlobalVariableWrite( + Queue, Program, Name, BlockingWrite, Count, Offset, Src, + NumEventsInWaitList, EventsWaitList, Event); +} + +pi_result piextEnqueueDeviceGlobalVariableRead( + pi_queue Queue, pi_program Program, const char *Name, pi_bool BlockingRead, + size_t Count, size_t Offset, void *Dst, pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, pi_event *Event) { + + return pi2ur::piextEnqueueDeviceGlobalVariableRead( + Queue, Program, Name, BlockingRead, Count, Offset, Dst, + NumEventsInWaitList, 
EventsWaitList, Event); + + return PI_SUCCESS; +} + +pi_result piextEnqueueReadHostPipe(pi_queue Queue, pi_program Program, + const char *PipeSymbol, pi_bool Blocking, + void *Ptr, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *Event) { + return pi2ur::piextEnqueueReadHostPipe(Queue, Program, PipeSymbol, Blocking, + Ptr, Size, NumEventsInWaitList, + EventsWaitList, Event); +} + +pi_result piextEnqueueWriteHostPipe(pi_queue Queue, pi_program Program, + const char *PipeSymbol, pi_bool Blocking, + void *Ptr, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *Event) { + return pi2ur::piextEnqueueWriteHostPipe(Queue, Program, PipeSymbol, Blocking, + Ptr, Size, NumEventsInWaitList, + EventsWaitList, Event); +} + +pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, + size_t ParamValueSize, const void *ParamValue) { + + return pi2ur::piKernelSetExecInfo(Kernel, ParamName, ParamValueSize, + ParamValue); +} + +pi_result piextProgramSetSpecializationConstant(pi_program Prog, + pi_uint32 SpecID, size_t Size, + const void *SpecValue) { + return pi2ur::piextProgramSetSpecializationConstant(Prog, SpecID, Size, + SpecValue); +} + +pi_result piextCommandBufferCreate(pi_context Context, pi_device Device, + const pi_ext_command_buffer_desc *Desc, + pi_ext_command_buffer *RetCommandBuffer) { + return pi2ur::piextCommandBufferCreate(Context, Device, Desc, + RetCommandBuffer); +} + +pi_result piextCommandBufferRetain(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferRetain(CommandBuffer); +} + +pi_result piextCommandBufferRelease(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferRelease(CommandBuffer); +} + +pi_result piextCommandBufferFinalize(pi_ext_command_buffer CommandBuffer) { + return pi2ur::piextCommandBufferFinalize(CommandBuffer); +} + +pi_result piextCommandBufferNDRangeKernel( + pi_ext_command_buffer CommandBuffer, 
pi_kernel Kernel, pi_uint32 WorkDim, + const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, + const size_t *LocalWorkSize, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferNDRangeKernel( + CommandBuffer, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemcpyUSM( + pi_ext_command_buffer CommandBuffer, void *DstPtr, const void *SrcPtr, + size_t Size, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemcpyUSM(CommandBuffer, DstPtr, SrcPtr, Size, + NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferCopy( + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + size_t SrcOffset, size_t DstOffset, size_t Size, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferCopy( + CommandBuffer, SrcMem, DstMem, SrcOffset, DstOffset, Size, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferCopyRect( + pi_ext_command_buffer CommandBuffer, pi_mem SrcMem, pi_mem DstMem, + pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, + pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, + size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferCopyRect( + CommandBuffer, SrcMem, DstMem, SrcOrigin, DstOrigin, Region, SrcRowPitch, + SrcSlicePitch, DstRowPitch, DstSlicePitch, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferRead( + pi_ext_command_buffer 
CommandBuffer, pi_mem Buffer, size_t Offset, + size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferRead( + CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferReadRect( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferReadRect( + CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, + BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferWrite( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset, + size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferWrite( + CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList, + SyncPointWaitList, SyncPoint); +} + +pi_result piextCommandBufferMemBufferWriteRect( + pi_ext_command_buffer CommandBuffer, pi_mem Buffer, + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumSyncPointsInWaitList, + const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) { + return pi2ur::piextCommandBufferMemBufferWriteRect( + CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch, + BufferSlicePitch, 
HostRowPitch, HostSlicePitch, Ptr, + NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer, + pi_queue Queue, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { + return pi2ur::piextEnqueueCommandBuffer( + CommandBuffer, Queue, NumEventsInWaitList, EventWaitList, Event); +} + +pi_result piextPluginGetOpaqueData(void *opaque_data_param, + void **opaque_data_return) { + return pi2ur::piextPluginGetOpaqueData(opaque_data_param, opaque_data_return); +} + +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + return pi2ur::piGetDeviceAndHostTimer(Device, DeviceTime, HostTime); +} + +pi_result piextEnablePeerAccess(pi_device command_device, + pi_device peer_device) { + return pi2ur::piextEnablePeerAccess(command_device, peer_device); +} + +pi_result piextDisablePeerAccess(pi_device command_device, + pi_device peer_device) { + return pi2ur::piextDisablePeerAccess(command_device, peer_device); +} + +pi_result piextPeerAccessGetInfo(pi_device command_device, + pi_device peer_device, pi_peer_attr attr, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextPeerAccessGetInfo(command_device, peer_device, attr, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piTearDown(void *PluginParameter) { + return pi2ur::piTearDown(PluginParameter); +} + pi_result piPluginInit(pi_plugin *PluginInit) { // Check that the major version matches in PiVersion and SupportedVersion _PI_PLUGIN_VERSION_CHECK(PluginInit->PiVersion, SupportedVersion); @@ -34,175 +1133,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_ERROR_INVALID_VALUE; strncpy(PluginInit->PluginVersion, SupportedVersion, PluginVersionSize); -#define _PI_CL(pi_api, ocl_api) \ - (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&ocl_api); - - // Platform - _PI_CL(piPlatformsGet, 
pi2ur::piPlatformsGet) - _PI_CL(piPlatformGetInfo, pi2ur::piPlatformGetInfo) - _PI_CL(piextPlatformGetNativeHandle, pi2ur::piextPlatformGetNativeHandle) - _PI_CL(piextPlatformCreateWithNativeHandle, - pi2ur::piextPlatformCreateWithNativeHandle) - // Device - _PI_CL(piDevicesGet, pi2ur::piDevicesGet) - _PI_CL(piDeviceGetInfo, pi2ur::piDeviceGetInfo) - _PI_CL(piDevicePartition, pi2ur::piDevicePartition) - _PI_CL(piDeviceRetain, pi2ur::piDeviceRetain) - _PI_CL(piDeviceRelease, pi2ur::piDeviceRelease) - _PI_CL(piextDeviceSelectBinary, pi2ur::piextDeviceSelectBinary) - _PI_CL(piextGetDeviceFunctionPointer, pi2ur::piextGetDeviceFunctionPointer) - _PI_CL(piextDeviceGetNativeHandle, pi2ur::piextDeviceGetNativeHandle) - _PI_CL(piextDeviceCreateWithNativeHandle, - pi2ur::piextDeviceCreateWithNativeHandle) - // Context - _PI_CL(piContextCreate, pi2ur::piContextCreate) - _PI_CL(piContextGetInfo, pi2ur::piContextGetInfo) - _PI_CL(piContextRetain, pi2ur::piContextRetain) - _PI_CL(piContextRelease, pi2ur::piContextRelease) - _PI_CL(piextContextGetNativeHandle, pi2ur::piextContextGetNativeHandle) - _PI_CL(piextContextCreateWithNativeHandle, - pi2ur::piextContextCreateWithNativeHandle) - // Queue - _PI_CL(piQueueCreate, pi2ur::piQueueCreate) - _PI_CL(piextQueueCreate, pi2ur::piextQueueCreate) - _PI_CL(piQueueGetInfo, pi2ur::piQueueGetInfo) - _PI_CL(piQueueFinish, pi2ur::piQueueFinish) - _PI_CL(piQueueFlush, pi2ur::piQueueFlush) - _PI_CL(piQueueRetain, pi2ur::piQueueRetain) - _PI_CL(piQueueRelease, pi2ur::piQueueRelease) - _PI_CL(piextQueueGetNativeHandle, pi2ur::piextQueueGetNativeHandle) - _PI_CL(piextQueueCreateWithNativeHandle, - pi2ur::piextQueueCreateWithNativeHandle) - // Memory - _PI_CL(piMemBufferCreate, pi2ur::piMemBufferCreate) - _PI_CL(piMemImageCreate, pi2ur::piMemImageCreate) - _PI_CL(piMemGetInfo, pi2ur::piMemGetInfo) - _PI_CL(piMemImageGetInfo, pi2ur::piMemImageGetInfo) - _PI_CL(piMemRetain, pi2ur::piMemRetain) - _PI_CL(piMemRelease, pi2ur::piMemRelease) - 
_PI_CL(piMemBufferPartition, pi2ur::piMemBufferPartition) - _PI_CL(piextMemGetNativeHandle, pi2ur::piextMemGetNativeHandle) - _PI_CL(piextMemCreateWithNativeHandle, pi2ur::piextMemCreateWithNativeHandle) - _PI_CL(piextMemImageCreateWithNativeHandle, - pi2ur::piextMemImageCreateWithNativeHandle) - // Program - _PI_CL(piProgramCreate, pi2ur::piProgramCreate) - _PI_CL(piProgramCreateWithBinary, pi2ur::piProgramCreateWithBinary) - _PI_CL(piProgramGetInfo, pi2ur::piProgramGetInfo) - _PI_CL(piProgramCompile, pi2ur::piProgramCompile) - _PI_CL(piProgramBuild, pi2ur::piProgramBuild) - _PI_CL(piProgramLink, pi2ur::piProgramLink) - _PI_CL(piProgramGetBuildInfo, pi2ur::piProgramGetBuildInfo) - _PI_CL(piProgramRetain, pi2ur::piProgramRetain) - _PI_CL(piProgramRelease, pi2ur::piProgramRelease) - _PI_CL(piextProgramSetSpecializationConstant, - pi2ur::piextProgramSetSpecializationConstant) - _PI_CL(piextProgramGetNativeHandle, pi2ur::piextProgramGetNativeHandle) - _PI_CL(piextProgramCreateWithNativeHandle, - pi2ur::piextProgramCreateWithNativeHandle) - // Kernel - _PI_CL(piKernelCreate, pi2ur::piKernelCreate) - _PI_CL(piKernelSetArg, pi2ur::piKernelSetArg) - _PI_CL(piKernelGetInfo, pi2ur::piKernelGetInfo) - _PI_CL(piKernelGetGroupInfo, pi2ur::piKernelGetGroupInfo) - _PI_CL(piKernelGetSubGroupInfo, pi2ur::piKernelGetSubGroupInfo) - _PI_CL(piKernelRetain, pi2ur::piKernelRetain) - _PI_CL(piKernelRelease, pi2ur::piKernelRelease) - _PI_CL(piKernelSetExecInfo, pi2ur::piKernelSetExecInfo) - _PI_CL(piextKernelSetArgPointer, pi2ur::piextKernelSetArgPointer) - _PI_CL(piextKernelCreateWithNativeHandle, - pi2ur::piextKernelCreateWithNativeHandle) - _PI_CL(piextKernelGetNativeHandle, pi2ur::piextKernelGetNativeHandle) - // Event - _PI_CL(piEventCreate, pi2ur::piEventCreate) - _PI_CL(piEventGetInfo, pi2ur::piEventGetInfo) - _PI_CL(piEventGetProfilingInfo, pi2ur::piEventGetProfilingInfo) - _PI_CL(piEventsWait, pi2ur::piEventsWait) - _PI_CL(piEventSetCallback, pi2ur::piEventSetCallback) - 
_PI_CL(piEventSetStatus, pi2ur::piEventSetStatus) - _PI_CL(piEventRetain, pi2ur::piEventRetain) - _PI_CL(piEventRelease, pi2ur::piEventRelease) - _PI_CL(piextEventGetNativeHandle, pi2ur::piextEventGetNativeHandle) - _PI_CL(piextEventCreateWithNativeHandle, - pi2ur::piextEventCreateWithNativeHandle) - // Sampler - _PI_CL(piSamplerCreate, pi2ur::piSamplerCreate) - _PI_CL(piSamplerGetInfo, pi2ur::piSamplerGetInfo) - _PI_CL(piSamplerRetain, pi2ur::piSamplerRetain) - _PI_CL(piSamplerRelease, pi2ur::piSamplerRelease) - // Queue commands - _PI_CL(piEnqueueKernelLaunch, pi2ur::piEnqueueKernelLaunch) - _PI_CL(piEnqueueEventsWait, pi2ur::piEnqueueEventsWait) - _PI_CL(piEnqueueEventsWaitWithBarrier, pi2ur::piEnqueueEventsWaitWithBarrier) - _PI_CL(piEnqueueMemBufferRead, pi2ur::piEnqueueMemBufferRead) - _PI_CL(piEnqueueMemBufferReadRect, pi2ur::piEnqueueMemBufferReadRect) - _PI_CL(piEnqueueMemBufferWrite, pi2ur::piEnqueueMemBufferWrite) - _PI_CL(piEnqueueMemBufferWriteRect, pi2ur::piEnqueueMemBufferWriteRect) - _PI_CL(piEnqueueMemBufferCopy, pi2ur::piEnqueueMemBufferCopy) - _PI_CL(piEnqueueMemBufferCopyRect, pi2ur::piEnqueueMemBufferCopyRect) - _PI_CL(piEnqueueMemBufferFill, pi2ur::piEnqueueMemBufferFill) - _PI_CL(piEnqueueMemImageRead, pi2ur::piEnqueueMemImageRead) - _PI_CL(piEnqueueMemImageWrite, pi2ur::piEnqueueMemImageWrite) - _PI_CL(piEnqueueMemImageCopy, pi2ur::piEnqueueMemImageCopy) - _PI_CL(piEnqueueMemImageFill, pi2ur::piEnqueueMemImageFill) - _PI_CL(piEnqueueMemBufferMap, pi2ur::piEnqueueMemBufferMap) - _PI_CL(piEnqueueMemUnmap, pi2ur::piEnqueueMemUnmap) - // USM - _PI_CL(piextUSMHostAlloc, pi2ur::piextUSMHostAlloc) - _PI_CL(piextUSMDeviceAlloc, pi2ur::piextUSMDeviceAlloc) - _PI_CL(piextUSMSharedAlloc, pi2ur::piextUSMSharedAlloc) - _PI_CL(piextUSMFree, pi2ur::piextUSMFree) - _PI_CL(piextUSMEnqueueMemset, pi2ur::piextUSMEnqueueMemset) - _PI_CL(piextUSMEnqueueMemcpy, pi2ur::piextUSMEnqueueMemcpy) - _PI_CL(piextUSMEnqueuePrefetch, pi2ur::piextUSMEnqueuePrefetch) - 
_PI_CL(piextUSMEnqueueMemAdvise, pi2ur::piextUSMEnqueueMemAdvise) - _PI_CL(piextUSMEnqueueFill2D, pi2ur::piextUSMEnqueueFill2D) - _PI_CL(piextUSMEnqueueMemset2D, pi2ur::piextUSMEnqueueMemset2D) - _PI_CL(piextUSMEnqueueMemcpy2D, pi2ur::piextUSMEnqueueMemcpy2D) - _PI_CL(piextUSMGetMemAllocInfo, pi2ur::piextUSMGetMemAllocInfo) - _PI_CL(piextUSMImport, pi2ur::piextUSMImport) - _PI_CL(piextUSMRelease, pi2ur::piextUSMRelease) - // Device global variable - _PI_CL(piextEnqueueDeviceGlobalVariableWrite, - pi2ur::piextEnqueueDeviceGlobalVariableWrite) - _PI_CL(piextEnqueueDeviceGlobalVariableRead, - pi2ur::piextEnqueueDeviceGlobalVariableRead) - // Host Pipe - _PI_CL(piextEnqueueReadHostPipe, pi2ur::piextEnqueueReadHostPipe) - _PI_CL(piextEnqueueWriteHostPipe, pi2ur::piextEnqueueWriteHostPipe) - // Command-buffer - _PI_CL(piextCommandBufferCreate, pi2ur::piextCommandBufferCreate) - _PI_CL(piextCommandBufferRetain, pi2ur::piextCommandBufferRetain) - _PI_CL(piextCommandBufferRelease, pi2ur::piextCommandBufferRelease) - _PI_CL(piextCommandBufferNDRangeKernel, - pi2ur::piextCommandBufferNDRangeKernel) - _PI_CL(piextCommandBufferMemcpyUSM, pi2ur::piextCommandBufferMemcpyUSM) - _PI_CL(piextCommandBufferMemBufferCopy, - pi2ur::piextCommandBufferMemBufferCopy) - _PI_CL(piextCommandBufferMemBufferCopyRect, - pi2ur::piextCommandBufferMemBufferCopyRect) - _PI_CL(piextCommandBufferMemBufferRead, - pi2ur::piextCommandBufferMemBufferRead) - _PI_CL(piextCommandBufferMemBufferReadRect, - pi2ur::piextCommandBufferMemBufferReadRect) - _PI_CL(piextCommandBufferMemBufferWrite, - pi2ur::piextCommandBufferMemBufferWrite) - _PI_CL(piextCommandBufferMemBufferWriteRect, - pi2ur::piextCommandBufferMemBufferWriteRect) - _PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer) - // Kernel - _PI_CL(piextKernelSetArgMemObj, pi2ur::piextKernelSetArgMemObj) - _PI_CL(piextKernelSetArgSampler, pi2ur::piextKernelSetArgSampler) - _PI_CL(piPluginGetLastError, pi2ur::piPluginGetLastError) - 
_PI_CL(piTearDown, pi2ur::piTearDown) - _PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer) - _PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption) - // Peer to Peer - _PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess) - _PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess) - _PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo) - -#undef _PI_CL +#define _PI_API(api) \ + (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); +#include +#undef _PI_API return PI_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8750afd5e1481..d7f9f491bbb1b 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -266,6 +266,7 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) "ur/adapters/opencl/device.hpp" "ur/adapters/opencl/enqueue.cpp" "ur/adapters/opencl/event.cpp" + "ur/adapters/opencl/image.cpp" "ur/adapters/opencl/kernel.cpp" "ur/adapters/opencl/memory.cpp" "ur/adapters/opencl/platform.cpp" diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp new file mode 100644 index 0000000000000..5e7c2405a31cc --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/image.cpp @@ -0,0 +1,174 @@ +//===---------- image.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_usm_desc_t *pUSMDesc, + [[maybe_unused]] ur_usm_pool_handle_t pool, + [[maybe_unused]] size_t widthInBytes, [[maybe_unused]] size_t height, + [[maybe_unused]] size_t elementSizeBytes, [[maybe_unused]] void **ppMem, + [[maybe_unused]] size_t *pResultPitch) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesUnsampledImageHandleDestroyExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_handle_t hImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesSampledImageHandleDestroyExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_handle_t hImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( + [[maybe_unused]] 
ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_mem_handle_t *phMem, + [[maybe_unused]] ur_exp_image_handle_t *phImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_sampler_handle_t hSampler, + [[maybe_unused]] ur_mem_handle_t *phMem, + [[maybe_unused]] ur_exp_image_handle_t *phImage) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pDst, + [[maybe_unused]] void *pSrc, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + [[maybe_unused]] ur_rect_offset_t srcOffset, + [[maybe_unused]] ur_rect_offset_t dstOffset, + [[maybe_unused]] ur_rect_region_t copyExtent, + [[maybe_unused]] ur_rect_region_t hostExtent, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] ur_image_info_t propName, + [[maybe_unused]] void *pPropValue, [[maybe_unused]] size_t *pPropSizeRet) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t 
UR_APICALL urBindlessImagesMipmapGetLevelExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, + [[maybe_unused]] uint32_t mipmapLevel, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesMipmapFreeExp([[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_image_mem_handle_t hMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, [[maybe_unused]] size_t size, + [[maybe_unused]] ur_exp_interop_mem_desc_t *pInteropMemDesc, + [[maybe_unused]] ur_exp_interop_mem_handle_t *phInteropMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem, + [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urBindlessImagesImportExternalSemaphoreOpaqueFDExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, + 
[[maybe_unused]] ur_exp_interop_semaphore_handle_t + *phInteropSemaphoreHandle) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hInteropSemaphore) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 5ff8e07dc2841..952abc9c130fe 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -308,6 +308,39 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + 
pDdiTable->pfnUnsampledImageHandleDestroyExp = + urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; + pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = + urBindlessImagesImportExternalSemaphoreOpaqueFDExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + urBindlessImagesSignalExternalSemaphoreExp; + return UR_RESULT_SUCCESS; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index fc8355fd7bacd..11ee74902849b 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -9,31 +9,79 @@ piContextCreate piContextGetInfo +piContextRelease +piContextRetain piDeviceGetInfo +piDevicePartition +piDeviceRelease +piDeviceRetain piDevicesGet +piEnqueueEventsWait +piEnqueueEventsWaitWithBarrier +piEnqueueKernelLaunch +piEnqueueMemBufferCopy +piEnqueueMemBufferCopyRect +piEnqueueMemBufferFill 
piEnqueueMemBufferMap +piEnqueueMemBufferRead +piEnqueueMemBufferReadRect +piEnqueueMemBufferWrite +piEnqueueMemBufferWriteRect +piEnqueueMemImageCopy +piEnqueueMemImageFill +piEnqueueMemImageRead +piEnqueueMemImageWrite +piEnqueueMemUnmap piEventCreate piEventGetInfo +piEventGetProfilingInfo +piEventRelease +piEventRetain +piEventSetCallback +piEventSetStatus +piEventsWait piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo +piKernelGetInfo piKernelGetSubGroupInfo +piKernelRelease +piKernelRetain +piKernelSetArg piKernelSetExecInfo piMemBufferCreate piMemBufferPartition +piMemGetInfo piMemImageCreate +piMemImageGetInfo +piMemRelease +piMemRetain piPlatformGetInfo piPlatformsGet piPluginGetBackendOption piPluginGetLastError piPluginInit +piProgramBuild +piProgramCompile piProgramCreate piProgramCreateWithBinary +piProgramGetBuildInfo +piProgramGetInfo piProgramLink +piProgramRelease +piProgramRetain piQueueCreate +piQueueFinish +piQueueFlush piQueueGetInfo +piQueueRelease +piQueueRetain piSamplerCreate +piSamplerGetInfo +piSamplerRelease +piSamplerRetain piTearDown +piextBindlessImageSamplerCreate piextCommandBufferCreate piextCommandBufferFinalize piextCommandBufferMemBufferCopy @@ -48,14 +96,20 @@ piextCommandBufferRelease piextCommandBufferRetain piextContextCreateWithNativeHandle piextContextGetNativeHandle +piextContextSetExtendedDeleter +piextDestroyExternalSemaphore piextDeviceCreateWithNativeHandle piextDeviceGetNativeHandle piextDeviceSelectBinary +piextDisablePeerAccess +piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle +piextEventGetNativeHandle piextGetDeviceFunctionPointer +piextImportExternalSemaphoreOpaqueFD piextKernelCreateWithNativeHandle piextKernelGetNativeHandle piextKernelSetArgMemObj @@ -63,15 +117,31 @@ piextKernelSetArgPointer piextKernelSetArgSampler piextMemCreateWithNativeHandle piextMemGetNativeHandle +piextMemImageAllocate +piextMemImageCopy 
piextMemImageCreateWithNativeHandle +piextMemImageFree +piextMemImageGetInfo +piextMemImportOpaqueFD +piextMemMapExternalArray +piextMemMipmapFree +piextMemMipmapGetLevel +piextMemReleaseInterop +piextMemSampledImageCreate +piextMemSampledImageHandleDestroy +piextMemUnsampledImageCreate +piextMemUnsampledImageHandleDestroy +piextPeerAccessGetInfo piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle +piextPluginGetOpaqueData piextProgramCreateWithNativeHandle piextProgramGetNativeHandle piextProgramSetSpecializationConstant piextQueueCreate piextQueueCreateWithNativeHandle piextQueueGetNativeHandle +piextSignalExternalSemaphore piextUSMDeviceAlloc piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise @@ -84,5 +154,7 @@ piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc piextUSMImport +piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextWaitExternalSemaphore From ae105a8216ee1b5e144dd212d826050d52c3231e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 21 Jul 2023 19:35:39 +0100 Subject: [PATCH 25/36] [SYCL][OpenCL] Multiple fixes: - Fix windows builds - Interop e2e tests - urQueueCreate - urKernelSetArgMemObj - UR_DEVICE_INFO_IP_VERSION --- sycl/plugins/unified_runtime/pi2ur.hpp | 2 -- .../ur/adapters/opencl/common.hpp | 2 ++ .../ur/adapters/opencl/device.cpp | 5 +++- .../ur/adapters/opencl/kernel.cpp | 2 +- .../ur/adapters/opencl/queue.cpp | 9 ++----- .../ur/adapters/opencl/sampler.cpp | 24 ++++++++++--------- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 635f609c3b1e7..762f6aede44e3 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -2104,8 +2104,6 @@ inline pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle, pi_context Context, pi_program Program, bool OwnNativeHandle, pi_kernel *Kernel) { - PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); - 
PI_ASSERT(Program, PI_ERROR_INVALID_PROGRAM); PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE); PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 1ad5c7115a8b2..3161a8a3f6125 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -8,6 +8,8 @@ #pragma once #include +#include +#include #include #include #include diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index f623696c7f532..1fb7e1d05c616 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -303,6 +303,8 @@ static cl_int mapURDeviceInfoToCL(ur_device_info_t URPropName) { return CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: return CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_IP_VERSION: + return CL_DEVICE_IP_VERSION_INTEL; default: return -1; } @@ -845,7 +847,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_EXTENSIONS: case UR_DEVICE_INFO_BUILT_IN_KERNELS: case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + case UR_DEVICE_INFO_IP_VERSION: { /* We can just use the OpenCL outputs because the sizes of OpenCL types * are the same as UR. 
* | CL | UR | Size | diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 77ed6055315e5..9d9cd12c0398a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -352,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(hArgValue)); + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index f99e0df2ce4dd..04a8632739fdb 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -47,12 +47,6 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT; } - if (URQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { - CLCommandQueueProperties |= CL_QUEUE_PRIORITY_LOW_KHR; - } - if (URQueueProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - CLCommandQueueProperties |= CL_QUEUE_PRIORITY_HIGH_KHR; - } return CLCommandQueueProperties; } @@ -84,7 +78,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_int RetErr = CL_INVALID_OPERATION; - if (Version >= oclv::V2_0) { + if (Version < oclv::V2_0) { *phQueue = cl_adapter::cast( clCreateCommandQueue(cl_adapter::cast(hContext), cl_adapter::cast(hDevice), @@ -93,6 +87,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } + /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ cl_queue_properties CreationFlagProperties[] = { 
CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; *phQueue = diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 99397f6dbdeb2..049d9377a1afe 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -1,6 +1,12 @@ -#include "cassert" +//===--------- sampler.cpp - OpenCL Adapter --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + #include "common.hpp" -#include namespace { @@ -19,7 +25,7 @@ cl_sampler_info ur2CLSamplerInfo(ur_sampler_info_t URInfo) { #undef CASE default: - assert(0 && "Unhandled: ur_sampler_info_t"); + cl_adapter::die("Unhandled: ur_sampler_info_t"); } } @@ -40,8 +46,7 @@ cl_addressing_mode ur2CLAddressingMode(ur_sampler_addressing_mode_t Mode) { #undef CASE default: - assert(0 && "Unhandled: ur_sampler_addressing_mode_t"); - break; + cl_adapter::die("Unhandled: ur_sampler_addressing_mode_t"); } } @@ -58,8 +63,7 @@ cl_filter_mode ur2CLFilterMode(ur_sampler_filter_mode_t Mode) { #undef CASE default: - assert(0 && "Unhandled: ur_sampler_filter_mode_t"); - break; + cl_adapter::die("Unhandled: ur_sampler_filter_mode_t"); } } @@ -80,8 +84,7 @@ ur_sampler_addressing_mode_t cl2URAddressingMode(cl_addressing_mode Mode) { #undef CASE default: - assert(0 && "Unhandled: cl_addressing_mode"); - break; + cl_adapter::die("Unhandled: cl_addressing_mode"); } } @@ -97,8 +100,7 @@ ur_sampler_filter_mode_t cl2URFilterMode(cl_filter_mode Mode) { #undef CASE default: - assert(0 && "Unhandled: cl_filter_mode"); - break; + cl_adapter::die("Unhandled: cl_filter_mode"); } } From 5d581b215b0ce687db628e0b9566ff99a9e2c597 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 9 Aug 2023 16:00:24 +0000 Subject: [PATCH 26/36] [SYCL][OpenCL] Address upstream review comments --- sycl/plugins/opencl/CMakeLists.txt | 4 +- sycl/plugins/unified_runtime/CMakeLists.txt | 2 + sycl/plugins/unified_runtime/pi2ur.hpp | 11 -- .../ur/adapters/opencl/adapter.cpp | 80 +++++++++++ .../ur/adapters/opencl/adapter.hpp | 11 ++ .../ur/adapters/opencl/common.cpp | 1 - .../ur/adapters/opencl/context.cpp | 11 -- .../ur/adapters/opencl/device.cpp | 13 -- .../ur/adapters/opencl/enqueue.cpp | 130 ------------------ .../ur/adapters/opencl/event.cpp | 13 +- .../ur/adapters/opencl/kernel.cpp | 18 --- .../ur/adapters/opencl/memory.cpp | 39 ++---- .../ur/adapters/opencl/platform.cpp | 54 +------- .../ur/adapters/opencl/program.cpp | 41 +----- .../ur/adapters/opencl/queue.cpp | 20 +-- .../ur/adapters/opencl/sampler.cpp | 13 -- .../adapters/opencl/ur_interface_loader.cpp | 6 +- .../ur/adapters/opencl/usm.cpp | 41 ++---- 18 files changed, 139 insertions(+), 369 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp create mode 100644 sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index d292a84ba7683..a227cf79bc589 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -17,8 +17,8 @@ add_sycl_plugin(opencl "../unified_runtime/pi2ur.cpp" "../unified_runtime/ur/ur.hpp" "../unified_runtime/ur/ur.cpp" - "../unified_runtime/ur/usm_allocator.cpp" - "../unified_runtime/ur/usm_allocator.hpp" + "../unified_runtime/ur/adapters/opencl/adapter.cpp" + "../unified_runtime/ur/adapters/opencl/adapter.hpp" "../unified_runtime/ur/adapters/opencl/common.cpp" "../unified_runtime/ur/adapters/opencl/common.hpp" "../unified_runtime/ur/adapters/opencl/context.cpp" diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt 
index 525f016c7a0e5..38eb27eb143b1 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -259,6 +259,8 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) SOURCES "ur/ur.hpp" "ur/ur.cpp" + "ur/adapters/opencl/adapter.cpp" + "ur/adapters/opencl/adapter.hpp" "ur/adapters/opencl/command_buffer.hpp" "ur/adapters/opencl/command_buffer.cpp" "ur/adapters/opencl/common.cpp" diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index c67260658b55e..5c16f9e1a3d9b 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3936,17 +3936,6 @@ inline pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, HANDLE_ERRORS(urEventGetInfo(UREvent, PropName, ParamValueSize, ParamValue, ParamValueSizeRet)); - if (ParamName == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - /* If the PI_EVENT_INFO_COMMAND_EXECUTION_STATUS info value is - * PI_EVENT_QUEUED, change it to PI_EVENT_SUBMITTED. This change is needed - * since sycl::info::event::event_command_status has no equivalent to - * PI_EVENT_QUEUED. */ - const auto param_value_int = static_cast(ParamValue); - if (*param_value_int == PI_EVENT_QUEUED) { - *param_value_int = PI_EVENT_SUBMITTED; - } - } - return PI_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp new file mode 100644 index 0000000000000..88684e597f465 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp @@ -0,0 +1,80 @@ +//===-------------- adapter.cpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+
+struct ur_adapter_handle_t_ {
+  std::atomic<uint32_t> RefCount = 0; // Adjusted by urAdapterRetain/Release.
+};
+
+ur_adapter_handle_t_ adapter{}; // The single adapter urAdapterGet returns.
+
+// Allocates the extension function pointer cache; urTearDown frees it.
+UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t,
+                                           ur_loader_config_handle_t) {
+  cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT();
+  return UR_RESULT_SUCCESS;
+}
+
+// Frees the cache and resets the pointer, so a repeated call does nothing.
+UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) {
+  if (cl_ext::ExtFuncPtrCache) {
+    delete cl_ext::ExtFuncPtrCache;
+    cl_ext::ExtFuncPtrCache = nullptr;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+// Reports the single adapter; either output pointer may be null.
+UR_APIEXPORT ur_result_t UR_APICALL
+urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
+             uint32_t *pNumAdapters) {
+  if (NumEntries > 0 && phAdapters) {
+    *phAdapters = &adapter;
+  }
+  if (pNumAdapters) {
+    *pNumAdapters = 1;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
+  ++adapter.RefCount; // Handle is ignored: only one adapter exists.
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
+  --adapter.RefCount; // Only counts; cleanup is done by urTearDown.
+  return UR_RESULT_SUCCESS;
+}
+
+// Returns the message and code held in cl_adapter's error state.
+UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError(
+    ur_adapter_handle_t, const char **ppMessage, int32_t *pError) {
+  *ppMessage = cl_adapter::ErrorMessage;
+  *pError = cl_adapter::ErrorMessageCode;
+  return UR_RESULT_SUCCESS;
+}
+
+// Answers backend and reference-count queries; rejects other properties.
+UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
+                                                     ur_adapter_info_t propName,
+                                                     size_t propSize,
+                                                     void *pPropValue,
+                                                     size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+  switch (propName) {
+  case UR_ADAPTER_INFO_BACKEND:
+    // This is the OpenCL adapter, so it must not report the CUDA backend.
+    return ReturnValue(UR_ADAPTER_BACKEND_OPENCL);
+  case UR_ADAPTER_INFO_REFERENCE_COUNT:
+    return ReturnValue(adapter.RefCount.load());
+  default:
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+}
diff --git
a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp new file mode 100644 index 0000000000000..d0f530f3daf77 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.hpp @@ -0,0 +1,11 @@ +//===-------------- adapter.hpp - OpenCL Adapter ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +struct ur_adapter_handle_t_; + +extern ur_adapter_handle_t_ adapter; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp index 4d9623be53f4e..9a57de1906792 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.cpp @@ -75,7 +75,6 @@ void cl_adapter::die(const char *Message) { /// /// UR_RESULT_SUCCESS ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { - UR_ASSERT(NativeHandle, UR_RESULT_ERROR_INVALID_NULL_POINTER) *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 7b58f912543c3..88531e8c5b543 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -36,9 +36,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { - UR_ASSERT(phDevices, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int Ret; *phContext = cl_adapter::cast( 
clCreateContext(nullptr, cl_adapter::cast(DeviceCount), @@ -72,7 +69,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURContextInfoToCL(propName); @@ -107,7 +103,6 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); return mapCLErrorToUR(Ret); } @@ -115,7 +110,6 @@ urContextRelease(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); return mapCLErrorToUR(Ret); } @@ -123,9 +117,6 @@ urContextRetain(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phNativeContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeContext = reinterpret_cast(hContext); return UR_RESULT_SUCCESS; } @@ -134,8 +125,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, const ur_context_native_properties_t *, ur_context_handle_t *phContext) { - UR_ASSERT(hNativeContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - *phContext = reinterpret_cast(hNativeContext); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 
1fb7e1d05c616..46a08adfaddd7 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -316,7 +316,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_device_info CLPropName = mapURDeviceInfoToCL(propName); @@ -902,9 +901,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( const ur_device_partition_properties_t *pProperties, uint32_t NumDevices, ur_device_handle_t *phSubDevices, uint32_t *pNumDevicesRet) { - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pProperties, UR_RESULT_ERROR_INVALID_NULL_POINTER); - std::vector CLProperties( pProperties->PropCount + 2); @@ -967,8 +963,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); return mapCLErrorToUR(Result); @@ -977,8 +971,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); return mapCLErrorToUR(Result); @@ -987,9 +979,6 @@ urDeviceRelease(ur_device_handle_t hDevice) { UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phNativeDevice, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeDevice = reinterpret_cast(hDevice); return UR_RESULT_SUCCESS; } @@ -998,8 +987,6 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { - UR_ASSERT(hNativeDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - *phDevice = reinterpret_cast(hNativeDevice); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp index 9d6cd14d6cee9..01c353236a78a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/enqueue.cpp @@ -29,11 +29,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pGlobalWorkSize, UR_RESULT_ERROR_INVALID_NULL_POINTER); - CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( cl_adapter::cast(hQueue), cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, @@ -48,8 +43,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), numEventsInWaitList, cl_adapter::cast(phEventWaitList), @@ -62,8 +55,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( cl_adapter::cast(hQueue), numEventsInWaitList, 
cl_adapter::cast(phEventWaitList), @@ -77,14 +68,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, @@ -99,14 +82,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, @@ -124,13 +99,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - 
UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, @@ -152,13 +120,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, @@ -178,14 +139,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBufferSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBufferDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), @@ -204,14 +157,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBufferSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBufferDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && 
numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), @@ -232,14 +177,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, @@ -255,14 +192,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueReadImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingRead, @@ -280,14 +209,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueWriteImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingWrite, @@ -306,14 +227,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hImageSrc, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hImageDst, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - CL_RETURN_ON_FAILURE(clEnqueueCopyImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), @@ -331,13 +244,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(ppRetMap, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); cl_int Err; *ppRetMap = clEnqueueMapBuffer( @@ -356,12 +262,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( 
ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( cl_adapter::cast(hQueue), @@ -377,13 +277,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); cl_context Ctx = nullptr; cl_int Res = @@ -415,13 +308,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); cl_context Ctx = nullptr; cl_int Res = @@ -453,14 
+339,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( const char *pipe_symbol, bool blocking, void *pDst, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pipe_symbol, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); cl_context CLContext; cl_int CLErr = clGetCommandQueueInfo( @@ -493,14 +371,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( const char *pipe_symbol, bool blocking, void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pipe_symbol, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(!(phEventWaitList == nullptr && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != nullptr && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); cl_context CLContext; cl_int CLErr = clGetCommandQueueInfo( diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp index 3f5a4e7fa7d3b..9024349cafb2a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -51,12 +51,10 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( - 
ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, + ur_native_handle_t hNativeEvent, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_event_native_properties_t *pProperties, ur_event_handle_t *phEvent) { - UR_ASSERT(hNativeEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - (void)hContext; - (void)pProperties; *phEvent = reinterpret_cast(hNativeEvent); return UR_RESULT_SUCCESS; } @@ -67,14 +65,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; @@ -82,7 +78,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - UR_ASSERT(phEventWaitList, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int RetErr = clWaitForEvents( numEvents, cl_adapter::cast(phEventWaitList)); CL_RETURN_ON_FAILURE(RetErr); @@ -94,7 +89,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_event_info CLEventInfo = convertUREventInfoToCL(propName); cl_int RetErr = clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, @@ -106,7 +100,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t 
propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hEvent, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), CLProfilingInfo, propSize, pPropValue, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 9d9cd12c0398a..7cab7e0797b0f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -57,8 +57,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), mapURKernelInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); @@ -91,9 +89,6 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( cl_adapter::cast(hKernel), cl_adapter::cast(hDevice), @@ -124,9 +119,6 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_sub_group_info_t propName, size_t, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - std::shared_ptr InputValue; size_t InputValueSize = 0; size_t RetVal; @@ -204,16 +196,12 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - 
CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); return UR_RESULT_SUCCESS; } @@ -328,18 +316,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phNativeKernel, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeKernel = reinterpret_cast(hKernel); - return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { - UR_ASSERT(hNativeKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); *phKernel = reinterpret_cast(hNativeKernel); return UR_RESULT_SUCCESS; @@ -349,7 +332,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clSetKernelArg( cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), sizeof(hArgValue), cl_adapter::cast(&hArgValue)); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index 13be5e20956ef..5012c7fd65b7e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -224,8 +224,6 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const 
ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int RetErr = CL_INVALID_OPERATION; if (pProperties) { @@ -282,8 +280,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, void *pHost, ur_mem_handle_t *phMem) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int RetErr = CL_INVALID_OPERATION; @@ -303,8 +299,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( ur_mem_handle_t hBuffer, ur_mem_flags_t flags, ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, ur_mem_handle_t *phMem) { - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); cl_int RetErr = CL_INVALID_OPERATION; @@ -335,28 +329,23 @@ urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t hNativeMem, ur_context_handle_t hContext, - const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - (void)hContext; - (void)pProperties; - UR_ASSERT(hNativeMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + ur_native_handle_t hNativeMem, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_mem_native_properties_t *pProperties, + ur_mem_handle_t *phMem) { + *phMem = reinterpret_cast(hNativeMem); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t hNativeMem, ur_context_handle_t hContext, - const ur_image_format_t *pImageFormat, const ur_image_desc_t 
*pImageDesc, - const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - (void)hContext; - (void)pImageFormat; - (void)pImageDesc; - (void)pProperties; - UR_ASSERT(hNativeMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + ur_native_handle_t hNativeMem, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] const ur_mem_native_properties_t *pProperties, + ur_mem_handle_t *phMem) { + *phMem = reinterpret_cast(hNativeMem); return UR_RESULT_SUCCESS; } @@ -366,7 +355,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); @@ -382,7 +370,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hMemory, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemImageInfoToCL(propName); @@ -394,13 +381,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - UR_ASSERT(hMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index 590aa4d4e9e5e..d7e323e230a32 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -51,7 +51,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, size_t propSize, void *pPropValue, size_t *pSizeRet) { - UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); const cl_int CLPropName = mapURPlatformInfoToCL(propName); @@ -73,21 +72,16 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, } } -UR_DLLEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( - ur_platform_handle_t hPlatform, ur_api_version_t *pVersion) { - UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pVersion, UR_RESULT_ERROR_INVALID_NULL_POINTER); - +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGetApiVersion([[maybe_unused]] ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion) { *pVersion = UR_API_VERSION_CURRENT; return UR_RESULT_SUCCESS; } -UR_DLLEXPORT ur_result_t UR_APICALL -urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms, - uint32_t *pNumPlatforms) { - - UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_SIZE); +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { cl_int Result = clGetPlatformIDs(cl_adapter::cast(NumEntries), @@ -107,10 +101,6 @@ urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms, UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - - UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - 
UR_ASSERT(phNativePlatform, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativePlatform = reinterpret_cast(hPlatform); return UR_RESULT_SUCCESS; } @@ -118,34 +108,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { - - UR_ASSERT(hNativePlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - *phPlatform = reinterpret_cast(hNativePlatform); return UR_RESULT_SUCCESS; } -UR_DLLEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) { - cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); - return UR_RESULT_SUCCESS; -} - -/* This API is called by Sycl RT to notify the end of the adapter lifetime. - * Windows: dynamically loaded plugins might have been unloaded already when - * this is called. Sycl RT holds onto the UR adapter so it can be called safely. - * But this is not transitive. If the UR adapter dynamically loaded a - * different DLL, that may have been unloaded already. - * TODO: add a global variable lifetime management code here (see - * pi_level_zero.cpp for reference). */ -UR_DLLEXPORT ur_result_t UR_APICALL urTearDown(void *pParams) { - UR_ASSERT(pParams, UR_RESULT_ERROR_INVALID_NULL_POINTER); - if (cl_ext::ExtFuncPtrCache) { - delete cl_ext::ExtFuncPtrCache; - cl_ext::ExtFuncPtrCache = nullptr; - } - return UR_RESULT_SUCCESS; -} - // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-cl-opt-disable' for pFrontendOption = -O0 and '' for others. 
@@ -175,11 +141,3 @@ urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, } return UR_RESULT_ERROR_INVALID_VALUE; } - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetLastError( - ur_platform_handle_t, const char **ppMessage, int32_t *pError) { - *ppMessage = cl_adapter::ErrorMessage; - *pError = cl_adapter::ErrorMessageCode; - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index 6e32230082e8a..c3502a4180041 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -37,10 +37,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pIL, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - std::unique_ptr> DevicesInCtx; CL_RETURN_ON_FAILURE_AND_SET_NULL( cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); @@ -122,11 +118,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pBinary, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int BinaryStatus; cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( @@ -140,11 +131,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( } UR_APIEXPORT ur_result_t UR_APICALL -urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, - const char *pOptions) { - - UR_ASSERT(hContext, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); +urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { std::unique_ptr> DevicesInProgram; CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); @@ -187,8 +175,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), mapURProgramInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); @@ -196,12 +182,9 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const char *pOptions) { - - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); +UR_APIEXPORT ur_result_t UR_APICALL +urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { std::unique_ptr> DevicesInProgram; CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); @@ -217,10 +200,6 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, const ur_program_handle_t *phPrograms, const char *pOptions, ur_program_handle_t *phProgram) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phPrograms, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - cl_int CLResult; *phProgram = cl_adapter::cast( clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, @@ -253,9 +232,6 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, ur_program_build_info_t propName, size_t propSize, void *pPropValue, 
size_t *pPropSizeRet) { - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( cl_adapter::cast(hProgram), cl_adapter::cast(hDevice), @@ -266,7 +242,6 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); return UR_RESULT_SUCCESS; @@ -274,7 +249,6 @@ urProgramRetain(ur_program_handle_t hProgram) { UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); CL_RETURN_ON_FAILURE( clReleaseProgram(cl_adapter::cast(hProgram))); @@ -284,18 +258,13 @@ urProgramRelease(ur_program_handle_t hProgram) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phNativeProgram, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeProgram = reinterpret_cast(hProgram); - return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_native_handle_t hNativeProgram, ur_context_handle_t, const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { - UR_ASSERT(hNativeProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE); *phProgram = reinterpret_cast(hNativeProgram); return UR_RESULT_SUCCESS; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index 04a8632739fdb..f0ba5092a4ed3 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -54,8 +54,6 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t 
*URQueueProperties) { UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_platform_id CurPlatform; CL_RETURN_ON_FAILURE_AND_SET_NULL( @@ -104,8 +102,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - if (propName == UR_QUEUE_INFO_EMPTY) { // OpenCL doesn't provide API to check the status of the queue. return UR_RESULT_ERROR_INVALID_VALUE; @@ -127,14 +123,12 @@ urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, - ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_native_handle_t hNativeQueue, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { - UR_ASSERT(hNativeQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phQueue, UR_RESULT_ERROR_INVALID_NULL_POINTER); - (void)hContext; - (void)hDevice; - (void)pProperties; + *phQueue = reinterpret_cast(hNativeQueue); cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); @@ -143,21 +137,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - 
UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hQueue)); CL_RETURN_ON_FAILURE(RetErr); @@ -165,7 +156,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); cl_int RetErr = clReleaseCommandQueue(cl_adapter::cast(hQueue)); CL_RETURN_ON_FAILURE(RetErr); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp index 049d9377a1afe..1201974f88f25 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/sampler.cpp @@ -133,9 +133,6 @@ void cl2URSamplerInfoValue(cl_sampler_info Info, void *InfoValue) { ur_result_t urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, ur_sampler_handle_t *phSampler) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDesc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); // Initialize properties according to OpenCL 2.1 spec. 
ur_result_t ErrorCode; @@ -155,8 +152,6 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pPropValue || pPropSizeRet, UR_RESULT_ERROR_INVALID_VALUE); cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == @@ -175,23 +170,18 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { - UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); return mapCLErrorToUR( clRetainSampler(cl_adapter::cast(hSampler))); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { - UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); return mapCLErrorToUR( clReleaseSampler(cl_adapter::cast(hSampler))); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { - UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phNativeSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phNativeSampler = reinterpret_cast( cl_adapter::cast(hSampler)); return UR_RESULT_SUCCESS; @@ -200,9 +190,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t, const ur_sampler_native_properties_t *, ur_sampler_handle_t *phSampler) { - UR_ASSERT(hNativeSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(phSampler, UR_RESULT_ERROR_INVALID_NULL_POINTER); - *phSampler = reinterpret_cast( cl_adapter::cast(hNativeSampler)); return UR_RESULT_SUCCESS; diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 952abc9c130fe..84962edc187d0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -41,7 +41,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; pDdiTable->pfnGetInfo = urPlatformGetInfo; pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnGetLastError = urPlatformGetLastError; pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; return UR_RESULT_SUCCESS; } @@ -200,6 +199,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( } pDdiTable->pfnInit = urInit; pDdiTable->pfnTearDown = urTearDown; + pDdiTable->pfnAdapterGet = urAdapterGet; + pDdiTable->pfnAdapterRelease = urAdapterRelease; + pDdiTable->pfnAdapterRetain = urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp index 8a1b26bba9eac..59fd53b6831d0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/usm.cpp @@ -11,8 +11,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); void *Ptr = nullptr; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; @@ -63,9 +61,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { - UR_ASSERT(hContext, 
UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); void *Ptr = nullptr; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; @@ -119,9 +114,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); void *Ptr = nullptr; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; @@ -181,8 +173,6 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, void *pMem) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); // Use a blocking free to avoid issues with indirect access from kernels that // might be still running. 
@@ -236,10 +226,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - // Have to look up the context from the kernel cl_context CLContext; cl_int CLErr = clGetCommandQueueInfo( @@ -266,14 +252,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - (void)pMem; - (void)size; - - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { // flags is currently unused so fail if set if (flags != 0) @@ -313,15 +295,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( */ } -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - (void)pMem; - (void)size; - (void)advice; - - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { return mapCLErrorToUR(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), 0, nullptr, @@ -382,8 
+359,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); From 9c9fbe0515e73885d2f9f3ec2b31d2f204c4989a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 18 Aug 2023 13:16:51 +0000 Subject: [PATCH 27/36] [SYCL][OpenCL] Remove Sycl and PI dependencies from the OpenCL adapter --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 ++++ sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp | 5 ++--- sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp | 2 -- sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp | 3 --- sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp | 1 - sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp | 3 --- sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp | 2 -- sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp | 2 -- .../plugins/unified_runtime/ur/adapters/opencl/platform.cpp | 2 -- sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp | 6 +++--- sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp | 2 -- 11 files changed, 9 insertions(+), 23 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 38eb27eb143b1..8e5fb604998a3 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -290,6 +290,10 @@ if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) OpenCL-ICD ) + # Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION. + # Define all symbols up to OpenCL 3.0. 
+ target_compile_definitions(ur_adapter_opencl PRIVATE CL_TARGET_OPENCL_VERSION=300) + set_target_properties("ur_adapter_opencl" PROPERTIES VERSION "0.0.0" SOVERSION "0" diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp index 3161a8a3f6125..f78710d0dfea6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/common.hpp @@ -7,13 +7,12 @@ //===-----------------------------------------------------------------===// #pragma once +#include +#include #include #include #include #include -#include -#include -#include #include /** diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp index 88531e8c5b543..ef678d0090fde 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.cpp @@ -8,8 +8,6 @@ #include "context.hpp" -#include - ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp index 2964ca20e7268..0581cd786539a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/context.hpp @@ -9,9 +9,6 @@ #include "common.hpp" -#include -#include - namespace cl_adapter { ur_result_t getDevicesFromContext(ur_context_handle_t hContext, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 46a08adfaddd7..73dc1bd1fb21f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -11,7 +11,6 @@ #include "platform.hpp" #include -#include ur_result_t 
cl_adapter::getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp index 98ff0426a32da..ad5a1fa3b207d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.hpp @@ -9,9 +9,6 @@ #include "common.hpp" -#include -#include - namespace cl_adapter { ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp index 9024349cafb2a..e10f9e9437053 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -8,8 +8,6 @@ #include "common.hpp" -#include - cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp index 5012c7fd65b7e..31484930f965d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/memory.cpp @@ -8,8 +8,6 @@ #include "common.hpp" -#include - cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; switch (PImageFormat->channelOrder) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp index d7e323e230a32..ebe7d32e4db40 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/platform.cpp @@ -8,8 +8,6 @@ #include "platform.hpp" -#include - ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, oclv::OpenCLVersion &Version) { diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index c3502a4180041..d347e233ca73a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -352,13 +352,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( *ppFunctionPointer = 0; size_t Size; CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - PI_PROGRAM_INFO_KERNEL_NAMES, 0, - nullptr, &Size)); + CL_PROGRAM_KERNEL_NAMES, 0, nullptr, + &Size)); std::string KernelNames(Size, ' '); CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), PI_PROGRAM_INFO_KERNEL_NAMES, + cl_adapter::cast(hProgram), CL_PROGRAM_KERNEL_NAMES, KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. If the diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index f0ba5092a4ed3..70f5cb533b346 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -9,8 +9,6 @@ #include "common.hpp" #include "platform.hpp" -#include - cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { switch (PropName) { From 89a75d6defb94a883ae9d3cacb3233dd5b3b315e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 14 Sep 2023 17:25:07 +0100 Subject: [PATCH 28/36] Add support for urKernelSetArgLocal --- .../unified_runtime/ur/adapters/opencl/kernel.cpp | 11 +++++++++++ .../ur/adapters/opencl/ur_interface_loader.cpp | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp index 7cab7e0797b0f..8867be5f76727 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/kernel.cpp @@ -29,6 +29,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, const ur_kernel_arg_local_properties_t *) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, nullptr)); + + return UR_RESULT_SUCCESS; +} + static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) { switch (static_cast(URPropName)) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 84962edc187d0..37f7b37121966 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -115,7 +115,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; pDdiTable->pfnRelease = urKernelRelease; pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnSetArgLocal = nullptr; + pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; From b3ad97d7f4016a44d33d442908da0b806d0b462a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 15 Sep 2023 18:02:52 +0100 Subject: [PATCH 29/36] [SYCL][OpenCL] Fix urProgramGetBuildInfo --- .../ur/adapters/opencl/program.cpp | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp index d347e233ca73a..e219edb83f559 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp +++ 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/program.cpp @@ -227,17 +227,50 @@ static cl_int mapURProgramBuildInfoToCL(ur_program_build_info_t URPropName) { } } +static ur_program_binary_type_t +mapCLBinaryTypeToUR(cl_program_binary_type binaryType) { + switch (binaryType) { + case CL_PROGRAM_BINARY_TYPE_NONE: + return UR_PROGRAM_BINARY_TYPE_NONE; + case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: + return UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; + case CL_PROGRAM_BINARY_TYPE_LIBRARY: + return UR_PROGRAM_BINARY_TYPE_LIBRARY; + case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: + return UR_PROGRAM_BINARY_TYPE_EXECUTABLE; + default: + return UR_PROGRAM_BINARY_TYPE_FORCE_UINT32; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, ur_program_build_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - mapURProgramBuildInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - return UR_RESULT_SUCCESS; + switch (propName) { + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + cl_program_binary_type cl_value; + CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( + cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), + &cl_value, nullptr)); + return ReturnValue(mapCLBinaryTypeToUR(cl_value)); + case UR_PROGRAM_BUILD_INFO_LOG: + case UR_PROGRAM_BUILD_INFO_OPTIONS: + case UR_PROGRAM_BUILD_INFO_STATUS: + CL_RETURN_ON_FAILURE( + clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } UR_APIEXPORT ur_result_t UR_APICALL From 77fc923c8f43f54767eec0a2baeba7835475a0a0 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 18 Sep 2023 13:09:23 +0100 Subject: [PATCH 30/36] [SYCL][OpenCL] Remove urInit and urTearDown. Fix naming of commandbuffer entrypoints --- .../ur/adapters/opencl/adapter.cpp | 26 ++++++++----------- .../ur/adapters/opencl/command_buffer.cpp | 14 +++++----- .../adapters/opencl/ur_interface_loader.cpp | 24 ++++++++--------- 3 files changed, 29 insertions(+), 35 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp index 88684e597f465..19c7fdd08388d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp @@ -10,28 +10,20 @@ struct ur_adapter_handle_t_ { std::atomic RefCount = 0; + std::mutex Mutex; }; ur_adapter_handle_t_ adapter{}; -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - if (cl_ext::ExtFuncPtrCache) { - delete cl_ext::ExtFuncPtrCache; - cl_ext::ExtFuncPtrCache = nullptr; - } - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { + std::lock_guard Lock{adapter.Mutex}; + if (adapter.RefCount++ == 0) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + } + *phAdapters = &adapter; } @@ -48,7 +40,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - --adapter.RefCount; + std::lock_guard Lock{adapter.Mutex}; + if (--adapter.RefCount == 0) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } return UR_RESULT_SUCCESS; } diff --git 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp index 6a942823fcc05..d238964967647 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp @@ -63,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t size, @@ -77,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hSrcMem, [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, @@ -92,7 +92,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hSrcMem, [[maybe_unused]] ur_mem_handle_t hDstMem, @@ -112,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, 
[[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, @@ -127,7 +127,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, @@ -142,7 +142,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, @@ -163,7 +163,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index 37f7b37121966..a4cd781e85d7b 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -197,8 +197,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != Result) { return Result; } - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterRelease = urAdapterRelease; 
pDdiTable->pfnAdapterRetain = urAdapterRetain; @@ -283,17 +281,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferWriteExp = + urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From ff918776531fe30e94e22c7d8daac2cf509f73d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Mon, 18 Sep 2023 15:50:42 +0100 Subject: [PATCH 31/36] Update urQueueCreate to check for nullptr properties Co-authored-by: aarongreig --- sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp 
b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp index 70f5cb533b346..8b5496e619768 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/queue.cpp @@ -61,7 +61,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( phQueue); cl_command_queue_properties CLProperties = - convertURQueuePropertiesToCL(pProperties); + pProperties ? convertURQueuePropertiesToCL(pProperties) : 0; // Properties supported by OpenCL backend. const cl_command_queue_properties SupportByOpenCL = From be9697bba08150f6aaa2d294ab3e37919c6dba87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 19 Sep 2023 11:45:09 +0100 Subject: [PATCH 32/36] [SYCL][OpenCL] Port PI_QUEUED changes from PI to UR --- .../ur/adapters/opencl/event.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp index e10f9e9437053..744a6da54bad5 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/event.cpp @@ -92,6 +92,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, pPropValue, pPropSizeRet); CL_RETURN_ON_FAILURE(RetErr); + + if (RetErr == CL_SUCCESS && + propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change + * it to CL_SUBMITTED. sycl::info::event::event_command_status has no + * equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since PI_QUEUED + * exists, SYCL RT should be changed to handle this situation. In addition, + * SYCL RT is relying on PI_QUEUED status to make sure that the queues are + * flushed. 
*/ + const auto param_value_int = static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } + } + return UR_RESULT_SUCCESS; } From 3a0a7c589b2f85b74a32912f3bb73e8f44e5d1b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 19 Sep 2023 16:11:17 +0100 Subject: [PATCH 33/36] [SYCL][OpenCL] Port ESIMD support query from PI to UR --- .../ur/adapters/opencl/device.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp index 73dc1bd1fb21f..3fc6f5d491466 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/device.cpp @@ -743,6 +743,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(Supported); } + case UR_DEVICE_INFO_ESIMD_SUPPORT: { + bool Supported = false; + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, nullptr)); + + cl_uint VendorID = 0; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); + + /* ESIMD is only supported by Intel GPUs. */ + Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; + + return ReturnValue(Supported); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: From 958b75ea90b221a6ae10f4c7da9d9fe565b9dba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 12 Oct 2023 17:32:57 +0100 Subject: [PATCH 34/36] Revert "[SYCL][OpenCL] Remove urInit and urTearDown. Fix naming of commandbuffer entrypoints" This reverts commit 77fc923c8f43f54767eec0a2baeba7835475a0a0. 
--- .../ur/adapters/opencl/adapter.cpp | 26 +++++++++++-------- .../ur/adapters/opencl/command_buffer.cpp | 14 +++++----- .../adapters/opencl/ur_interface_loader.cpp | 24 +++++++++-------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp index 19c7fdd08388d..88684e597f465 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/adapter.cpp @@ -10,20 +10,28 @@ struct ur_adapter_handle_t_ { std::atomic RefCount = 0; - std::mutex Mutex; }; ur_adapter_handle_t_ adapter{}; +UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, + ur_loader_config_handle_t) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { - std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); - } - *phAdapters = &adapter; } @@ -40,11 +48,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - delete cl_ext::ExtFuncPtrCache; - cl_ext::ExtFuncPtrCache = nullptr; - } + --adapter.RefCount; return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp index d238964967647..6a942823fcc05 100644 --- 
a/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/command_buffer.cpp @@ -63,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t size, @@ -77,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hSrcMem, [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, @@ -92,7 +92,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hSrcMem, [[maybe_unused]] ur_mem_handle_t hDstMem, @@ -112,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, @@ -127,7 +127,7 @@ ur_result_t UR_APICALL 
urCommandBufferAppendMemBufferWriteExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, @@ -142,7 +142,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, @@ -163,7 +163,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] ur_rect_offset_t bufferOffset, diff --git a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp index a4cd781e85d7b..37f7b37121966 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/opencl/ur_interface_loader.cpp @@ -197,6 +197,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != Result) { return Result; } + pDdiTable->pfnInit = urInit; + pDdiTable->pfnTearDown = urTearDown; pDdiTable->pfnAdapterGet = urAdapterGet; pDdiTable->pfnAdapterRelease = urAdapterRelease; pDdiTable->pfnAdapterRetain = urAdapterRetain; @@ -281,17 +283,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnReleaseExp = 
urCommandBufferReleaseExp; pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMemBufferCopyRectExp = - urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMemBufferReadRectExp = - urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMemBufferWriteExp = - urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMemBufferWriteRectExp = - urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendMembufferCopyRectExp = + urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; + pDdiTable->pfnAppendMembufferReadRectExp = + urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMembufferWriteExp = + urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMembufferWriteRectExp = + urCommandBufferAppendMembufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 68378a9900ac5dd5f0eb955b274fd1a582775c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 12 Oct 2023 18:41:18 +0100 Subject: [PATCH 35/36] Add if condition to level-zero --- sycl/plugins/unified_runtime/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index f5914563c0aaf..fc82f3357efd8 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -6,7 +6,9 @@ if (NOT DEFINED 
UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") set(UNIFIED_RUNTIME_TAG e6343f4cca9a37b17bc63f3a81968ac3f486be8a) - set(UR_BUILD_ADAPTER_L0 ON) + if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) + set(UR_BUILD_ADAPTER_L0 ON) + endif() if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) set(UR_BUILD_ADAPTER_CUDA ON) From 8279d9e4371484fa371608d9b5e140bf42d904af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 13 Oct 2023 00:23:26 +0100 Subject: [PATCH 36/36] Fix bug introduced during merge conflict --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 4186229e203a2..e04e77dd6b4bb 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -102,7 +102,7 @@ endif() add_sycl_plugin(unified_runtime ${UNIFIED_RUNTIME_PLUGIN_ARGS}) -if("level-zero" IN_LIST SYCL_ENABLE_PLUGINS) +if("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) endif()