diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 8d7ca1a3af17f..b6ae50eb70888 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -389,6 +389,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS sycl libsycldevice level-zero-sycl-dev + unified-runtime-sycl-dev ${XPTIFW_LIBS} ${SYCL_TOOLCHAIN_DEPS} ) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index adba793192c14..527ce78a2b183 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -593,7 +593,7 @@ pi_result _pi_context::initialize() { // Prefer to use copy engine for initialization copies, // if available and allowed (main copy engine with index 0). ZeStruct ZeCommandQueueDesc; - const auto &Range = getRangeOfAllowedCopyEngines((zer_device_handle_t)Device); + const auto &Range = getRangeOfAllowedCopyEngines((ur_device_handle_t)Device); ZeCommandQueueDesc.ordinal = Device->QueueGroup[_pi_device::queue_group_info_t::Compute].ZeOrdinal; if (Range.first >= 0 && @@ -932,7 +932,7 @@ _pi_queue::_pi_queue(std::vector &ComputeQueues, // Copy group initialization. pi_queue_group_t CopyQueueGroup{this, queue_type::MainCopy}; - const auto &Range = getRangeOfAllowedCopyEngines((zer_device_handle_t)Device); + const auto &Range = getRangeOfAllowedCopyEngines((ur_device_handle_t)Device); if (Range.first < 0 || Range.second < 0) { // We are asked not to use copy engines, just do nothing. // Leave CopyQueueGroup.ZeQueues empty, and it won't be used. @@ -7801,7 +7801,7 @@ pi_result piextEnqueueDeviceGlobalVariableWrite( (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", - ZER_RESULT_INVALID_VALUE); + UR_RESULT_ERROR_INVALID_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } @@ -7846,7 +7846,7 @@ pi_result piextEnqueueDeviceGlobalVariableRead( (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Read from device global variable is out of range.", - ZER_RESULT_INVALID_VALUE); + UR_RESULT_ERROR_INVALID_VALUE); return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } diff --git a/sycl/plugins/level_zero/ur_bindings.hpp b/sycl/plugins/level_zero/ur_bindings.hpp index 1f92bea523246..9d72e579a070f 100755 --- a/sycl/plugins/level_zero/ur_bindings.hpp +++ b/sycl/plugins/level_zero/ur_bindings.hpp @@ -8,14 +8,14 @@ #pragma once #include "pi_level_zero.hpp" -#include +#include // Make the Unified Runtime handles definition complete. // This is used in various "create" API where new handles are allocated. -struct _zer_platform_handle_t : public _pi_platform { +struct ur_platform_handle_t_ : public _pi_platform { using _pi_platform::_pi_platform; }; -struct _zer_device_handle_t : public _pi_device { +struct ur_device_handle_t_ : public _pi_device { using _pi_device::_pi_device; }; diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index e4fe9c1c14a60..cd84552674a24 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -1,10 +1,10 @@ # PI Unified Runtime plugin library # -if (NOT DEFINED UNIFIED_RUNTIME_INCLUDE_DIR) +if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_DIR) include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG fd711c920acc4434cb52ff18b078c082d9d7f44d) + set(UNIFIED_RUNTIME_TAG 61cb864b3a73918124bff11f9ad58dafb8c7769a) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime @@ -12,20 +12,55 @@ if (NOT DEFINED UNIFIED_RUNTIME_INCLUDE_DIR) GIT_TAG ${UNIFIED_RUNTIME_TAG} ) - FetchContent_MakeAvailable(unified-runtime) + # Disable some compilation options to avoid errors while building the UR + if (UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-truncation") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") + endif() + FetchContent_GetProperties(unified-runtime) + FetchContent_MakeAvailable(unified-runtime) + + add_library(${PROJECT_NAME}::ur_loader ALIAS loader) + + set(UNIFIED_RUNTIME_LIBRARY ${PROJECT_NAME}::ur_loader) set(UNIFIED_RUNTIME_SOURCE_DIR ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") + set(UNIFIED_RUNTIME_LOADER_SOURCE_DIR + ${unified-runtime_SOURCE_DIR}/source/loader CACHE PATH "Path to Unified Runtime Loader Headers") set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include") endif() +add_library (UnifiedRuntimeLoader INTERFACE) +get_filename_component(UNIFIED_RUNTIME_LIBRARY_SRC "${UNIFIED_RUNTIME_LOADER_SOURCE_DIR}" DIRECTORY) +get_filename_component(UNIFIED_RUNTIME_LIB_NAME "${UNIFIED_RUNTIME_LIBRARY}" NAME) + +message(STATUS "UNIFIED_RUNTIME_LIB_NAME ${UNIFIED_RUNTIME_LIB_NAME}") + add_library (UnifiedRuntime-Headers INTERFACE) +add_library (UnifiedRuntimeLoader-Headers INTERFACE) + target_include_directories(UnifiedRuntime-Headers INTERFACE "${UNIFIED_RUNTIME_INCLUDE_DIR}" ) +target_include_directories(UnifiedRuntimeLoader-Headers + INTERFACE "${UNIFIED_RUNTIME_INCLUDE_DIR}" +) + +target_include_directories(UnifiedRuntimeLoader + INTERFACE "${UNIFIED_RUNTIME_INCLUDE_DIR}" +) + +target_link_libraries(UnifiedRuntimeLoader + INTERFACE "${UNIFIED_RUNTIME_LIB_NAME}" +) + find_package(Threads REQUIRED) # @@ -52,9 +87,27 @@ add_sycl_plugin(unified_runtime # These below belong to Unified Runtime PI Plugin only "pi_unified_runtime.hpp" "pi_unified_runtime.cpp" - LIBRARIES + LIBRARIES Threads::Threads UnifiedRuntime-Headers + UnifiedRuntimeLoader-Headers + UnifiedRuntimeLoader LevelZeroLoader-Headers LevelZeroLoader ) + +if (TARGET ${PROJECT_NAME}::ur_loader) + set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1) + set_target_properties(ur_null PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1) + install(TARGETS loader + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT unified-runtime-sycl-dev + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT unified-runtime-sycl-dev + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT unified-runtime-sycl-dev + ) + file(GLOB UNIFIED_RUNTIME_API_HEADERS "${UNIFIED_RUNTIME_INCLUDE_DIR}/*.h") + install(FILES ${UNIFIED_RUNTIME_API_HEADERS} + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/sycl/unified_runtime/ + COMPONENT unified-runtime-sycl-dev + ) +endif() + diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 224589f482578..7f23e87e7f56d 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -9,28 +9,29 @@ #include -#include "zer_api.h" +#include "ur_api.h" #include #include // Map of UR error codes to PI error codes -static pi_result ur2piResult(zer_result_t urResult) { - std::unordered_map ErrorMapping = { - {ZER_RESULT_SUCCESS, PI_SUCCESS}, - {ZER_RESULT_ERROR_UNKNOWN, PI_ERROR_UNKNOWN}, - {ZER_RESULT_ERROR_DEVICE_LOST, PI_ERROR_DEVICE_NOT_FOUND}, - {ZER_RESULT_INVALID_OPERATION, PI_ERROR_INVALID_OPERATION}, - {ZER_RESULT_INVALID_PLATFORM, PI_ERROR_INVALID_PLATFORM}, - {ZER_RESULT_ERROR_INVALID_ARGUMENT, PI_ERROR_INVALID_ARG_VALUE}, - {ZER_RESULT_INVALID_VALUE, PI_ERROR_INVALID_VALUE}, - {ZER_RESULT_INVALID_EVENT, PI_ERROR_INVALID_EVENT}, - {ZER_RESULT_INVALID_BINARY, PI_ERROR_INVALID_BINARY}, - {ZER_RESULT_INVALID_KERNEL_NAME, PI_ERROR_INVALID_KERNEL_NAME}, - {ZER_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_ERROR_BUILD_PROGRAM_FAILURE}, - {ZER_RESULT_INVALID_WORK_GROUP_SIZE, PI_ERROR_INVALID_WORK_GROUP_SIZE}, - {ZER_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_ERROR_BUILD_PROGRAM_FAILURE}, - {ZER_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, PI_ERROR_OUT_OF_RESOURCES}, - {ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY, PI_ERROR_OUT_OF_HOST_MEMORY}}; +static pi_result ur2piResult(ur_result_t urResult) { + std::unordered_map ErrorMapping = { + {UR_RESULT_SUCCESS, PI_SUCCESS}, + {UR_RESULT_ERROR_UNKNOWN, PI_ERROR_UNKNOWN}, + {UR_RESULT_ERROR_DEVICE_LOST, PI_ERROR_DEVICE_NOT_FOUND}, + {UR_RESULT_ERROR_INVALID_OPERATION, PI_ERROR_INVALID_OPERATION}, + {UR_RESULT_ERROR_INVALID_PLATFORM, PI_ERROR_INVALID_PLATFORM}, + {UR_RESULT_ERROR_INVALID_ARGUMENT, PI_ERROR_INVALID_ARG_VALUE}, + {UR_RESULT_ERROR_INVALID_VALUE, PI_ERROR_INVALID_VALUE}, + {UR_RESULT_ERROR_INVALID_EVENT, PI_ERROR_INVALID_EVENT}, + {UR_RESULT_ERROR_INVALID_BINARY, PI_ERROR_INVALID_BINARY}, + {UR_RESULT_ERROR_INVALID_KERNEL_NAME, PI_ERROR_INVALID_KERNEL_NAME}, + {UR_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_ERROR_BUILD_PROGRAM_FAILURE}, + {UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE, + PI_ERROR_INVALID_WORK_GROUP_SIZE}, + {UR_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_ERROR_BUILD_PROGRAM_FAILURE}, + {UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, PI_ERROR_OUT_OF_RESOURCES}, + {UR_RESULT_ERROR_OUT_OF_HOST_MEMORY, PI_ERROR_OUT_OF_HOST_MEMORY}}; auto It = ErrorMapping.find(urResult); if (It == ErrorMapping.end()) { @@ -44,7 +45,7 @@ static pi_result ur2piResult(zer_result_t urResult) { if (auto Result = urCall) \ return ur2piResult(Result); -// A version of return helper that returns pi_result and not zer_result_t +// A version of return helper that returns pi_result and not ur_result_t class ReturnHelper : public UrReturnHelper { public: using UrReturnHelper::UrReturnHelper; @@ -161,69 +162,69 @@ class ConvertHelper : public ReturnHelper { }; // Translate UR info values to PI info values -inline pi_result ur2piInfoValue(zer_device_info_t ParamName, +inline pi_result ur2piInfoValue(ur_device_info_t ParamName, size_t ParamValueSizePI, size_t *ParamValueSizeUR, void *ParamValue) { ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); - if (ParamName == ZER_DEVICE_INFO_TYPE) { - static std::unordered_map Map = { - {ZER_DEVICE_TYPE_CPU, PI_DEVICE_TYPE_CPU}, - {ZER_DEVICE_TYPE_GPU, PI_DEVICE_TYPE_GPU}, - {ZER_DEVICE_TYPE_FPGA, PI_DEVICE_TYPE_ACC}, + if (ParamName == UR_DEVICE_INFO_TYPE) { + static std::unordered_map Map = { + {UR_DEVICE_TYPE_CPU, PI_DEVICE_TYPE_CPU}, + {UR_DEVICE_TYPE_GPU, PI_DEVICE_TYPE_GPU}, + {UR_DEVICE_TYPE_FPGA, PI_DEVICE_TYPE_ACC}, }; return Value.convert(Map); - } else if (ParamName == ZER_DEVICE_INFO_QUEUE_PROPERTIES) { - static std::unordered_map Map = { - {ZER_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, + } else if (ParamName == UR_DEVICE_INFO_QUEUE_PROPERTIES) { + static std::unordered_map Map = { + {UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE}, - {ZER_QUEUE_FLAG_PROFILING_ENABLE, PI_QUEUE_FLAG_PROFILING_ENABLE}, - {ZER_QUEUE_FLAG_ON_DEVICE, PI_QUEUE_FLAG_ON_DEVICE}, - {ZER_QUEUE_FLAG_ON_DEVICE_DEFAULT, PI_QUEUE_FLAG_ON_DEVICE_DEFAULT}, + {UR_QUEUE_FLAG_PROFILING_ENABLE, PI_QUEUE_FLAG_PROFILING_ENABLE}, + {UR_QUEUE_FLAG_ON_DEVICE, PI_QUEUE_FLAG_ON_DEVICE}, + {UR_QUEUE_FLAG_ON_DEVICE_DEFAULT, PI_QUEUE_FLAG_ON_DEVICE_DEFAULT}, }; return Value.convertBitSet(Map); - } else if (ParamName == ZER_DEVICE_INFO_EXECUTION_CAPABILITIES) { - static std::unordered_map Map = { - {ZER_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL, + {UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL, PI_DEVICE_EXEC_CAPABILITIES_KERNEL}, - {ZER_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL, + {UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL, PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL}, }; return Value.convertBitSet(Map); - } else if (ParamName == ZER_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) { - static std::unordered_map Map = { - {ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, + {UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, PI_DEVICE_AFFINITY_DOMAIN_NUMA}, - {ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, + {UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE}, }; return Value.convertBitSet(Map); - } else if (ParamName == ZER_DEVICE_INFO_PARTITION_TYPE) { - static std::unordered_map Map = { - {ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN, + {UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN}, - {ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, + {UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE}, - {(zer_device_partition_property_flag_t) - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, + {(ur_device_partition_property_t) + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, (pi_device_partition_property) PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE}, }; return Value.convertArray(Map); - } else if (ParamName == ZER_DEVICE_INFO_PARTITION_PROPERTIES) { - static std::unordered_map Map = { - {ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN, + {UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN}, - {ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, + {UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE}, }; return Value.convertArray(Map); @@ -241,14 +242,9 @@ namespace pi2ur { inline pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms) { - // https://spec.oneapi.io/unified-runtime/latest/core/api.html#zerplatformget - uint32_t Count = num_entries; - auto phPlatforms = reinterpret_cast(platforms); - HANDLE_ERRORS(zerPlatformGet(&Count, phPlatforms)); - if (num_platforms) { - *num_platforms = Count; - } + auto phPlatforms = reinterpret_cast(platforms); + HANDLE_ERRORS(urPlatformGet(Count, phPlatforms, num_platforms)); return PI_SUCCESS; } @@ -257,13 +253,13 @@ inline pi_result piPlatformGetInfo(pi_platform platform, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - static std::unordered_map InfoMapping = + static std::unordered_map InfoMapping = { - {PI_PLATFORM_INFO_EXTENSIONS, ZER_PLATFORM_INFO_NAME}, - {PI_PLATFORM_INFO_NAME, ZER_PLATFORM_INFO_NAME}, - {PI_PLATFORM_INFO_PROFILE, ZER_PLATFORM_INFO_PROFILE}, - {PI_PLATFORM_INFO_VENDOR, ZER_PLATFORM_INFO_VENDOR_NAME}, - {PI_PLATFORM_INFO_VERSION, ZER_PLATFORM_INFO_VERSION}, + {PI_PLATFORM_INFO_EXTENSIONS, UR_PLATFORM_INFO_NAME}, + {PI_PLATFORM_INFO_NAME, UR_PLATFORM_INFO_NAME}, + {PI_PLATFORM_INFO_PROFILE, UR_PLATFORM_INFO_PROFILE}, + {PI_PLATFORM_INFO_VENDOR, UR_PLATFORM_INFO_VENDOR_NAME}, + {PI_PLATFORM_INFO_VERSION, UR_PLATFORM_INFO_VERSION}, }; auto InfoType = InfoMapping.find(ParamName); @@ -272,12 +268,9 @@ inline pi_result piPlatformGetInfo(pi_platform platform, } size_t SizeInOut = ParamValueSize; - auto hPlatform = reinterpret_cast(platform); - HANDLE_ERRORS( - zerPlatformGetInfo(hPlatform, InfoType->second, &SizeInOut, ParamValue)); - if (ParamValueSizeRet) { - *ParamValueSizeRet = SizeInOut; - } + auto hPlatform = reinterpret_cast(platform); + HANDLE_ERRORS(urPlatformGetInfo(hPlatform, InfoType->second, SizeInOut, + ParamValue, ParamValueSizeRet)); return PI_SUCCESS; } @@ -285,11 +278,11 @@ inline pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - static std::unordered_map TypeMapping = { - {PI_DEVICE_TYPE_ALL, ZER_DEVICE_TYPE_ALL}, - {PI_DEVICE_TYPE_GPU, ZER_DEVICE_TYPE_GPU}, - {PI_DEVICE_TYPE_CPU, ZER_DEVICE_TYPE_CPU}, - {PI_DEVICE_TYPE_ACC, ZER_DEVICE_TYPE_FPGA}, + static std::unordered_map TypeMapping = { + {PI_DEVICE_TYPE_ALL, UR_DEVICE_TYPE_ALL}, + {PI_DEVICE_TYPE_GPU, UR_DEVICE_TYPE_GPU}, + {PI_DEVICE_TYPE_CPU, UR_DEVICE_TYPE_CPU}, + {PI_DEVICE_TYPE_ACC, UR_DEVICE_TYPE_FPGA}, }; auto Type = TypeMapping.find(DeviceType); @@ -298,196 +291,190 @@ inline pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, } uint32_t Count = NumEntries; - auto hPlatform = reinterpret_cast(Platform); - auto phDevices = reinterpret_cast(Devices); - HANDLE_ERRORS(zerDeviceGet(hPlatform, Type->second, &Count, phDevices)); - if (NumDevices) { - *NumDevices = Count; - } + auto hPlatform = reinterpret_cast(Platform); + auto phDevices = reinterpret_cast(Devices); + HANDLE_ERRORS( + urDeviceGet(hPlatform, Type->second, Count, phDevices, NumDevices)); return PI_SUCCESS; } inline pi_result piDeviceRetain(pi_device Device) { - auto hDevice = reinterpret_cast(Device); - HANDLE_ERRORS(zerDeviceGetReference(hDevice)); + auto hDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceRetain(hDevice)); return PI_SUCCESS; } inline pi_result piDeviceRelease(pi_device Device) { - auto hDevice = reinterpret_cast(Device); - HANDLE_ERRORS(zerDeviceRelease(hDevice)); + auto hDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceRelease(hDevice)); return PI_SUCCESS; } -inline pi_result piPluginGetLastError(char **message) { - HANDLE_ERRORS(zerPluginGetLastError(message)); - return PI_SUCCESS; -} +inline pi_result piPluginGetLastError(char **message) { return PI_SUCCESS; } inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - static std::unordered_map InfoMapping = { - {PI_DEVICE_INFO_TYPE, ZER_DEVICE_INFO_TYPE}, - {PI_DEVICE_INFO_PARENT_DEVICE, ZER_DEVICE_INFO_PARENT_DEVICE}, - {PI_DEVICE_INFO_PLATFORM, ZER_DEVICE_INFO_PLATFORM}, - {PI_DEVICE_INFO_VENDOR_ID, ZER_DEVICE_INFO_VENDOR_ID}, - {PI_DEVICE_INFO_UUID, ZER_DEVICE_INFO_UUID}, - {PI_DEVICE_INFO_ATOMIC_64, ZER_DEVICE_INFO_ATOMIC_64}, - {PI_DEVICE_INFO_EXTENSIONS, ZER_DEVICE_INFO_EXTENSIONS}, - {PI_DEVICE_INFO_NAME, ZER_DEVICE_INFO_NAME}, - {PI_DEVICE_INFO_COMPILER_AVAILABLE, ZER_DEVICE_INFO_COMPILER_AVAILABLE}, - {PI_DEVICE_INFO_LINKER_AVAILABLE, ZER_DEVICE_INFO_LINKER_AVAILABLE}, - {PI_DEVICE_INFO_MAX_COMPUTE_UNITS, ZER_DEVICE_INFO_MAX_COMPUTE_UNITS}, + static std::unordered_map InfoMapping = { + {PI_DEVICE_INFO_TYPE, UR_DEVICE_INFO_TYPE}, + {PI_DEVICE_INFO_PARENT_DEVICE, UR_DEVICE_INFO_PARENT_DEVICE}, + {PI_DEVICE_INFO_PLATFORM, UR_DEVICE_INFO_PLATFORM}, + {PI_DEVICE_INFO_VENDOR_ID, UR_DEVICE_INFO_VENDOR_ID}, + {PI_DEVICE_INFO_UUID, UR_DEVICE_INFO_UUID}, + {PI_DEVICE_INFO_ATOMIC_64, UR_DEVICE_INFO_ATOMIC_64}, + {PI_DEVICE_INFO_EXTENSIONS, UR_DEVICE_INFO_EXTENSIONS}, + {PI_DEVICE_INFO_NAME, UR_DEVICE_INFO_NAME}, + {PI_DEVICE_INFO_COMPILER_AVAILABLE, UR_DEVICE_INFO_COMPILER_AVAILABLE}, + {PI_DEVICE_INFO_LINKER_AVAILABLE, UR_DEVICE_INFO_LINKER_AVAILABLE}, + {PI_DEVICE_INFO_MAX_COMPUTE_UNITS, UR_DEVICE_INFO_MAX_COMPUTE_UNITS}, {PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, - ZER_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS}, - {PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, ZER_DEVICE_INFO_MAX_WORK_GROUP_SIZE}, - {PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, ZER_DEVICE_INFO_MAX_WORK_ITEM_SIZES}, - {PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, ZER_DEVICE_INFO_MAX_CLOCK_FREQUENCY}, - {PI_DEVICE_INFO_ADDRESS_BITS, ZER_DEVICE_INFO_ADDRESS_BITS}, - {PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, ZER_DEVICE_INFO_MAX_MEM_ALLOC_SIZE}, - {PI_DEVICE_INFO_GLOBAL_MEM_SIZE, ZER_DEVICE_INFO_GLOBAL_MEM_SIZE}, - {PI_DEVICE_INFO_LOCAL_MEM_SIZE, ZER_DEVICE_INFO_LOCAL_MEM_SIZE}, - {PI_DEVICE_INFO_IMAGE_SUPPORT, ZER_DEVICE_INFO_IMAGE_SUPPORTED}, - {PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, ZER_DEVICE_INFO_HOST_UNIFIED_MEMORY}, - {PI_DEVICE_INFO_AVAILABLE, ZER_DEVICE_INFO_AVAILABLE}, - {PI_DEVICE_INFO_VENDOR, ZER_DEVICE_INFO_VENDOR}, - {PI_DEVICE_INFO_DRIVER_VERSION, ZER_DEVICE_INFO_DRIVER_VERSION}, - {PI_DEVICE_INFO_VERSION, ZER_DEVICE_INFO_VERSION}, + UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS}, + {PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE}, + {PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES}, + {PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY, UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY}, + {PI_DEVICE_INFO_ADDRESS_BITS, UR_DEVICE_INFO_ADDRESS_BITS}, + {PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE, UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE}, + {PI_DEVICE_INFO_GLOBAL_MEM_SIZE, UR_DEVICE_INFO_GLOBAL_MEM_SIZE}, + {PI_DEVICE_INFO_LOCAL_MEM_SIZE, UR_DEVICE_INFO_LOCAL_MEM_SIZE}, + {PI_DEVICE_INFO_IMAGE_SUPPORT, UR_DEVICE_INFO_IMAGE_SUPPORTED}, + {PI_DEVICE_INFO_HOST_UNIFIED_MEMORY, UR_DEVICE_INFO_HOST_UNIFIED_MEMORY}, + {PI_DEVICE_INFO_AVAILABLE, UR_DEVICE_INFO_AVAILABLE}, + {PI_DEVICE_INFO_VENDOR, UR_DEVICE_INFO_VENDOR}, + {PI_DEVICE_INFO_DRIVER_VERSION, UR_DEVICE_INFO_DRIVER_VERSION}, + {PI_DEVICE_INFO_VERSION, UR_DEVICE_INFO_VERSION}, {PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, - ZER_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES}, - {PI_DEVICE_INFO_REFERENCE_COUNT, ZER_DEVICE_INFO_REFERENCE_COUNT}, + UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES}, + {PI_DEVICE_INFO_REFERENCE_COUNT, UR_DEVICE_INFO_REFERENCE_COUNT}, {PI_DEVICE_INFO_PARTITION_PROPERTIES, - ZER_DEVICE_INFO_PARTITION_PROPERTIES}, + UR_DEVICE_INFO_PARTITION_PROPERTIES}, {PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, - ZER_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN}, - {PI_DEVICE_INFO_PARTITION_TYPE, ZER_DEVICE_INFO_PARTITION_TYPE}, - {PI_DEVICE_INFO_OPENCL_C_VERSION, ZER_DEVICE_INFO_OPENCL_C_VERSION}, + UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN}, + {PI_DEVICE_INFO_PARTITION_TYPE, UR_DEVICE_INFO_PARTITION_TYPE}, + {PI_DEVICE_INFO_OPENCL_C_VERSION, UR_EXT_DEVICE_INFO_OPENCL_C_VERSION}, {PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, - ZER_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC}, - {PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, ZER_DEVICE_INFO_PRINTF_BUFFER_SIZE}, - {PI_DEVICE_INFO_PROFILE, ZER_DEVICE_INFO_PROFILE}, - {PI_DEVICE_INFO_BUILT_IN_KERNELS, ZER_DEVICE_INFO_BUILT_IN_KERNELS}, - {PI_DEVICE_INFO_QUEUE_PROPERTIES, ZER_DEVICE_INFO_QUEUE_PROPERTIES}, + UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC}, + {PI_DEVICE_INFO_PRINTF_BUFFER_SIZE, UR_DEVICE_INFO_PRINTF_BUFFER_SIZE}, + {PI_DEVICE_INFO_PROFILE, UR_DEVICE_INFO_PROFILE}, + {PI_DEVICE_INFO_BUILT_IN_KERNELS, UR_DEVICE_INFO_BUILT_IN_KERNELS}, + {PI_DEVICE_INFO_QUEUE_PROPERTIES, UR_DEVICE_INFO_QUEUE_PROPERTIES}, {PI_DEVICE_INFO_EXECUTION_CAPABILITIES, - ZER_DEVICE_INFO_EXECUTION_CAPABILITIES}, - {PI_DEVICE_INFO_ENDIAN_LITTLE, ZER_DEVICE_INFO_ENDIAN_LITTLE}, + UR_DEVICE_INFO_EXECUTION_CAPABILITIES}, + {PI_DEVICE_INFO_ENDIAN_LITTLE, UR_DEVICE_INFO_ENDIAN_LITTLE}, {PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT, - ZER_DEVICE_INFO_ERROR_CORRECTION_SUPPORT}, + UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT}, {PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION, - ZER_DEVICE_INFO_PROFILING_TIMER_RESOLUTION}, - {PI_DEVICE_INFO_LOCAL_MEM_TYPE, ZER_DEVICE_INFO_LOCAL_MEM_TYPE}, - {PI_DEVICE_INFO_MAX_CONSTANT_ARGS, ZER_DEVICE_INFO_MAX_CONSTANT_ARGS}, + UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION}, + {PI_DEVICE_INFO_LOCAL_MEM_TYPE, UR_DEVICE_INFO_LOCAL_MEM_TYPE}, + {PI_DEVICE_INFO_MAX_CONSTANT_ARGS, UR_DEVICE_INFO_MAX_CONSTANT_ARGS}, {PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE, - ZER_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE}, + UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE}, {PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE, - ZER_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE}, + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE}, {PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE, - ZER_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE}, + UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE}, {PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE, - ZER_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE}, - {PI_DEVICE_INFO_MAX_PARAMETER_SIZE, ZER_DEVICE_INFO_MAX_PARAMETER_SIZE}, - {PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, ZER_DEVICE_INFO_MEM_BASE_ADDR_ALIGN}, - {PI_DEVICE_INFO_MAX_SAMPLERS, ZER_DEVICE_INFO_MAX_SAMPLERS}, - {PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, ZER_DEVICE_INFO_MAX_READ_IMAGE_ARGS}, + UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE}, + {PI_DEVICE_INFO_MAX_PARAMETER_SIZE, UR_DEVICE_INFO_MAX_PARAMETER_SIZE}, + {PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN}, + {PI_DEVICE_INFO_MAX_SAMPLERS, UR_DEVICE_INFO_MAX_SAMPLERS}, + {PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS, UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS}, {PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS, - ZER_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS}, - {PI_DEVICE_INFO_SINGLE_FP_CONFIG, ZER_DEVICE_INFO_SINGLE_FP_CONFIG}, - {PI_DEVICE_INFO_HALF_FP_CONFIG, ZER_DEVICE_INFO_HALF_FP_CONFIG}, - {PI_DEVICE_INFO_DOUBLE_FP_CONFIG, ZER_DEVICE_INFO_DOUBLE_FP_CONFIG}, - {PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, ZER_DEVICE_INFO_IMAGE2D_MAX_WIDTH}, - {PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, ZER_DEVICE_INFO_IMAGE2D_MAX_HEIGHT}, - {PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, ZER_DEVICE_INFO_IMAGE3D_MAX_WIDTH}, - {PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, ZER_DEVICE_INFO_IMAGE3D_MAX_HEIGHT}, - {PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, ZER_DEVICE_INFO_IMAGE3D_MAX_DEPTH}, + UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS}, + {PI_DEVICE_INFO_SINGLE_FP_CONFIG, UR_DEVICE_INFO_SINGLE_FP_CONFIG}, + {PI_DEVICE_INFO_HALF_FP_CONFIG, UR_DEVICE_INFO_HALF_FP_CONFIG}, + {PI_DEVICE_INFO_DOUBLE_FP_CONFIG, UR_DEVICE_INFO_DOUBLE_FP_CONFIG}, + {PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH, UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH}, + {PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT, UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT}, + {PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH, UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH}, + {PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT, UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT}, + {PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH, UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH}, {PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE, - ZER_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE}, + UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE}, {PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE}, + (ur_device_info_t)UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE}, {PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF, - ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF}, + UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF}, {PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF, - ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF}, - {PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, ZER_DEVICE_INFO_MAX_NUM_SUB_GROUPS}, + UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF}, + {PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS, UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS}, {PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, - ZER_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}, + UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS}, {PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, - ZER_DEVICE_INFO_SUB_GROUP_SIZES_INTEL}, - {PI_DEVICE_INFO_IL_VERSION, ZER_DEVICE_INFO_IL_VERSION}, - {PI_DEVICE_INFO_USM_HOST_SUPPORT, ZER_DEVICE_INFO_USM_HOST_SUPPORT}, - {PI_DEVICE_INFO_USM_DEVICE_SUPPORT, ZER_DEVICE_INFO_USM_DEVICE_SUPPORT}, + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL}, + {PI_DEVICE_INFO_IL_VERSION, UR_DEVICE_INFO_IL_VERSION}, + {PI_DEVICE_INFO_USM_HOST_SUPPORT, UR_DEVICE_INFO_USM_HOST_SUPPORT}, + {PI_DEVICE_INFO_USM_DEVICE_SUPPORT, UR_DEVICE_INFO_USM_DEVICE_SUPPORT}, {PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT}, + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT}, {PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT}, + UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT}, {PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT}, - {PI_DEVICE_INFO_USM_HOST_SUPPORT, ZER_DEVICE_INFO_USM_HOST_SUPPORT}, - {PI_DEVICE_INFO_USM_DEVICE_SUPPORT, ZER_DEVICE_INFO_USM_DEVICE_SUPPORT}, + UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT}, + {PI_DEVICE_INFO_USM_HOST_SUPPORT, UR_DEVICE_INFO_USM_HOST_SUPPORT}, + {PI_DEVICE_INFO_USM_DEVICE_SUPPORT, UR_DEVICE_INFO_USM_DEVICE_SUPPORT}, {PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT}, + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT}, {PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT}, + UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT}, {PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT, - ZER_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT}, - {PI_DEVICE_INFO_PCI_ADDRESS, ZER_DEVICE_INFO_PCI_ADDRESS}, - {PI_DEVICE_INFO_GPU_EU_COUNT, ZER_DEVICE_INFO_GPU_EU_COUNT}, - {PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, ZER_DEVICE_INFO_GPU_EU_SIMD_WIDTH}, + UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT}, + {PI_DEVICE_INFO_PCI_ADDRESS, UR_DEVICE_INFO_PCI_ADDRESS}, + {PI_DEVICE_INFO_GPU_EU_COUNT, UR_DEVICE_INFO_GPU_EU_COUNT}, + {PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH, UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH}, {PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, - ZER_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE}, + UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE}, {PI_DEVICE_INFO_BUILD_ON_SUBDEVICE, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE}, {PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D}, {PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE}, - {PI_DEVICE_INFO_DEVICE_ID, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_DEVICE_ID}, + (ur_device_info_t)UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE}, + {PI_DEVICE_INFO_DEVICE_ID, (ur_device_info_t)UR_DEVICE_INFO_DEVICE_ID}, {PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_FREE_MEMORY}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_FREE_MEMORY}, {PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_MEMORY_CLOCK_RATE}, + (ur_device_info_t)UR_DEVICE_INFO_MEMORY_CLOCK_RATE}, {PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH}, {PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES}, + (ur_device_info_t)UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES}, {PI_DEVICE_INFO_GPU_SLICES, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_GPU_SLICES}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_GPU_SLICES}, {PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE}, {PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU}, {PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH}, + (ur_device_info_t)UR_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH}, {PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS}, + (ur_device_info_t)UR_DEVICE_INFO_BFLOAT16}, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, + (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, }; auto InfoType = InfoMapping.find(ParamName); @@ -496,15 +483,12 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, } size_t SizeInOut = ParamValueSize; - auto hDevice = reinterpret_cast(Device); - HANDLE_ERRORS( - zerDeviceGetInfo(hDevice, InfoType->second, &SizeInOut, ParamValue)); + auto hDevice = reinterpret_cast(Device); + HANDLE_ERRORS(urDeviceGetInfo(hDevice, InfoType->second, SizeInOut, + ParamValue, ParamValueSizeRet)); ur2piInfoValue(InfoType->second, ParamValueSize, &SizeInOut, ParamValue); - if (ParamValueSizeRet) { - *ParamValueSizeRet = SizeInOut; - } return PI_SUCCESS; } @@ -516,16 +500,14 @@ inline pi_result piDevicePartition( return PI_ERROR_INVALID_VALUE; static std::unordered_map + ur_device_partition_property_t> PropertyMap = { - {PI_DEVICE_PARTITION_EQUALLY, - ZER_DEVICE_PARTITION_PROPERTY_FLAG_EQUALLY}, - {PI_DEVICE_PARTITION_BY_COUNTS, - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_COUNTS}, + {PI_DEVICE_PARTITION_EQUALLY, UR_DEVICE_PARTITION_EQUALLY}, + {PI_DEVICE_PARTITION_BY_COUNTS, UR_DEVICE_PARTITION_BY_COUNTS}, {PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN}, + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN}, {PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE, - ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE}, + UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE}, }; auto PropertyIt = PropertyMap.find(Properties[0]); @@ -535,15 +517,14 @@ inline pi_result piDevicePartition( // Some partitioning types require a value auto Value = uint32_t(Properties[1]); - if (PropertyIt->second == - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN) { + if (PropertyIt->second == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { static std::unordered_map + ur_device_affinity_domain_flag_t> ValueMap = { {PI_DEVICE_AFFINITY_DOMAIN_NUMA, - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA}, + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA}, {PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE}, + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE}, }; auto ValueIt = ValueMap.find(Properties[1]); if (ValueIt == ValueMap.end()) { @@ -559,18 +540,13 @@ inline pi_result piDevicePartition( // TODO: correctly terminate the UR properties, see: // https://github.com/oneapi-src/unified-runtime/issues/183 // - zer_device_partition_property_value_t UrProperties[] = { - {zer_device_partition_property_flags_t(PropertyIt->second), Value}, - {0, 0}}; + ur_device_partition_property_t UrProperties[] = { + ur_device_partition_property_t(PropertyIt->second), Value, 0}; - uint32_t Count = NumEntries; - auto hDevice = reinterpret_cast(Device); - auto phSubDevices = reinterpret_cast(SubDevices); - HANDLE_ERRORS( - zerDevicePartition(hDevice, UrProperties, &Count, phSubDevices)); - if (NumSubDevices) { - *NumSubDevices = Count; - } + auto hDevice = reinterpret_cast(Device); + auto phSubDevices = reinterpret_cast(SubDevices); + HANDLE_ERRORS(urDevicePartition(hDevice, UrProperties, NumEntries, phSubDevices, + NumSubDevices)); return PI_SUCCESS; } } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 6a4252599da36..b0cef3dd8c8db 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -234,7 +234,7 @@ static const bool ExposeCSliceInAffinityPartitioning = [] { return Flag ? std::atoi(Flag) != 0 : false; }(); -zer_result_t _ur_platform_handle_t::initialize() { +ur_result_t _ur_platform_handle_t::initialize() { // Cache driver properties ZeStruct ZeDriverProperties; ZE_CALL(zeDriverGetProperties, (ZeDriver, &ZeDriverProperties)); @@ -283,25 +283,23 @@ zer_result_t _ur_platform_handle_t::initialize() { // If yes, then set up L0 API pointers if the platform supports it. ZeUSMImport.setZeUSMImport(this); - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } -ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGet( - uint32_t - *NumPlatforms, ///< [in,out] pointer to the number of platforms. - ///< if count is zero, then the call shall update the - ///< value with the total number of platforms available. - ///< if count is greater than the number of platforms - ///< available, then the call shall update the value with - ///< the correct number of platforms available. - zer_platform_handle_t - *Platforms ///< [out][optional][range(0, *pCount)] array of handle of - ///< platforms. if count is less than the number of platforms - ///< available, then platform shall only retrieve that number - ///< of platforms. +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( + uint32_t NumEntries, ///< [in] the number of platforms to be added to + ///< phPlatforms. If phPlatforms is not NULL, then + ///< NumEntries should be greater than zero, otherwise + ///< ::UR_RESULT_ERROR_INVALID_SIZE, will be returned. + ur_platform_handle_t + *Platforms, ///< [out][optional][range(0, NumEntries)] array of handle + ///< of platforms. If NumEntries is less than the number of + ///< platforms available, then + ///< ::urPlatformGet shall only retrieve that number of + ///< platforms. + uint32_t *NumPlatforms ///< [out][optional] returns the total number of + ///< platforms available. ) { - PI_ASSERT(NumPlatforms, ZER_RESULT_INVALID_VALUE); - static std::once_flag ZeCallCountInitialized; try { std::call_once(ZeCallCountInitialized, []() { @@ -310,9 +308,9 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGet( } }); } catch (const std::bad_alloc &) { - return ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } // Setting these environment variables before running zeInit will enable the @@ -338,9 +336,10 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGet( // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { - PI_ASSERT(NumPlatforms != 0, ZER_RESULT_INVALID_VALUE); - *NumPlatforms = 0; - return ZER_RESULT_SUCCESS; + PI_ASSERT(NumEntries != 0, UR_RESULT_ERROR_INVALID_VALUE); + if (NumPlatforms) + *NumPlatforms = 0; + return UR_RESULT_SUCCESS; } if (ZeResult != ZE_RESULT_SUCCESS) { @@ -369,63 +368,65 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGet( ZE_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data())); for (uint32_t I = 0; I < ZeDriverCount; ++I) { - auto Platform = new _zer_platform_handle_t(ZeDrivers[I]); + auto Platform = new ur_platform_handle_t_(ZeDrivers[I]); // Save a copy in the cache for future uses. PiPlatformsCache->push_back(Platform); - zer_result_t Result = Platform->initialize(); - if (Result != ZER_RESULT_SUCCESS) { + ur_result_t Result = Platform->initialize(); + if (Result != UR_RESULT_SUCCESS) { return Result; } } PiPlatformCachePopulated = true; } } catch (const std::bad_alloc &) { - return ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } } // Populate returned platforms from the cache. if (Platforms) { - PI_ASSERT(*NumPlatforms <= PiPlatformsCache->size(), - ZER_RESULT_INVALID_PLATFORM); - std::copy_n(PiPlatformsCache->begin(), *NumPlatforms, Platforms); + PI_ASSERT(NumEntries <= PiPlatformsCache->size(), + UR_RESULT_ERROR_INVALID_PLATFORM); + std::copy_n(PiPlatformsCache->begin(), NumEntries, Platforms); } - if (*NumPlatforms == 0) - *NumPlatforms = PiPlatformsCache->size(); - else - *NumPlatforms = std::min(PiPlatformsCache->size(), (size_t)*NumPlatforms); + if (NumPlatforms) { + if (*NumPlatforms == 0) + *NumPlatforms = PiPlatformsCache->size(); + else + *NumPlatforms = std::min(PiPlatformsCache->size(), (size_t)NumEntries); + } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } -ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGetInfo( - zer_platform_handle_t Platform, ///< [in] handle of the platform - zer_platform_info_t ParamName, ///< [in] type of the info to retrieve - size_t *pSize, ///< [in,out] pointer to the number of bytes needed to return - ///< info queried. the call shall update it with the real - ///< number of bytes needed to return the info - void *ParamValue ///< [out][optional] array of bytes holding the info. - ///< if *pSize is not equal to the real number of bytes - ///< needed to return the info then the - ///< ::ZER_RESULT_ERROR_INVALID_SIZE error is returned and - ///< pPlatformInfo is not used. +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( + ur_platform_handle_t Platform, ///< [in] handle of the platform + ur_platform_info_t ParamName, ///< [in] type of the info to retrieve + size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo. + void *ParamValue, ///< [out][optional] array of bytes holding the info. + ///< If Size is not equal to or greater to the real number + ///< of bytes needed to return the info then the + ///< ::UR_RESULT_ERROR_INVALID_SIZE error is returned and + ///< pPlatformInfo is not used. + size_t *pSizeRet ///< [out][optional] pointer to the actual number of bytes + ///< being queried by pPlatformInfo. ) { - PI_ASSERT(Platform, ZER_RESULT_INVALID_PLATFORM); - UrReturnHelper ReturnValue(pSize, ParamValue); + PI_ASSERT(Platform, UR_RESULT_ERROR_INVALID_PLATFORM); + UrReturnHelper ReturnValue(Size, ParamValue, pSizeRet); switch (ParamName) { - case ZER_PLATFORM_INFO_NAME: + case UR_PLATFORM_INFO_NAME: // TODO: Query Level Zero driver when relevant info is added there. return ReturnValue("Intel(R) oneAPI Unified Runtime over Level-Zero"); - case ZER_PLATFORM_INFO_VENDOR_NAME: + case UR_PLATFORM_INFO_VENDOR_NAME: // TODO: Query Level Zero driver when relevant info is added there. return ReturnValue("Intel(R) Corporation"); - case ZER_PLATFORM_INFO_EXTENSIONS: + case UR_PLATFORM_INFO_EXTENSIONS: // Convention adopted from OpenCL: // "Returns a space-separated list of extension names (the extension // names themselves do not contain any spaces) supported by the platform. @@ -436,10 +437,10 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGetInfo( // return them. For now, hardcoding some extensions we know are supported by // all Level Zero devices. return ReturnValue(ZE_SUPPORTED_EXTENSIONS); - case ZER_PLATFORM_INFO_PROFILE: + case UR_PLATFORM_INFO_PROFILE: // TODO: figure out what this means and how is this used return ReturnValue("FULL_PROFILE"); - case ZER_PLATFORM_INFO_VERSION: + case UR_PLATFORM_INFO_VERSION: // TODO: this should query to zeDriverGetDriverVersion // but we don't yet have the driver handle here. // @@ -450,37 +451,39 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerPlatformGetInfo( return ReturnValue(Platform->ZeDriverApiVersion.c_str()); default: zePrint("piPlatformGetInfo: unrecognized ParamName\n"); - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } -ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGet( - zer_platform_handle_t Platform, ///< [in] handle of the platform instance - zer_device_type_t DeviceType, ///< [in] the type of the devices. - uint32_t *NumDevices, ///< [in,out] pointer to the number of devices. - ///< If count is zero, then the call shall update the - ///< value with the total number of devices available. - ///< If count is greater than the number of devices - ///< available, then the call shall update the value - ///< with the correct number of devices available. - zer_device_handle_t - *Devices ///< [out][optional][range(0, *pCount)] array of handle of - ///< devices. If count is less than the number of devices - ///< available, then platform shall only retrieve that number - ///< of devices. +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( + ur_platform_handle_t Platform, ///< [in] handle of the platform instance + ur_device_type_t DeviceType, ///< [in] the type of the devices. + uint32_t NumEntries, ///< [in] the number of devices to be added to + ///< phDevices. If phDevices in not NULL then + ///< NumEntries should be greater than zero, otherwise + ///< ::UR_RESULT_ERROR_INVALID_SIZE, will be returned. + ur_device_handle_t + *Devices, ///< [out][optional][range(0, NumEntries)] array of handle of + ///< devices. If NumEntries is less than the number of devices + ///< available, then platform shall only retrieve that number + ///< of devices. + uint32_t *NumDevices ///< [out][optional] pointer to the number of devices. + ///< pNumDevices will be updated with the total number + ///< of devices available. + ) { - PI_ASSERT(Platform, ZER_RESULT_INVALID_PLATFORM); + PI_ASSERT(Platform, UR_RESULT_ERROR_INVALID_PLATFORM); auto Res = Platform->populateDeviceCacheIfNeeded(); - if (Res != ZER_RESULT_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { return Res; } // Filter available devices based on input DeviceType. - std::vector MatchedDevices; + std::vector MatchedDevices; std::shared_lock Lock(Platform->PiDevicesCacheMutex); for (auto &D : Platform->PiDevicesCache) { // Only ever return root-devices from piDevicesGet, but the @@ -490,20 +493,20 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGet( bool Matched = false; switch (DeviceType) { - case ZER_DEVICE_TYPE_ALL: + case UR_DEVICE_TYPE_ALL: Matched = true; break; - case ZER_DEVICE_TYPE_GPU: - case ZER_DEVICE_TYPE_DEFAULT: + case UR_DEVICE_TYPE_GPU: + case UR_DEVICE_TYPE_DEFAULT: Matched = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_GPU); break; - case ZER_DEVICE_TYPE_CPU: + case UR_DEVICE_TYPE_CPU: Matched = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_CPU); break; - case ZER_DEVICE_TYPE_FPGA: + case UR_DEVICE_TYPE_FPGA: Matched = D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_FPGA; break; - case ZER_DEVICE_TYPE_MCA: + case UR_DEVICE_TYPE_MCA: Matched = D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_MCA; break; default: @@ -517,63 +520,68 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGet( uint32_t ZeDeviceCount = MatchedDevices.size(); - if (*NumDevices == 0) - *NumDevices = ZeDeviceCount; - else { - auto N = std::min(ZeDeviceCount, *NumDevices); + auto N = std::min(ZeDeviceCount, NumEntries); + if (Devices) std::copy_n(MatchedDevices.begin(), N, Devices); + + if (NumDevices) { + if (*NumDevices == 0) + *NumDevices = ZeDeviceCount; + else + *NumDevices = N; } - return ZER_RESULT_SUCCESS; + + return UR_RESULT_SUCCESS; } -ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( - zer_device_handle_t Device, ///< [in] handle of the device instance - zer_device_info_t ParamName, ///< [in] type of the info to retrieve - size_t *pSize, ///< [in,out] pointer to the number of bytes needed to return - ///< info queried. The call shall update it with the real - ///< number of bytes needed to return the info - void *ParamValue ///< [out][optional] array of bytes holding the info. - ///< If *pSize input is not 0 and not equal to the real - ///< number of bytes needed to return the info then the - ///< ::ZER_RESULT_ERROR_INVALID_SIZE error is returned and - ///< pDeviceInfo is not used. +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( + ur_device_handle_t Device, ///< [in] handle of the device instance + ur_device_info_t ParamName, ///< [in] type of the info to retrieve + size_t propSize, ///< [in] the number of bytes pointed to by pDeviceInfo. + void *ParamValue, ///< [out][optional] array of bytes holding the info. + ///< If propSize is not equal to or greater than the real + ///< number of bytes needed to return the info then the + ///< ::UR_RESULT_ERROR_INVALID_SIZE error is returned and + ///< pDeviceInfo is not used. + size_t *pSize ///< [out][optional] pointer to the actual size in bytes of + ///< the queried infoType. ) { - PI_ASSERT(Device, ZER_RESULT_INVALID_DEVICE); - UrReturnHelper ReturnValue(pSize, ParamValue); + PI_ASSERT(Device, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UrReturnHelper ReturnValue(propSize, ParamValue, pSize); ze_device_handle_t ZeDevice = Device->ZeDevice; switch ((int)ParamName) { - case ZER_DEVICE_INFO_TYPE: { + case UR_DEVICE_INFO_TYPE: { switch (Device->ZeDeviceProperties->type) { case ZE_DEVICE_TYPE_GPU: - return ReturnValue(ZER_DEVICE_TYPE_GPU); + return ReturnValue(UR_DEVICE_TYPE_GPU); case ZE_DEVICE_TYPE_CPU: - return ReturnValue(ZER_DEVICE_TYPE_CPU); + return ReturnValue(UR_DEVICE_TYPE_CPU); case ZE_DEVICE_TYPE_FPGA: - return ReturnValue(ZER_DEVICE_TYPE_FPGA); + return ReturnValue(UR_DEVICE_TYPE_FPGA); default: zePrint("This device type is not supported\n"); - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } } - case ZER_DEVICE_INFO_PARENT_DEVICE: + case UR_DEVICE_INFO_PARENT_DEVICE: return ReturnValue(Device->RootDevice); - case ZER_DEVICE_INFO_PLATFORM: + case UR_DEVICE_INFO_PLATFORM: return ReturnValue(Device->Platform); - case ZER_DEVICE_INFO_VENDOR_ID: + case UR_DEVICE_INFO_VENDOR_ID: return ReturnValue(uint32_t{Device->ZeDeviceProperties->vendorId}); - case ZER_DEVICE_INFO_UUID: { + case UR_DEVICE_INFO_UUID: { // Intel extension for device UUID. This returns the UUID as // std::array. For details about this extension, // see sycl/doc/extensions/supported/sycl_ext_intel_device_info.md. const auto &UUID = Device->ZeDeviceProperties->uuid.id; return ReturnValue(UUID, sizeof(UUID)); } - case ZER_DEVICE_INFO_ATOMIC_64: + case UR_DEVICE_INFO_ATOMIC_64: return ReturnValue(uint32_t{Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS}); - case ZER_DEVICE_INFO_EXTENSIONS: { + case UR_DEVICE_INFO_EXTENSIONS: { // Convention adopted from OpenCL: // "Returns a space separated list of extension names (the extension // names themselves do not contain any spaces) supported by the device." @@ -628,38 +636,38 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( return ReturnValue(SupportedExtensions.c_str()); } - case ZER_DEVICE_INFO_NAME: + case UR_DEVICE_INFO_NAME: return ReturnValue(Device->ZeDeviceProperties->name); // zeModuleCreate allows using root device module for sub-devices: // > The application must only use the module for the device, or its // > sub-devices, which was provided during creation. - case ZER_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE: + case UR_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE: return ReturnValue(uint32_t{0}); - case ZER_DEVICE_INFO_COMPILER_AVAILABLE: + case UR_DEVICE_INFO_COMPILER_AVAILABLE: return ReturnValue(uint32_t{1}); - case ZER_DEVICE_INFO_LINKER_AVAILABLE: + case UR_DEVICE_INFO_LINKER_AVAILABLE: return ReturnValue(uint32_t{1}); - case ZER_DEVICE_INFO_MAX_COMPUTE_UNITS: { + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { uint32_t MaxComputeUnits = Device->ZeDeviceProperties->numEUsPerSubslice * Device->ZeDeviceProperties->numSubslicesPerSlice * Device->ZeDeviceProperties->numSlices; bool RepresentsCSlice = - Device->QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute] + Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeIndex >= 0; if (RepresentsCSlice) MaxComputeUnits /= Device->RootDevice->SubDevices.size(); return ReturnValue(uint32_t{MaxComputeUnits}); } - case ZER_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: // Level Zero spec defines only three dimensions return ReturnValue(uint32_t{3}); - case ZER_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: return ReturnValue( uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize}); - case ZER_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { struct { size_t Arr[3]; } MaxGroupSize = {{Device->ZeDeviceComputeProperties->maxGroupSizeX, @@ -667,7 +675,7 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( Device->ZeDeviceComputeProperties->maxGroupSizeZ}}; return ReturnValue(MaxGroupSize); } - case ZER_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D: { + case UR_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D: { struct { size_t Arr[3]; } MaxGroupCounts = {{Device->ZeDeviceComputeProperties->maxGroupCountX, @@ -675,15 +683,15 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( Device->ZeDeviceComputeProperties->maxGroupCountZ}}; return ReturnValue(MaxGroupCounts); } - case ZER_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: return ReturnValue(uint32_t{Device->ZeDeviceProperties->coreClockRate}); - case ZER_DEVICE_INFO_ADDRESS_BITS: { + case UR_DEVICE_INFO_ADDRESS_BITS: { // TODO: To confirm with spec. return ReturnValue(uint32_t{64}); } - case ZER_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); - case ZER_DEVICE_INFO_GLOBAL_MEM_SIZE: { + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { uint64_t GlobalMemSize = 0; for (const auto &ZeDeviceMemoryExtProperty : Device->ZeDeviceMemoryProperties->second) { @@ -691,282 +699,278 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( } return ReturnValue(uint64_t{GlobalMemSize}); } - case ZER_DEVICE_INFO_LOCAL_MEM_SIZE: + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: return ReturnValue( uint64_t{Device->ZeDeviceComputeProperties->maxSharedLocalMemory}); - case ZER_DEVICE_INFO_IMAGE_SUPPORTED: + case UR_DEVICE_INFO_IMAGE_SUPPORTED: return ReturnValue( uint32_t{Device->ZeDeviceImageProperties->maxImageDims1D > 0}); - case ZER_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: return ReturnValue(uint32_t{(Device->ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0}); - case ZER_DEVICE_INFO_AVAILABLE: + case UR_DEVICE_INFO_AVAILABLE: return ReturnValue(uint32_t{ZeDevice ? true : false}); - case ZER_DEVICE_INFO_VENDOR: + case UR_DEVICE_INFO_VENDOR: // TODO: Level-Zero does not return vendor's name at the moment // only the ID. return ReturnValue("Intel(R) Corporation"); - case ZER_DEVICE_INFO_DRIVER_VERSION: + case UR_DEVICE_INFO_DRIVER_VERSION: return ReturnValue(Device->Platform->ZeDriverVersion.c_str()); - case ZER_DEVICE_INFO_VERSION: + case UR_DEVICE_INFO_VERSION: return ReturnValue(Device->Platform->ZeDriverApiVersion.c_str()); - case ZER_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != ZER_RESULT_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { return Res; } return ReturnValue((uint32_t)Device->SubDevices.size()); } - case ZER_DEVICE_INFO_REFERENCE_COUNT: + case UR_DEVICE_INFO_REFERENCE_COUNT: return ReturnValue(uint32_t{Device->RefCount.load()}); - case ZER_DEVICE_INFO_PARTITION_PROPERTIES: { + case UR_DEVICE_INFO_PARTITION_PROPERTIES: { // SYCL spec says: if this SYCL device cannot be partitioned into at least // two sub devices then the returned vector must be empty. auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != ZER_RESULT_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { return Res; } uint32_t ZeSubDeviceCount = Device->SubDevices.size(); if (ZeSubDeviceCount < 2) { - return ReturnValue((zer_device_partition_property_flag_t)0); + return ReturnValue((ur_device_partition_property_t)0); } bool PartitionedByCSlice = Device->SubDevices[0]->isCCS(); auto ReturnHelper = [&](auto... Partitions) { struct { - zer_device_partition_property_flag_t Arr[sizeof...(Partitions) + 1]; + ur_device_partition_property_t Arr[sizeof...(Partitions) + 1]; } PartitionProperties = { - {Partitions..., zer_device_partition_property_flag_t(0)}}; + {Partitions..., ur_device_partition_property_t(0)}}; return ReturnValue(PartitionProperties); }; if (ExposeCSliceInAffinityPartitioning) { if (PartitionedByCSlice) - return ReturnHelper( - ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN); + return ReturnHelper(UR_DEVICE_PARTITION_BY_CSLICE, + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN); else - return ReturnHelper( - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN); + return ReturnHelper(UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN); } else { - return ReturnHelper( - PartitionedByCSlice - ? ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE - : ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN); + return ReturnHelper(PartitionedByCSlice + ? UR_DEVICE_PARTITION_BY_CSLICE + : UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN); } break; } - case ZER_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - return ReturnValue(zer_device_affinity_domain_flag_t( - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA | - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); - case ZER_DEVICE_INFO_PARTITION_TYPE: { + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + return ReturnValue(ur_device_affinity_domain_flag_t( + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA | + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); + case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. if (!Device->isSubDevice()) - return ReturnValue(zer_device_partition_property_flag_t(0)); + return ReturnValue(ur_device_partition_property_t(0)); if (Device->isCCS()) { struct { - zer_device_partition_property_flag_t Arr[2]; - } PartitionProperties = { - {ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, - zer_device_partition_property_flag_t(0)}}; + ur_device_partition_property_t Arr[2]; + } PartitionProperties = {{UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE, + ur_device_partition_property_t(0)}}; return ReturnValue(PartitionProperties); } struct { - zer_device_partition_property_flag_t Arr[3]; + ur_device_partition_property_t Arr[3]; } PartitionProperties = { - {ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN, - (zer_device_partition_property_flag_t) - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, - zer_device_partition_property_flag_t(0)}}; + {UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + (ur_device_partition_property_t) + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, + ur_device_partition_property_t(0)}}; return ReturnValue(PartitionProperties); } // Everything under here is not supported yet - case ZER_DEVICE_INFO_OPENCL_C_VERSION: + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: return ReturnValue(""); - case ZER_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: return ReturnValue(uint32_t{true}); - case ZER_DEVICE_INFO_PRINTF_BUFFER_SIZE: + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: return ReturnValue( size_t{Device->ZeDeviceModuleProperties->printfBufferSize}); - case ZER_DEVICE_INFO_PROFILE: + case UR_DEVICE_INFO_PROFILE: return ReturnValue("FULL_PROFILE"); - case ZER_DEVICE_INFO_BUILT_IN_KERNELS: + case UR_DEVICE_INFO_BUILT_IN_KERNELS: // TODO: To find out correct value return ReturnValue(""); - case ZER_DEVICE_INFO_QUEUE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_PROPERTIES: return ReturnValue( - zer_queue_flag_t(ZER_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - ZER_QUEUE_FLAG_PROFILING_ENABLE)); - case ZER_DEVICE_INFO_EXECUTION_CAPABILITIES: - return ReturnValue(zer_device_exec_capability_flag_t{ - ZER_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL}); - case ZER_DEVICE_INFO_ENDIAN_LITTLE: + ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | + UR_QUEUE_FLAG_PROFILING_ENABLE)); + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + return ReturnValue(ur_device_exec_capability_flag_t{ + UR_DEVICE_EXEC_CAPABILITY_FLAG_NATIVE_KERNEL}); + case UR_DEVICE_INFO_ENDIAN_LITTLE: return ReturnValue(uint32_t{true}); - case ZER_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: return ReturnValue(uint32_t{Device->ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_ECC}); - case ZER_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: return ReturnValue(size_t{Device->ZeDeviceProperties->timerResolution}); - case ZER_DEVICE_INFO_LOCAL_MEM_TYPE: - return ReturnValue(ZER_DEVICE_LOCAL_MEM_TYPE_LOCAL); - case ZER_DEVICE_INFO_MAX_CONSTANT_ARGS: + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + return ReturnValue(UR_DEVICE_LOCAL_MEM_TYPE_LOCAL); + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: return ReturnValue(uint32_t{64}); - case ZER_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: return ReturnValue( uint64_t{Device->ZeDeviceImageProperties->maxImageBufferSize}); - case ZER_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: - return ReturnValue(ZER_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE); - case ZER_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE); + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: return ReturnValue( // TODO[1.0]: how to query cache line-size? uint32_t{1}); - case ZER_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: return ReturnValue(uint64_t{Device->ZeDeviceCacheProperties->cacheSize}); - case ZER_DEVICE_INFO_MAX_PARAMETER_SIZE: + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: return ReturnValue( size_t{Device->ZeDeviceModuleProperties->maxArgumentsSize}); - case ZER_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: // SYCL/OpenCL spec is vague on what this means exactly, but seems to // be for "alignment requirement (in bits) for sub-buffer offsets." // An OpenCL implementation returns 8*128, but Level Zero can do just 8, // meaning unaligned access for values of types larger than 8 bits. return ReturnValue(uint32_t{8}); - case ZER_DEVICE_INFO_MAX_SAMPLERS: + case UR_DEVICE_INFO_MAX_SAMPLERS: return ReturnValue(uint32_t{Device->ZeDeviceImageProperties->maxSamplers}); - case ZER_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: return ReturnValue( uint32_t{Device->ZeDeviceImageProperties->maxReadImageArgs}); - case ZER_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: return ReturnValue( uint32_t{Device->ZeDeviceImageProperties->maxWriteImageArgs}); - case ZER_DEVICE_INFO_SINGLE_FP_CONFIG: { + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { uint64_t SingleFPValue = 0; ze_device_fp_flags_t ZeSingleFPCapabilities = Device->ZeDeviceModuleProperties->fp32flags; if (ZE_DEVICE_FP_FLAG_DENORM & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_DENORM; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_DENORM; } if (ZE_DEVICE_FP_FLAG_INF_NAN & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_INF_NAN; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_INF_NAN; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_INF & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_INF; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_INF; } if (ZE_DEVICE_FP_FLAG_FMA & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_FMA; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_FMA; } if (ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT & ZeSingleFPCapabilities) { - SingleFPValue |= ZER_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + SingleFPValue |= UR_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; } return ReturnValue(uint64_t{SingleFPValue}); } - case ZER_DEVICE_INFO_HALF_FP_CONFIG: { + case UR_DEVICE_INFO_HALF_FP_CONFIG: { uint64_t HalfFPValue = 0; ze_device_fp_flags_t ZeHalfFPCapabilities = Device->ZeDeviceModuleProperties->fp16flags; if (ZE_DEVICE_FP_FLAG_DENORM & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_DENORM; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_DENORM; } if (ZE_DEVICE_FP_FLAG_INF_NAN & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_INF_NAN; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_INF_NAN; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_INF & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_INF; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_INF; } if (ZE_DEVICE_FP_FLAG_FMA & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_FMA; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_FMA; } if (ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT & ZeHalfFPCapabilities) { - HalfFPValue |= ZER_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + HalfFPValue |= UR_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; } return ReturnValue(uint64_t{HalfFPValue}); } - case ZER_DEVICE_INFO_DOUBLE_FP_CONFIG: { + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { uint64_t DoubleFPValue = 0; ze_device_fp_flags_t ZeDoubleFPCapabilities = Device->ZeDeviceModuleProperties->fp64flags; if (ZE_DEVICE_FP_FLAG_DENORM & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_DENORM; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_DENORM; } if (ZE_DEVICE_FP_FLAG_INF_NAN & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_INF_NAN; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_INF_NAN; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; } if (ZE_DEVICE_FP_FLAG_ROUND_TO_INF & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_ROUND_TO_INF; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_ROUND_TO_INF; } if (ZE_DEVICE_FP_FLAG_FMA & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_FMA; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_FMA; } if (ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT & ZeDoubleFPCapabilities) { - DoubleFPValue |= ZER_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + DoubleFPValue |= UR_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; } return ReturnValue(uint64_t{DoubleFPValue}); } - case ZER_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: return ReturnValue(size_t{Device->ZeDeviceImageProperties->maxImageDims2D}); - case ZER_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: return ReturnValue(size_t{Device->ZeDeviceImageProperties->maxImageDims2D}); - case ZER_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: return ReturnValue(size_t{Device->ZeDeviceImageProperties->maxImageDims3D}); - case ZER_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: return ReturnValue(size_t{Device->ZeDeviceImageProperties->maxImageDims3D}); - case ZER_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: return ReturnValue(size_t{Device->ZeDeviceImageProperties->maxImageDims3D}); - case ZER_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: return ReturnValue( size_t{Device->ZeDeviceImageProperties->maxImageBufferSize}); - case ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: return ReturnValue( size_t{Device->ZeDeviceImageProperties->maxImageArraySlices}); // Handle SIMD widths. // TODO: can we do better than this? - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 1); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 2); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 4); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 8); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 4); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 8); - case ZER_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: - case ZER_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 2); - case ZER_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Max_num_sub_Groups = maxTotalGroupSize/min(set of subGroupSizes); uint32_t MinSubGroupSize = Device->ZeDeviceComputeProperties->subGroupSizes[0]; @@ -978,18 +982,18 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( return ReturnValue(Device->ZeDeviceComputeProperties->maxTotalGroupSize / MinSubGroupSize); } - case ZER_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { // TODO: Not supported yet. Needs to be updated after support is added. return ReturnValue(uint32_t{false}); } - case ZER_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { // ze_device_compute_properties.subGroupSizes is in uint32_t whereas the // expected return is size_t datatype. size_t can be 8 bytes of data. return ReturnValue.template operator()( Device->ZeDeviceComputeProperties->subGroupSizes, Device->ZeDeviceComputeProperties->numSubGroupSizes); } - case ZER_DEVICE_INFO_IL_VERSION: { + case UR_DEVICE_INFO_IL_VERSION: { // Set to a space separated list of IL version strings of the form // _.. // "SPIR-V" is a required IL prefix when cl_khr_il_progam extension is @@ -1006,34 +1010,34 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( std::string ILVersion(SpirvVersionString, Len); return ReturnValue(ILVersion.c_str()); } - case ZER_DEVICE_INFO_USM_HOST_SUPPORT: - case ZER_DEVICE_INFO_USM_DEVICE_SUPPORT: - case ZER_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - case ZER_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - case ZER_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { auto MapCaps = [](const ze_memory_access_cap_flags_t &ZeCapabilities) { uint64_t Capabilities = 0; if (ZeCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_RW) - Capabilities |= ZER_EXT_USM_CAPS_ACCESS; + Capabilities |= UR_EXT_USM_CAPS_ACCESS; if (ZeCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC) - Capabilities |= ZER_EXT_USM_CAPS_ATOMIC_ACCESS; + Capabilities |= UR_EXT_USM_CAPS_ATOMIC_ACCESS; if (ZeCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT) - Capabilities |= ZER_EXT_USM_CAPS_CONCURRENT_ACCESS; + Capabilities |= UR_EXT_USM_CAPS_CONCURRENT_ACCESS; if (ZeCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC) - Capabilities |= ZER_EXT_USM_CAPS_CONCURRENT_ATOMIC_ACCESS; + Capabilities |= UR_EXT_USM_CAPS_CONCURRENT_ATOMIC_ACCESS; return Capabilities; }; auto &Props = Device->ZeDeviceMemoryAccessProperties; switch (ParamName) { - case ZER_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_HOST_SUPPORT: return ReturnValue(MapCaps(Props->hostAllocCapabilities)); - case ZER_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: return ReturnValue(MapCaps(Props->deviceAllocCapabilities)); - case ZER_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: return ReturnValue(MapCaps(Props->sharedSingleDeviceAllocCapabilities)); - case ZER_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: return ReturnValue(MapCaps(Props->sharedCrossDeviceAllocCapabilities)); - case ZER_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: return ReturnValue(MapCaps(Props->sharedSystemAllocCapabilities)); default: die("piDeviceGetInfo: enexpected ParamName."); @@ -1041,12 +1045,12 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( } // intel extensions for GPU information - case ZER_EXT_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_DEVICE_ID: return ReturnValue(uint32_t{Device->ZeDeviceProperties->deviceId}); - case ZER_DEVICE_INFO_PCI_ADDRESS: { + case UR_DEVICE_INFO_PCI_ADDRESS: { if (getenv("ZES_ENABLE_SYSMAN") == nullptr) { zePrint("Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n"); - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } ZesStruct ZeDevicePciProperties; ZE_CALL(zesDevicePciGetProperties, (ZeDevice, &ZeDevicePciProperties)); @@ -1060,11 +1064,11 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( return ReturnValue(AddressBuffer); } - case ZER_EXT_DEVICE_INFO_FREE_MEMORY: { + case UR_EXT_DEVICE_INFO_FREE_MEMORY: { if (getenv("ZES_ENABLE_SYSMAN") == nullptr) { setErrorMessage("Set ZES_ENABLE_SYSMAN=1 to obtain free memory", - ZER_RESULT_SUCCESS); - return ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR; + UR_RESULT_SUCCESS); + return UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR; } // Only report device memory which zeMemAllocDevice can allocate from. // Currently this is only the one enumerated with ordinal 0. @@ -1092,7 +1096,7 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( } return ReturnValue(FreeMemory); } - case ZER_EXT_DEVICE_INFO_MEMORY_CLOCK_RATE: { + case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. if (Device->ZeDeviceMemoryProperties->first.empty()) return ReturnValue(uint32_t{0}); @@ -1108,7 +1112,7 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( Device->ZeDeviceMemoryProperties->first.end(), Comp); return ReturnValue(uint32_t{MinIt->maxClockRate}); } - case ZER_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH: { + case UR_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH: { // If there are not any memory modules then return 0. if (Device->ZeDeviceMemoryProperties->first.empty()) return ReturnValue(uint32_t{0}); @@ -1124,52 +1128,52 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( Device->ZeDeviceMemoryProperties->first.end(), Comp); return ReturnValue(uint32_t{MinIt->maxBusWidth}); } - case ZER_EXT_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { - if (Device->QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute] + case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + if (Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeIndex >= 0) // Sub-sub-device represents a particular compute index already. return ReturnValue(int32_t{1}); auto ZeDeviceNumIndices = - Device->QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute] + Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeProperties.numQueues; return ReturnValue(int32_t(ZeDeviceNumIndices)); } break; - case ZER_DEVICE_INFO_GPU_EU_COUNT: { + case UR_DEVICE_INFO_GPU_EU_COUNT: { uint32_t count = Device->ZeDeviceProperties->numEUsPerSubslice * Device->ZeDeviceProperties->numSubslicesPerSlice * Device->ZeDeviceProperties->numSlices; return ReturnValue(uint32_t{count}); } - case ZER_DEVICE_INFO_GPU_EU_SIMD_WIDTH: + case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: return ReturnValue( uint32_t{Device->ZeDeviceProperties->physicalEUSimdWidth}); - case ZER_EXT_DEVICE_INFO_GPU_SLICES: + case UR_EXT_DEVICE_INFO_GPU_SLICES: return ReturnValue(uint32_t{Device->ZeDeviceProperties->numSlices}); - case ZER_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: + case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: return ReturnValue( uint32_t{Device->ZeDeviceProperties->numSubslicesPerSlice}); - case ZER_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case UR_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: return ReturnValue(uint32_t{Device->ZeDeviceProperties->numEUsPerSubslice}); - case ZER_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + case UR_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU: return ReturnValue(uint32_t{Device->ZeDeviceProperties->numThreadsPerEU}); - case ZER_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH: + case UR_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH: // currently not supported in level zero runtime - return ZER_RESULT_INVALID_VALUE; - case ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { + return UR_RESULT_ERROR_INVALID_VALUE; + case UR_DEVICE_INFO_BFLOAT16: { // bfloat16 math functions are not yet supported on Intel GPUs. return ReturnValue(bool{false}); } // TODO: Implement. - case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: zePrint("Unsupported ParamName in piGetDeviceInfo\n"); zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName); - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } // SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE can be set to an integer value, or @@ -1181,7 +1185,7 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( // the copy engines will not be used at all. A value of 1 indicates that all // available copy engines can be used. const std::pair -getRangeOfAllowedCopyEngines(const zer_device_handle_t &Device) { +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { static const char *EnvVar = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); // If the environment variable is not set, no copy engines are used when // immediate commandlists are being used. For standard commandlists all are @@ -1213,7 +1217,7 @@ getRangeOfAllowedCopyEngines(const zer_device_handle_t &Device) { return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); } -bool CopyEngineRequested(const zer_device_handle_t &Device) { +bool CopyEngineRequested(const ur_device_handle_t &Device) { int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first; int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second; return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1)); @@ -1252,13 +1256,13 @@ _ur_device_handle_t::useImmediateCommandLists() { } } -zer_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal, - int SubSubDeviceIndex) { +ur_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } zePrint("NOTE: Number of queue groups = %d\n", numQueueGroups); std::vector> @@ -1270,9 +1274,9 @@ zer_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal, for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { - QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute].ZeOrdinal = + QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal = i; - QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute] + QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeProperties = QueueGroupProperties[i]; break; } @@ -1283,18 +1287,18 @@ zer_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal, // handle + Level-Zero compute group/engine index]. Only the specified // index queue will be used to submit work to the sub-sub-device. if (SubSubDeviceOrdinal >= 0) { - QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute].ZeOrdinal = + QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal = SubSubDeviceOrdinal; - QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute].ZeIndex = + QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeIndex = SubSubDeviceIndex; } else { // Proceed with initialization for root and sub-device // How is it possible that there are no "compute" capabilities? - if (QueueGroup[_zer_device_handle_t::queue_group_info_t::Compute] - .ZeOrdinal < 0) { - return ZER_RESULT_ERROR_UNKNOWN; + if (QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal < + 0) { + return UR_RESULT_ERROR_UNKNOWN; } - if (CopyEngineRequested((zer_device_handle_t)this)) { + if (CopyEngineRequested((ur_device_handle_t)this)) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1396,16 +1400,16 @@ zer_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal, ImmCommandListsPreferred = false; // (ZeDeviceProperties->deviceId & 0xff0) == 0xbd0; - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } // Get the cached PI device created for the L0 device handle. // Return NULL if no such PI device found. -zer_device_handle_t +ur_device_handle_t _ur_platform_handle_t::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { - zer_result_t Res = populateDeviceCacheIfNeeded(); - if (Res != ZER_RESULT_SUCCESS) { + ur_result_t Res = populateDeviceCacheIfNeeded(); + if (Res != UR_RESULT_SUCCESS) { return nullptr; } @@ -1416,7 +1420,7 @@ _ur_platform_handle_t::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { // filter out PI sub-sub-devices. std::shared_lock Lock(PiDevicesCacheMutex); auto it = std::find_if(PiDevicesCache.begin(), PiDevicesCache.end(), - [&](std::unique_ptr<_zer_device_handle_t> &D) { + [&](std::unique_ptr &D) { return D.get()->ZeDevice == ZeDevice && (D.get()->RootDevice == nullptr || D.get()->RootDevice->RootDevice == nullptr); @@ -1428,11 +1432,11 @@ _ur_platform_handle_t::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { } // Check the device cache and load it if necessary. -zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { +ur_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { std::scoped_lock Lock(PiDevicesCacheMutex); if (DeviceCachePopulated) { - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } uint32_t ZeDeviceCount = 0; @@ -1443,10 +1447,10 @@ zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { ZE_CALL(zeDeviceGet, (ZeDriver, &ZeDeviceCount, ZeDevices.data())); for (uint32_t I = 0; I < ZeDeviceCount; ++I) { - std::unique_ptr<_zer_device_handle_t> Device( - new _zer_device_handle_t(ZeDevices[I], (zer_platform_handle_t)this)); + std::unique_ptr Device( + new ur_device_handle_t_(ZeDevices[I], (ur_platform_handle_t)this)); auto Result = Device->initialize(); - if (Result != ZER_RESULT_SUCCESS) { + if (Result != UR_RESULT_SUCCESS) { return Result; } @@ -1464,11 +1468,11 @@ zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { // Wrap the Level Zero sub-devices into PI sub-devices, and add them to // cache. for (uint32_t I = 0; I < SubDevicesCount; ++I) { - std::unique_ptr<_zer_device_handle_t> PiSubDevice( - new _zer_device_handle_t( - ZeSubdevices[I], (zer_platform_handle_t)this, Device.get())); + std::unique_ptr PiSubDevice( + new ur_device_handle_t_(ZeSubdevices[I], (ur_platform_handle_t)this, + Device.get())); auto Result = PiSubDevice->initialize(); - if (Result != ZER_RESULT_SUCCESS) { + if (Result != UR_RESULT_SUCCESS) { delete[] ZeSubdevices; return Result; } @@ -1480,7 +1484,7 @@ zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } std::vector QueueGroupProperties( numQueueGroups); @@ -1511,12 +1515,12 @@ zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { for (uint32_t J = 0; J < Ordinals.size(); ++J) { for (uint32_t K = 0; K < QueueGroupProperties[Ordinals[J]].numQueues; ++K) { - std::unique_ptr<_zer_device_handle_t> PiSubSubDevice( - new _zer_device_handle_t(ZeSubdevices[I], - (zer_platform_handle_t)this, - PiSubDevice.get())); + std::unique_ptr PiSubSubDevice( + new ur_device_handle_t_(ZeSubdevices[I], + (ur_platform_handle_t)this, + PiSubDevice.get())); auto Result = PiSubSubDevice->initialize(Ordinals[J], K); - if (Result != ZER_RESULT_SUCCESS) { + if (Result != UR_RESULT_SUCCESS) { return Result; } @@ -1538,26 +1542,26 @@ zer_result_t _ur_platform_handle_t::populateDeviceCacheIfNeeded() { PiDevicesCache.push_back(std::move(Device)); } } catch (const std::bad_alloc &) { - return ZER_RESULT_OUT_OF_HOST_MEMORY; + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } DeviceCachePopulated = true; - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } -zer_result_t zerDeviceGetReference(zer_device_handle_t Device) { - PI_ASSERT(Device, ZER_RESULT_INVALID_DEVICE); +ur_result_t urDeviceRetain(ur_device_handle_t Device) { + PI_ASSERT(Device, UR_RESULT_ERROR_INVALID_NULL_HANDLE); // The root-device ref-count remains unchanged (always 1). if (Device->isSubDevice()) { Device->RefCount.increment(); } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } -zer_result_t zerDeviceRelease(zer_device_handle_t Device) { - PI_ASSERT(Device, ZER_RESULT_INVALID_DEVICE); +ur_result_t urDeviceRelease(ur_device_handle_t Device) { + PI_ASSERT(Device, UR_RESULT_ERROR_INVALID_NULL_HANDLE); // Root devices are destroyed during the piTearDown process. if (Device->isSubDevice()) { @@ -1566,49 +1570,80 @@ zer_result_t zerDeviceRelease(zer_device_handle_t Device) { } } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; +} + +void ZeUSMImportExtension::setZeUSMImport(_ur_platform_handle_t *Platform) { + // Whether env var SYCL_USM_HOSTPTR_IMPORT has been set requesting + // host ptr import during buffer creation. + const char *USMHostPtrImportStr = std::getenv("SYCL_USM_HOSTPTR_IMPORT"); + if (!USMHostPtrImportStr || std::atoi(USMHostPtrImportStr) == 0) + return; + + // Check if USM hostptr import feature is available. + ze_driver_handle_t DriverHandle = Platform->ZeDriver; + if (ZE_CALL_NOCHECK( + zeDriverGetExtensionFunctionAddress, + (DriverHandle, "zexDriverImportExternalPointer", + reinterpret_cast(&zexDriverImportExternalPointer))) == 0) { + ZE_CALL_NOCHECK( + zeDriverGetExtensionFunctionAddress, + (DriverHandle, "zexDriverReleaseImportedPointer", + reinterpret_cast(&zexDriverReleaseImportedPointer))); + // Hostptr import/release is turned on because it has been requested + // by the env var, and this platform supports the APIs. + Enabled = true; + // Hostptr import is only possible if piMemBufferCreate receives a + // hostptr as an argument. The SYCL runtime passes a host ptr + // only when SYCL_HOST_UNIFIED_MEMORY is enabled. Therefore we turn it on. + setEnvVar("SYCL_HOST_UNIFIED_MEMORY", "1"); + } +} +void ZeUSMImportExtension::doZeUSMImport(ze_driver_handle_t DriverHandle, + void *HostPtr, size_t Size) { + ZE_CALL_NOCHECK(zexDriverImportExternalPointer, + (DriverHandle, HostPtr, Size)); +} +void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, + void *HostPtr) { + ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); } -ZER_APIEXPORT zer_result_t ZER_APICALL zerDevicePartition( - zer_device_handle_t Device, ///< [in] handle of the device to partition. - zer_device_partition_property_value_t - *Properties, ///< [in] null-terminated array of pair - ///< of the requested partitioning. - uint32_t - *NumDevices, ///< [in,out] pointer to the number of sub-devices. - ///< If count is zero, then the function shall update the - ///< value with the total number of sub-devices available. - ///< If count is greater than the number of sub-devices - ///< available, then the function shall update the value - ///< with the correct number of sub-devices available. - zer_device_handle_t - *OutDevices ///< [out][optional][range(0, *pCount)] array of handle of - ///< devices. If count is less than the number of - ///< sub-devices available, then the function shall only - ///< retrieve that number of sub-devices. +UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( + ur_device_handle_t Device, ///< [in] handle of the device to partition. + const ur_device_partition_property_t + *Properties, ///< [in] null-terminated array of <$_device_partition_t + ///< enum, value> pairs. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t + *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle + ///< of devices. If NumDevices is less than the number of + ///< sub-devices available, then the function shall only + ///< retrieve that number of sub-devices. + uint32_t *pNumDevicesRet ///< [out][optional] pointer to the number of + ///< sub-devices the device can be partitioned into + ///< according to the partitioning property. ) { - PI_ASSERT(NumDevices, ZER_RESULT_INVALID_VALUE); - PI_ASSERT(Device, ZER_RESULT_INVALID_DEVICE); + PI_ASSERT(Device, UR_RESULT_ERROR_INVALID_NULL_HANDLE); // Other partitioning ways are not supported by Level Zero - if (Properties->property == - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN) { - if ((Properties->value != - ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties->value != ZER_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) - return ZER_RESULT_INVALID_VALUE; - } else if (Properties->property == - ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE) { - if (Properties->value != 0) - return ZER_RESULT_INVALID_VALUE; + if (Properties[0] == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties[1] != UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties[1] != UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else if (Properties[0] == UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE) { + if (Properties[1] != 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } } else { - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } // Devices cache is normally created in piDevicesGet but still make // sure that cache is populated. // auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != ZER_RESULT_SUCCESS) { + if (Res != UR_RESULT_SUCCESS) { return Res; } @@ -1621,35 +1656,37 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDevicePartition( // SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that // still expose CSlices in partitioning by affinity domain for compatibility // reasons. - if (Properties->property == - ZER_DEVICE_PARTITION_PROPERTY_FLAG_BY_AFFINITY_DOMAIN && + if (Properties[0] == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && !ExposeCSliceInAffinityPartitioning) { - if (Device->isSubDevice()) + if (Device->isSubDevice()) { return 0; + } } - if (Properties->property == - ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE) { + if (Properties[0] == UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE) { // Not a CSlice-based partitioning. - if (!Device->SubDevices[0]->isCCS()) + if (!Device->SubDevices[0]->isCCS()) { return 0; + } } return Device->SubDevices.size(); }(); - if (*NumDevices) { - // TODO: Consider support for partitioning to <= total sub-devices. - // Currently supported partitioning (by affinity domain/numa) would always - // partition to all sub-devices. - // - PI_ASSERT(*NumDevices == EffectiveNumDevices, ZER_RESULT_INVALID_VALUE); + // TODO: Consider support for partitioning to <= total sub-devices. + // Currently supported partitioning (by affinity domain/numa) would always + // partition to all sub-devices. + // + if (NumDevices !=0) + PI_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - for (uint32_t I = 0; I < *NumDevices; I++) { - OutDevices[I] = Device->SubDevices[I]; - // reusing the same pi_device needs to increment the reference count - zerDeviceGetReference(OutDevices[I]); - } + for (uint32_t I = 0; I < NumDevices; I++) { + OutDevices[I] = Device->SubDevices[I]; + // reusing the same pi_device needs to increment the reference count + urDeviceRetain(OutDevices[I]); + } + + if (pNumDevicesRet) { + *pNumDevicesRet = EffectiveNumDevices; } - *NumDevices = EffectiveNumDevices; - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp index ed90716a5a987..dfd0f272390f8 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp @@ -16,8 +16,8 @@ #include #include +#include #include -#include #include // Returns the ze_structure_type_t to use in .stype of a structured descriptor. @@ -81,40 +81,41 @@ class ZeCall { }; // Map Level Zero runtime error code to UR error code. -static zer_result_t ze2urResult(ze_result_t ZeResult) { - static std::unordered_map ErrorMapping = { - {ZE_RESULT_SUCCESS, ZER_RESULT_SUCCESS}, - {ZE_RESULT_ERROR_DEVICE_LOST, ZER_RESULT_ERROR_DEVICE_LOST}, - {ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, ZER_RESULT_INVALID_OPERATION}, - {ZE_RESULT_ERROR_NOT_AVAILABLE, ZER_RESULT_INVALID_OPERATION}, - {ZE_RESULT_ERROR_UNINITIALIZED, ZER_RESULT_INVALID_PLATFORM}, - {ZE_RESULT_ERROR_INVALID_ARGUMENT, ZER_RESULT_ERROR_INVALID_ARGUMENT}, - {ZE_RESULT_ERROR_INVALID_NULL_POINTER, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_INVALID_SIZE, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_SIZE, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, ZER_RESULT_INVALID_VALUE}, +static ur_result_t ze2urResult(ze_result_t ZeResult) { + static std::unordered_map ErrorMapping = { + {ZE_RESULT_SUCCESS, UR_RESULT_SUCCESS}, + {ZE_RESULT_ERROR_DEVICE_LOST, UR_RESULT_ERROR_DEVICE_LOST}, + {ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, + UR_RESULT_ERROR_INVALID_OPERATION}, + {ZE_RESULT_ERROR_NOT_AVAILABLE, UR_RESULT_ERROR_INVALID_OPERATION}, + {ZE_RESULT_ERROR_UNINITIALIZED, UR_RESULT_ERROR_INVALID_PLATFORM}, + {ZE_RESULT_ERROR_INVALID_ARGUMENT, UR_RESULT_ERROR_INVALID_ARGUMENT}, + {ZE_RESULT_ERROR_INVALID_NULL_POINTER, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_INVALID_SIZE, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_SIZE, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, UR_RESULT_ERROR_INVALID_VALUE}, {ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, - ZER_RESULT_INVALID_EVENT}, - {ZE_RESULT_ERROR_INVALID_ENUMERATION, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, ZER_RESULT_INVALID_BINARY}, - {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, ZER_RESULT_INVALID_KERNEL_NAME}, + UR_RESULT_ERROR_INVALID_EVENT}, + {ZE_RESULT_ERROR_INVALID_ENUMERATION, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, UR_RESULT_ERROR_INVALID_BINARY}, + {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, + UR_RESULT_ERROR_INVALID_KERNEL_NAME}, {ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, - ZER_RESULT_ERROR_INVALID_FUNCTION_NAME}, - {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, ZER_RESULT_INVALID_OPERATION}, + UR_RESULT_ERROR_INVALID_FUNCTION_NAME}, + {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, UR_RESULT_ERROR_INVALID_OPERATION}, {ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, - ZER_RESULT_INVALID_WORK_GROUP_SIZE}, + UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE}, {ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, - ZER_RESULT_ERROR_MODULE_BUILD_FAILURE}, + UR_RESULT_ERROR_MODULE_BUILD_FAILURE}, {ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, - ZER_RESULT_ERROR_OUT_OF_DEVICE_MEMORY}, - {ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, - ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY}}; + UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY}, + {ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, UR_RESULT_ERROR_OUT_OF_HOST_MEMORY}}; auto It = ErrorMapping.find(ZeResult); if (It == ErrorMapping.end()) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } return It->second; } @@ -145,14 +146,14 @@ bool setEnvVar(const char *name, const char *value); ZeCall().doCall(ZeName ZeArgs, #ZeName, #ZeArgs, false) struct _ur_platform_handle_t; -using ur_platform_handle_t = _ur_platform_handle_t *; +// using ur_platform_handle_t = _ur_platform_handle_t *; struct _ur_device_handle_t; -using ur_device_handle_t = _ur_device_handle_t *; +// using ur_device_handle_t = _ur_device_handle_t *; struct _ur_platform_handle_t : public _ur_platform { _ur_platform_handle_t(ze_driver_handle_t Driver) : ZeDriver{Driver} {} // Performs initialization of a newly constructed PI platform. - zer_result_t initialize(); + ur_result_t initialize(); // Level Zero lacks the notion of a platform, but there is a driver, which is // a pretty good fit to keep here. @@ -171,21 +172,21 @@ struct _ur_platform_handle_t : public _ur_platform { bool ZeDriverModuleProgramExtensionFound{false}; // Cache UR devices for reuse - std::vector> PiDevicesCache; + std::vector> PiDevicesCache; pi_shared_mutex PiDevicesCacheMutex; bool DeviceCachePopulated = false; // Check the device cache and load it if necessary. - zer_result_t populateDeviceCacheIfNeeded(); + ur_result_t populateDeviceCacheIfNeeded(); // Return the PI device from cache that represents given native device. // If not found, then nullptr is returned. - zer_device_handle_t getDeviceFromNativeHandle(ze_device_handle_t); + ur_device_handle_t getDeviceFromNativeHandle(ze_device_handle_t); }; struct _ur_device_handle_t : _pi_object { - _ur_device_handle_t(ze_device_handle_t Device, zer_platform_handle_t Plt, - zer_device_handle_t ParentDevice = nullptr) + _ur_device_handle_t(ze_device_handle_t Device, ur_platform_handle_t Plt, + ur_device_handle_t ParentDevice = nullptr) : ZeDevice{Device}, Platform{Plt}, RootDevice{ParentDevice}, ImmCommandListsPreferred{false}, ZeDeviceProperties{}, ZeDeviceComputeProperties{} { @@ -240,8 +241,8 @@ struct _ur_device_handle_t : _pi_object { // Optional param `SubSubDeviceOrdinal` `SubSubDeviceIndex` are the compute // command queue ordinal and index respectively, used to initialize // sub-sub-devices. - zer_result_t initialize(int SubSubDeviceOrdinal = -1, - int SubSubDeviceIndex = -1); + ur_result_t initialize(int SubSubDeviceOrdinal = -1, + int SubSubDeviceIndex = -1); // Level Zero device handle. // This field is only set at _ur_device_handle_t creation time, and cannot @@ -253,19 +254,19 @@ struct _ur_device_handle_t : _pi_object { // reuse The order of sub-devices in this vector is repeated from the // ze_device_handle_t array that are returned from zeDeviceGetSubDevices() // call, which will always return sub-devices in the fixed same order. - std::vector SubDevices; + std::vector SubDevices; // PI platform to which this device belongs. // This field is only set at _ur_device_handle_t creation time, and cannot // change. Therefore it can be accessed without holding a lock on this // _ur_device_handle_t. - zer_platform_handle_t Platform; + ur_platform_handle_t Platform; // Root-device of a sub-device, null if this is not a sub-device. // This field is only set at _ur_device_handle_t creation time, and cannot // change. Therefore it can be accessed without holding a lock on this // _ur_device_handle_t. - const zer_device_handle_t RootDevice; + const ur_device_handle_t RootDevice; // Whether to use immediate commandlists for queues on this device. // For some devices (e.g. PVC) immediate commandlists are preferred. @@ -307,9 +308,9 @@ struct _ur_device_handle_t : _pi_object { ZeCache> ZeDeviceCacheProperties; }; -// TODO: make it into a zer_device_handle_t class member +// TODO: make it into a ur_device_handle_t class member const std::pair -getRangeOfAllowedCopyEngines(const zer_device_handle_t &Device); +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device); class ZeUSMImportExtension { // Pointers to functions that import/release host memory into USM @@ -324,40 +325,10 @@ class ZeUSMImportExtension { ZeUSMImportExtension() : Enabled{false} {} - void setZeUSMImport(ur_platform_handle_t Platform) { - // Whether env var SYCL_USM_HOSTPTR_IMPORT has been set requesting - // host ptr import during buffer creation. - const char *USMHostPtrImportStr = std::getenv("SYCL_USM_HOSTPTR_IMPORT"); - if (!USMHostPtrImportStr || std::atoi(USMHostPtrImportStr) == 0) - return; - - // Check if USM hostptr import feature is available. - ze_driver_handle_t DriverHandle = Platform->ZeDriver; - if (ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress, - (DriverHandle, "zexDriverImportExternalPointer", - reinterpret_cast( - &zexDriverImportExternalPointer))) == 0) { - ZE_CALL_NOCHECK( - zeDriverGetExtensionFunctionAddress, - (DriverHandle, "zexDriverReleaseImportedPointer", - reinterpret_cast(&zexDriverReleaseImportedPointer))); - // Hostptr import/release is turned on because it has been requested - // by the env var, and this platform supports the APIs. - Enabled = true; - // Hostptr import is only possible if piMemBufferCreate receives a - // hostptr as an argument. The SYCL runtime passes a host ptr - // only when SYCL_HOST_UNIFIED_MEMORY is enabled. Therefore we turn it on. - setEnvVar("SYCL_HOST_UNIFIED_MEMORY", "1"); - } - } + void setZeUSMImport(_ur_platform_handle_t *Platform); void doZeUSMImport(ze_driver_handle_t DriverHandle, void *HostPtr, - size_t Size) { - ZE_CALL_NOCHECK(zexDriverImportExternalPointer, - (DriverHandle, HostPtr, Size)); - } - void doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr) { - ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); - } + size_t Size); + void doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr); }; // Helper wrapper for working with USM import extension in Level Zero. diff --git a/sycl/plugins/unified_runtime/ur/ur.cpp b/sycl/plugins/unified_runtime/ur/ur.cpp index 692f26a7860d2..4997f72063d34 100644 --- a/sycl/plugins/unified_runtime/ur/ur.cpp +++ b/sycl/plugins/unified_runtime/ur/ur.cpp @@ -21,24 +21,24 @@ bool PrintTrace = [] { }(); // Apparatus for maintaining immutable cache of platforms. -std::vector *PiPlatformsCache = - new std::vector; +std::vector *PiPlatformsCache = + new std::vector; SpinLock *PiPlatformsCacheMutex = new SpinLock; bool PiPlatformCachePopulated = false; // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR -thread_local zer_result_t ErrorMessageCode = ZER_RESULT_SUCCESS; +thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; thread_local char ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, - zer_result_t error_code) { + ur_result_t error_code) { assert(strlen(message) <= MaxMessageSize); strcpy(ErrorMessage, message); ErrorMessageCode = error_code; } -zer_result_t zerPluginGetLastError(char **message) { +ur_result_t zerPluginGetLastError(char **message) { *message = &ErrorMessage[0]; return ErrorMessageCode; } diff --git a/sycl/plugins/unified_runtime/ur/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp index 2bafa14ba5898..91c598e43fe05 100644 --- a/sycl/plugins/unified_runtime/ur/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -18,45 +18,47 @@ #include #include -#include +#include // TODO: promote all of the below extensions to the Unified Runtime // and get rid of these ZER_EXT constants. -const int ZER_EXT_DEVICE_INFO_END = ZER_DEVICE_INFO_FORCE_UINT32; -const int ZER_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE = ZER_EXT_DEVICE_INFO_END - 1; -const int ZER_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D = ZER_EXT_DEVICE_INFO_END - 2; -const int ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = - ZER_EXT_DEVICE_INFO_END - 3; -const int ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS = - ZER_EXT_DEVICE_INFO_END - 4; -const int ZER_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH = ZER_EXT_DEVICE_INFO_END - 6; -const int ZER_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU = - ZER_EXT_DEVICE_INFO_END - 7; -const int ZER_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = - ZER_EXT_DEVICE_INFO_END - 8; -const int ZER_EXT_DEVICE_INFO_GPU_SLICES = ZER_EXT_DEVICE_INFO_END - 9; -const int ZER_EXT_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = - ZER_EXT_DEVICE_INFO_END - 10; -const int ZER_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH = ZER_EXT_DEVICE_INFO_END - 11; -const int ZER_EXT_DEVICE_INFO_MEMORY_CLOCK_RATE = ZER_EXT_DEVICE_INFO_END - 12; -const int ZER_EXT_DEVICE_INFO_FREE_MEMORY = ZER_EXT_DEVICE_INFO_END - 13; -const int ZER_EXT_DEVICE_INFO_DEVICE_ID = ZER_EXT_DEVICE_INFO_END - 14; -const int ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = - ZER_DEVICE_INFO_IMAGE_MAX_ARRAR_SIZE; - -const int ZER_EXT_RESULT_END = 0x1000; -const zer_result_t ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR = - zer_result_t(ZER_EXT_RESULT_END - 1); - -const int ZER_EXT_USM_CAPS_ACCESS = 1 << 0; -const int ZER_EXT_USM_CAPS_ATOMIC_ACCESS = 1 << 1; -const int ZER_EXT_USM_CAPS_CONCURRENT_ACCESS = 1 << 2; -const int ZER_EXT_USM_CAPS_CONCURRENT_ATOMIC_ACCESS = 1 << 3; - -const zer_device_partition_property_flag_t - ZER_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE = - zer_device_partition_property_flag_t( - ZER_DEVICE_PARTITION_PROPERTY_FLAG_FORCE_UINT32 - 1); +const int UR_EXT_DEVICE_INFO_END = UR_DEVICE_INFO_FORCE_UINT32; +const int UR_EXT_DEVICE_INFO_BUILD_ON_SUBDEVICE = UR_EXT_DEVICE_INFO_END - 1; +const int UR_EXT_DEVICE_INFO_MAX_WORK_GROUPS_3D = UR_EXT_DEVICE_INFO_END - 2; +// const int UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = +// UR_EXT_DEVICE_INFO_END - 3; +// const int ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS = +// UR_EXT_DEVICE_INFO_END - 4; +const int UR_EXT_DEVICE_INFO_MAX_MEM_BANDWIDTH = UR_EXT_DEVICE_INFO_END - 6; +const int UR_EXT_DEVICE_INFO_GPU_HW_THREADS_PER_EU = UR_EXT_DEVICE_INFO_END - 7; +const int UR_EXT_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = + UR_EXT_DEVICE_INFO_END - 8; +const int UR_EXT_DEVICE_INFO_GPU_SLICES = UR_EXT_DEVICE_INFO_END - 9; +// const int UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = +// UR_EXT_DEVICE_INFO_END - 10; +const int UR_EXT_DEVICE_INFO_MEMORY_BUS_WIDTH = UR_EXT_DEVICE_INFO_END - 11; +// const int ZER_EXT_DEVICE_INFO_MEMORY_CLOCK_RATE = UR_EXT_DEVICE_INFO_END - +// 12; +const int UR_EXT_DEVICE_INFO_FREE_MEMORY = UR_EXT_DEVICE_INFO_END - 13; +// const int ZER_EXT_DEVICE_INFO_DEVICE_ID = UR_EXT_DEVICE_INFO_END - 14; +// const int ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = +// UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE; + +const ur_device_info_t UR_EXT_DEVICE_INFO_OPENCL_C_VERSION = + (ur_device_info_t)0x103D; + +const int UR_EXT_RESULT_END = 0x1000; +const ur_result_t UR_EXT_RESULT_ADAPTER_SPECIFIC_ERROR = + ur_result_t(UR_EXT_RESULT_END - 1); + +const int UR_EXT_USM_CAPS_ACCESS = 1 << 0; +const int UR_EXT_USM_CAPS_ATOMIC_ACCESS = 1 << 1; +const int UR_EXT_USM_CAPS_CONCURRENT_ACCESS = 1 << 2; +const int UR_EXT_USM_CAPS_CONCURRENT_ATOMIC_ACCESS = 1 << 3; + +const ur_device_partition_property_t + UR_EXT_DEVICE_PARTITION_PROPERTY_FLAG_BY_CSLICE = + ur_device_partition_property_t(UR_DEVICE_PARTITION_FORCE_UINT32 - 1); // Terminates the process with a catastrophic error message. [[noreturn]] inline void die(const char *Message) { @@ -247,21 +249,21 @@ extern bool PrintTrace; // deallocate them automatically at the end of the main program. // The heap memory allocated for these global variables reclaimed only at // explicit tear-down. -extern std::vector *PiPlatformsCache; +extern std::vector *PiPlatformsCache; extern SpinLock *PiPlatformsCacheMutex; extern bool PiPlatformCachePopulated; // The getInfo*/ReturnHelper facilities provide shortcut way of // writing return bytes for the various getInfo APIs. template -zer_result_t getInfoImpl(size_t param_value_size, void *param_value, - size_t *param_value_size_ret, T value, - size_t value_size, Assign &&assign_func) { +ur_result_t getInfoImpl(size_t param_value_size, void *param_value, + size_t *param_value_size_ret, T value, + size_t value_size, Assign &&assign_func) { if (param_value != nullptr) { if (param_value_size < value_size) { - return ZER_RESULT_INVALID_VALUE; + return UR_RESULT_ERROR_INVALID_VALUE; } assign_func(param_value, value, value_size); @@ -271,12 +273,12 @@ zer_result_t getInfoImpl(size_t param_value_size, void *param_value, *param_value_size_ret = value_size; } - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } template -zer_result_t getInfo(size_t param_value_size, void *param_value, - size_t *param_value_size_ret, T value) { +ur_result_t getInfo(size_t param_value_size, void *param_value, + size_t *param_value_size_ret, T value) { auto assignment = [](void *param_value, T value, size_t value_size) { (void)value_size; @@ -288,17 +290,17 @@ zer_result_t getInfo(size_t param_value_size, void *param_value, } template -zer_result_t getInfoArray(size_t array_length, size_t param_value_size, - void *param_value, size_t *param_value_size_ret, - const T *value) { +ur_result_t getInfoArray(size_t array_length, size_t param_value_size, + void *param_value, size_t *param_value_size_ret, + const T *value) { return getInfoImpl(param_value_size, param_value, param_value_size_ret, value, array_length * sizeof(T), memcpy); } template -zer_result_t getInfoArray(size_t array_length, size_t param_value_size, - void *param_value, size_t *param_value_size_ret, - const T *value) { +ur_result_t getInfoArray(size_t array_length, size_t param_value_size, + void *param_value, size_t *param_value_size_ret, + const T *value) { if (param_value) { memset(param_value, 0, param_value_size); for (uint32_t I = 0; I < array_length; I++) @@ -306,11 +308,11 @@ zer_result_t getInfoArray(size_t array_length, size_t param_value_size, } if (param_value_size_ret) *param_value_size_ret = array_length * sizeof(RetType); - return ZER_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; } template <> -inline zer_result_t +inline ur_result_t getInfo(size_t param_value_size, void *param_value, size_t *param_value_size_ret, const char *value) { return getInfoArray(strlen(value) + 1, param_value_size, param_value, @@ -331,19 +333,19 @@ class UrReturnHelper { param_value_size_ret(param_value_size) {} // Scalar return value - template zer_result_t operator()(const T &t) { + template ur_result_t operator()(const T &t) { return getInfo(param_value_size, param_value, param_value_size_ret, t); } // Array return value - template zer_result_t operator()(const T *t, size_t s) { + template ur_result_t operator()(const T *t, size_t s) { return getInfoArray(s, param_value_size, param_value, param_value_size_ret, t); } // Array return value where element type is differrent from T template - zer_result_t operator()(const T *t, size_t s) { + ur_result_t operator()(const T *t, size_t s) { return getInfoArray(s, param_value_size, param_value, param_value_size_ret, t); } @@ -356,13 +358,9 @@ class UrReturnHelper { // Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR constexpr size_t MaxMessageSize = 256; -extern thread_local zer_result_t ErrorMessageCode; +extern thread_local ur_result_t ErrorMessageCode; extern thread_local char ErrorMessage[MaxMessageSize]; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *message, - zer_result_t error_code); - -// Returns plugin specific error and warning messages -// TODO: promote to Unified Runtime API -zer_result_t zerPluginGetLastError(char **message); + ur_result_t error_code); diff --git a/sycl/plugins/unified_runtime/ur_bindings.hpp b/sycl/plugins/unified_runtime/ur_bindings.hpp index 886848d896d56..8ba693d97c563 100755 --- a/sycl/plugins/unified_runtime/ur_bindings.hpp +++ b/sycl/plugins/unified_runtime/ur_bindings.hpp @@ -8,13 +8,13 @@ #pragma once #include -#include +#include // Make the Unified Runtime handles definition complete. // This is used in various "create" API where new handles are allocated. -struct _zer_platform_handle_t : public _ur_platform_handle_t { +struct ur_platform_handle_t_ : public _ur_platform_handle_t { using _ur_platform_handle_t::_ur_platform_handle_t; }; -struct _zer_device_handle_t : public _ur_device_handle_t { +struct ur_device_handle_t_ : public _ur_device_handle_t { using _ur_device_handle_t::_ur_device_handle_t; };