diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt index 0b94e09460b9b..22bf6e1c578f3 100644 --- a/sycl/plugins/hip/CMakeLists.txt +++ b/sycl/plugins/hip/CMakeLists.txt @@ -84,44 +84,16 @@ endif() # Set includes used in added library (rocmdrv) set(HIP_HEADERS "${PI_HIP_INCLUDE_DIR};${PI_HIP_HSA_INCLUDE_DIR}") +# Get the HIP sources so they can be shared with HIP PI plugin +get_target_property(UR_HIP_ADAPTER_SOURCES ur_adapter_hip SOURCES) + # Create pi_hip library add_sycl_plugin(hip SOURCES # Some code is shared with the UR adapter "../unified_runtime/pi2ur.hpp" "../unified_runtime/pi2ur.cpp" - "../unified_runtime/ur/ur.hpp" - "../unified_runtime/ur/ur.cpp" - "../unified_runtime/ur/adapters/hip/adapter.cpp" - "../unified_runtime/ur/adapters/hip/adapter.hpp" - "../unified_runtime/ur/adapters/hip/command_buffer.cpp" - "../unified_runtime/ur/adapters/hip/command_buffer.hpp" - "../unified_runtime/ur/adapters/hip/common.cpp" - "../unified_runtime/ur/adapters/hip/common.hpp" - "../unified_runtime/ur/adapters/hip/context.cpp" - "../unified_runtime/ur/adapters/hip/context.hpp" - "../unified_runtime/ur/adapters/hip/device.cpp" - "../unified_runtime/ur/adapters/hip/device.hpp" - "../unified_runtime/ur/adapters/hip/enqueue.cpp" - "../unified_runtime/ur/adapters/hip/event.cpp" - "../unified_runtime/ur/adapters/hip/event.hpp" - "../unified_runtime/ur/adapters/hip/image.cpp" - "../unified_runtime/ur/adapters/hip/kernel.cpp" - "../unified_runtime/ur/adapters/hip/kernel.hpp" - "../unified_runtime/ur/adapters/hip/memory.cpp" - "../unified_runtime/ur/adapters/hip/memory.hpp" - "../unified_runtime/ur/adapters/hip/platform.cpp" - "../unified_runtime/ur/adapters/hip/platform.hpp" - "../unified_runtime/ur/adapters/hip/program.cpp" - "../unified_runtime/ur/adapters/hip/program.hpp" - "../unified_runtime/ur/adapters/hip/queue.cpp" - "../unified_runtime/ur/adapters/hip/queue.hpp" - "../unified_runtime/ur/adapters/hip/sampler.cpp" - "../unified_runtime/ur/adapters/hip/sampler.hpp" - "../unified_runtime/ur/adapters/hip/ur_interface_loader.cpp" - "../unified_runtime/ur/adapters/hip/usm.cpp" - "../unified_runtime/ur/adapters/hip/usm.hpp" - "../unified_runtime/ur/adapters/hip/usm_p2p.cpp" + ${UR_HIP_ADAPTER_SOURCES} "${sycl_inc_dir}/sycl/detail/pi.h" "${sycl_inc_dir}/sycl/detail/pi.hpp" "pi_hip.hpp" @@ -129,6 +101,7 @@ add_sycl_plugin(hip INCLUDE_DIRS ${sycl_plugin_dir} ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime + ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/hip LIBRARIES UnifiedRuntime-Headers UnifiedRuntimeCommon @@ -139,8 +112,6 @@ add_sycl_plugin(hip set_target_properties(pi_hip PROPERTIES LINKER_LANGUAGE CXX) if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") - # Import HIP runtime library - add_library(rocmdrv SHARED IMPORTED GLOBAL) set_target_properties( rocmdrv PROPERTIES diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index 3ab21101228fe..018d069f5fe7f 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -39,16 +39,16 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "pi2ur.hpp" diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index df3a160840325..b6b0c4f8abee7 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -4,7 +4,7 @@ if (NOT 
DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG b38855ed815ffd076bfde5e5e06170ca4f723dc1) + set(UNIFIED_RUNTIME_TAG e6343f4cca9a37b17bc63f3a81968ac3f486be8a) set(UR_BUILD_ADAPTER_L0 ON) @@ -12,6 +12,9 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D set(UR_BUILD_ADAPTER_CUDA ON) endif() + if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) + set(UR_BUILD_ADAPTER_HIP ON) + endif() set(UMF_ENABLE_POOL_TRACKING ON) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime @@ -83,80 +86,14 @@ add_sycl_plugin(unified_runtime ) add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) +if("hip" IN_LIST SYCL_ENABLE_PLUGINS) + add_dependencies(sycl-runtime-libraries ur_adapter_hip) +endif() if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-runtime-libraries ur_adapter_cuda) endif() -if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) - # Build HIP adapter - add_sycl_library("ur_adapter_hip" SHARED - SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - "ur/adapters/hip/adapter.cpp" - "ur/adapters/hip/adapter.hpp" - "ur/adapters/hip/command_buffer.cpp" - "ur/adapters/hip/command_buffer.hpp" - "ur/adapters/hip/common.cpp" - "ur/adapters/hip/common.hpp" - "ur/adapters/hip/context.cpp" - "ur/adapters/hip/context.hpp" - "ur/adapters/hip/device.cpp" - "ur/adapters/hip/device.hpp" - "ur/adapters/hip/enqueue.cpp" - "ur/adapters/hip/event.cpp" - "ur/adapters/hip/event.hpp" - "ur/adapters/hip/image.cpp" - "ur/adapters/hip/kernel.cpp" - "ur/adapters/hip/kernel.hpp" - "ur/adapters/hip/memory.cpp" - "ur/adapters/hip/memory.hpp" - "ur/adapters/hip/platform.cpp" - "ur/adapters/hip/platform.hpp" - "ur/adapters/hip/program.cpp" - "ur/adapters/hip/program.hpp" - "ur/adapters/hip/queue.cpp" - "ur/adapters/hip/queue.hpp" - "ur/adapters/hip/sampler.cpp" - "ur/adapters/hip/sampler.hpp" - "ur/adapters/hip/ur_interface_loader.cpp" - "ur/adapters/hip/usm.cpp" - "ur/adapters/hip/usm.hpp" - "ur/adapters/hip/usm_p2p.cpp" - INCLUDE_DIRS - ${sycl_inc_dir} - LIBRARIES - UnifiedRuntime-Headers - UnifiedRuntimeCommon - Threads::Threads - ) - - set_target_properties("ur_adapter_hip" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" - ) - - if(UMF_ENABLE_POOL_TRACKING) - target_compile_definitions("ur_adapter_hip" PRIVATE - UMF_ENABLE_POOL_TRACKING) - else() - message(WARNING "HIP adapter USM pools are disabled, set UMF_ENABLE_POOL_TRACKING to enable them") - endif() - - if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") - target_link_libraries(ur_adapter_hip PUBLIC rocmdrv) - # Set HIP define to select AMD platform - target_compile_definitions(ur_adapter_hip PRIVATE __HIP_PLATFORM_AMD__) - elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA") - target_link_libraries(ur_adapter_hip PUBLIC cudadrv cudart) - # Set HIP define to select NVIDIA platform - target_compile_definitions(ur_adapter_hip PRIVATE __HIP_PLATFORM_NVIDIA__) - else() - message(FATAL_ERROR "Unspecified PI HIP platform please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'") - endif() -endif() - if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) add_sycl_library("ur_adapter_native_cpu" SHARED SOURCES diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/README.md b/sycl/plugins/unified_runtime/ur/adapters/hip/README.md new file mode 100644 index 0000000000000..fda2d3c337913 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/hip/README.md @@ -0,0 
+1,7 @@ +# HIP adapter +The source for the HIP adapter has been moved to the +[adapters](https://github.com/oneapi-src/unified-runtime/tree/adapters) branch +of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. +Changes can be made by opening pull requests against that branch, and updating +the Unified Runtime commit in the parent +[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.cpp deleted file mode 100644 index 9854b5d96d3ae..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===--------- adapter.cpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "adapter.hpp" -#include "common.hpp" - -#include -#include - -struct ur_adapter_handle_t_ { - std::atomic RefCount = 0; -}; - -ur_adapter_handle_t_ adapter{}; - -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( - uint32_t, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { - if (phAdapters) { - adapter.RefCount++; - *phAdapters = &adapter; - } - if (pNumAdapters) { - *pNumAdapters = 1; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - // No state to clean up so we don't need to check for 0 references - adapter.RefCount--; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - adapter.RefCount++; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - ur_adapter_handle_t, const char **ppMessage, int32_t *pError) { - *ppMessage = ErrorMessage; - *pError = ErrorMessageCode; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_ADAPTER_INFO_BACKEND: - return ReturnValue(UR_ADAPTER_BACKEND_HIP); - case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(adapter.RefCount.load()); - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.hpp deleted file mode 100644 index a1892eb36fc65..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/adapter.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===--------- adapter.hpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -struct ur_adapter_handle_t_; - -extern ur_adapter_handle_t_ adapter; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp deleted file mode 100644 index 305664ea13286..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===--------- command_buffer.cpp - HIP Adapter ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "command_buffer.hpp" -#include "common.hpp" - -/// Stub implementations of UR experimental feature command-buffers - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t, ur_device_handle_t, - const ur_exp_command_buffer_desc_t *, ur_exp_command_buffer_handle_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferRetainExp(ur_exp_command_buffer_handle_t) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, - const size_t *, const size_t *, const size_t *, uint32_t, - const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( - ur_exp_command_buffer_handle_t, void *, const void *, size_t, uint32_t, - const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, size_t, - size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, - ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, 
size_t, - size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, - const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( - ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t, ur_queue_handle_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp deleted file mode 100644 index 004be3cb8996e..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp +++ /dev/null @@ -1,13 +0,0 @@ -//===--------- command_buffer.hpp - HIP Adapter ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -/// Stub implementation of command-buffers for HIP - -struct ur_exp_command_buffer_handle_t_ {}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp deleted file mode 100644 index 9bcf9e8a5ceda..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp +++ /dev/null @@ -1,125 +0,0 @@ -//===--------- common.cpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "common.hpp" - -#include - -ur_result_t mapErrorUR(hipError_t Result) { - switch (Result) { - case hipSuccess: - return UR_RESULT_SUCCESS; - case hipErrorInvalidContext: - return UR_RESULT_ERROR_INVALID_CONTEXT; - case hipErrorInvalidDevice: - return UR_RESULT_ERROR_INVALID_DEVICE; - case hipErrorInvalidValue: - return UR_RESULT_ERROR_INVALID_VALUE; - case hipErrorOutOfMemory: - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - case hipErrorLaunchOutOfResources: - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - default: - return UR_RESULT_ERROR_UNKNOWN; - } -} - -void checkErrorUR(hipError_t Result, const char *Function, int Line, - const char *File) { - if (Result == hipSuccess) { - return; - } - - if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr || - std::getenv("UR_SUPPRESS_ERROR_MESSAGE") == nullptr) { - const char *ErrorString = nullptr; - const char *ErrorName = nullptr; - ErrorName = hipGetErrorName(Result); - ErrorString = hipGetErrorString(Result); - std::cerr << "\nUR HIP ERROR:" - << "\n\tValue: " << Result - << "\n\tName: " << ErrorName - << "\n\tDescription: " << ErrorString - << "\n\tFunction: " << Function - << "\n\tSource Location: " << File << ":" << Line << "\n\n"; - } - - if (std::getenv("PI_HIP_ABORT") != nullptr || - std::getenv("UR_HIP_ABORT") != nullptr) { - std::abort(); - } - - throw mapErrorUR(Result); -} - -void checkErrorUR(ur_result_t Result, const char *Function, int Line, - const char *File) { - if (Result == UR_RESULT_SUCCESS) { - return; - } - - if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr || - std::getenv("UR_SUPPRESS_ERROR_MESSAGE") == nullptr) { - std::cerr << "\nUR HIP ERROR:" - << "\n\tValue: " << Result - << "\n\tFunction: " << Function - << "\n\tSource Location: " << File << ":" << Line << "\n\n"; - } - - if (std::getenv("PI_HIP_ABORT") != nullptr || - std::getenv("UR_HIP_ABORT") != nullptr) { - std::abort(); - } - - throw Result; -} - -hipError_t getHipVersionString(std::string &Version) { - int DriverVersion = 0; - auto Result = hipDriverGetVersion(&DriverVersion); - if (Result != hipSuccess) { - return Result; - } - // The version is returned as (1000 major + 10 minor). - std::stringstream Stream; - Stream << "HIP " << DriverVersion / 1000 << "." 
<< DriverVersion % 1000 / 10; - Version = Stream.str(); - return Result; -} - -void detail::ur::die(const char *pMessage) { - std::cerr << "ur_die: " << pMessage << '\n'; - std::terminate(); -} - -void detail::ur::assertion(bool Condition, const char *pMessage) { - if (!Condition) - die(pMessage); -} - -void detail::ur::hipPrint(const char *pMessage) { - std::cerr << "ur_print: " << pMessage << '\n'; -} - -// Global variables for UR_RESULT_ADAPTER_SPECIFIC_ERROR -thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; -thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *pMessage, - ur_result_t ErrorCode) { - assert(strlen(pMessage) < MaxMessageSize); - strncpy(ErrorMessage, pMessage, MaxMessageSize - 1); - ErrorMessageCode = ErrorCode; -} - -// Returns plugin specific error and warning messages; common implementation -// that can be shared between adapters -ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) { - *ppMessage = &ErrorMessage[0]; - return ErrorMessageCode; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp deleted file mode 100644 index 2083c4674ba41..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp +++ /dev/null @@ -1,178 +0,0 @@ -//===--------- common.hpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be -// indexed, but on NVidia it is an opaque type and needs to go through -// cuArrayGetDescriptor so implement a utility function to get the array -// properties -inline void getArrayDesc(hipArray *Array, hipArray_Format &Format, - size_t &Channels) { -#if defined(__HIP_PLATFORM_AMD__) - Format = Array->Format; - Channels = Array->NumChannels; -#elif defined(__HIP_PLATFORM_NVIDIA__) - CUDA_ARRAY_DESCRIPTOR ArrayDesc; - cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array); - - Format = ArrayDesc.Format; - Channels = ArrayDesc.NumChannels; -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif -} - -// HIP on NVIDIA headers guard hipArray3DCreate behind __CUDACC__, this does not -// seem to be required and we're not using nvcc to build the UR HIP adapter so -// add the translation function here -#if defined(__HIP_PLATFORM_NVIDIA__) && !defined(__CUDACC__) -inline static hipError_t -hipArray3DCreate(hiparray *pHandle, - const HIP_ARRAY3D_DESCRIPTOR *pAllocateArray) { - return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray)); -} -#endif - -// hipArray gets turned into cudaArray when using the HIP NVIDIA platform, and -// some CUDA APIs use cudaArray* and others use CUarray, these two represent the -// same type, however when building cudaArray appears as an opaque type, so it -// needs to be explicitly casted to CUarray. In order for this to work for both -// AMD and NVidia we introduce an second hipArray type that will be CUarray for -// NVIDIA and hipArray* for AMD so that we can place the explicit casts when -// necessary for NVIDIA and they will be no-ops for AMD. 
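// Editorial illustration, not from the original header: with the hipCUarray
// alias declared just below, shared code needs only one explicit cast, which
// is a no-op on AMD and yields a CUarray on NVIDIA. `Array` refers to the
// hipArray* parameter of getArrayDesc above; `ArrayHandle` is a hypothetical
// name used only for this sketch.
//   hipCUarray ArrayHandle = reinterpret_cast<hipCUarray>(Array);
//   // ArrayHandle can then be handed to driver-API calls that expect a
//   // CUarray on NVIDIA; on AMD the cast changes nothing.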
-#if defined(__HIP_PLATFORM_NVIDIA__) -typedef CUarray hipCUarray; -#elif defined(__HIP_PLATFORM_AMD__) -typedef hipArray *hipCUarray; -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - -// Add missing HIP to CUDA defines -#if defined(__HIP_PLATFORM_NVIDIA__) -#define hipMemoryType CUmemorytype -#define hipMemoryTypeHost CU_MEMORYTYPE_HOST -#define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE -#define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY -#define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED -#endif - -ur_result_t mapErrorUR(hipError_t Result); - -void checkErrorUR(hipError_t Result, const char *Function, int Line, - const char *File); -void checkErrorUR(ur_result_t Result, const char *Function, int Line, - const char *File); - -#define UR_CHECK_ERROR(result) \ - checkErrorUR(result, __func__, __LINE__, __FILE__) - -hipError_t getHipVersionString(std::string &Version); - -constexpr size_t MaxMessageSize = 256; -extern thread_local ur_result_t ErrorMessageCode; -extern thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *Message, - ur_result_t ErrorCode); - -/// ------ Error handling, matching OpenCL plugin semantics. -namespace detail { -namespace ur { - -// Report error and no return (keeps compiler from printing warnings). -// TODO: Probably change that to throw a catchable exception, -// but for now it is useful to see every failure. -// -[[noreturn]] void die(const char *pMessage); - -// Reports error messages -void hipPrint(const char *pMessage); - -void assertion(bool Condition, const char *pMessage = nullptr); - -} // namespace ur -} // namespace detail - -/// RAII object that calls the reference count release function on the held UR -/// object on destruction. -/// -/// The `dismiss` function stops the release from happening on destruction. -template class ReleaseGuard { -private: - T Captive; - - static ur_result_t callRelease(ur_device_handle_t Captive) { - return urDeviceRelease(Captive); - } - - static ur_result_t callRelease(ur_context_handle_t Captive) { - return urContextRelease(Captive); - } - - static ur_result_t callRelease(ur_mem_handle_t Captive) { - return urMemRelease(Captive); - } - - static ur_result_t callRelease(ur_program_handle_t Captive) { - return urProgramRelease(Captive); - } - - static ur_result_t callRelease(ur_kernel_handle_t Captive) { - return urKernelRelease(Captive); - } - - static ur_result_t callRelease(ur_queue_handle_t Captive) { - return urQueueRelease(Captive); - } - - static ur_result_t callRelease(ur_event_handle_t Captive) { - return urEventRelease(Captive); - } - -public: - ReleaseGuard() = delete; - /// Obj can be `nullptr`. - explicit ReleaseGuard(T Obj) : Captive(Obj) {} - ReleaseGuard(ReleaseGuard &&Other) noexcept : Captive(Other.Captive) { - Other.Captive = nullptr; - } - - ReleaseGuard(const ReleaseGuard &) = delete; - - /// Calls the related UR object release function if the object held is not - /// `nullptr` or if `dismiss` has not been called. - ~ReleaseGuard() { - if (Captive != nullptr) { - ur_result_t ret = callRelease(Captive); - if (ret != UR_RESULT_SUCCESS) { - // A reported HIP error is either an implementation or an asynchronous - // HIP error for which it is unclear if the function that reported it - // succeeded or not. Either way, the state of the program is compromised - // and likely unrecoverable. 
- detail::ur::die("Unrecoverable program state reached in piMemRelease"); - } - } - } - - ReleaseGuard &operator=(const ReleaseGuard &) = delete; - - ReleaseGuard &operator=(ReleaseGuard &&Other) { - Captive = Other.Captive; - Other.Captive = nullptr; - return *this; - } - - /// End the guard and do not release the reference count of the held - /// UR object. - void dismiss() { Captive = nullptr; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp deleted file mode 100644 index 17a752e7a91f9..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===--------- context.cpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "context.hpp" -#include "usm.hpp" - -void ur_context_handle_t_::addPool(ur_usm_pool_handle_t Pool) { - std::lock_guard Lock(Mutex); - PoolHandles.insert(Pool); -} - -void ur_context_handle_t_::removePool(ur_usm_pool_handle_t Pool) { - std::lock_guard Lock(Mutex); - PoolHandles.erase(Pool); -} - -ur_usm_pool_handle_t -ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) { - std::lock_guard Lock(Mutex); - for (auto &Pool : PoolHandles) { - if (Pool->hasUMFPool(UMFPool)) { - return Pool; - } - } - return nullptr; -} - -/// Create a UR HIP context. -/// -/// By default creates a scoped context and keeps the last active HIP context -/// on top of the HIP context stack. -/// -UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( - uint32_t DeviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *, ur_context_handle_t *phContext) { - std::ignore = DeviceCount; - assert(DeviceCount == 1); - ur_result_t RetErr = UR_RESULT_SUCCESS; - - std::unique_ptr ContextPtr{nullptr}; - try { - // Create a scoped context. - ContextPtr = std::unique_ptr( - new ur_context_handle_t_{*phDevices}); - - static std::once_flag InitFlag; - std::call_once( - InitFlag, - [](ur_result_t &) { - // Use default stream to record base event counter - UR_CHECK_ERROR(hipEventCreateWithFlags(&ur_platform_handle_t_::EvBase, - hipEventDefault)); - UR_CHECK_ERROR(hipEventRecord(ur_platform_handle_t_::EvBase, 0)); - }, - RetErr); - - *phContext = ContextPtr.release(); - } catch (ur_result_t Err) { - RetErr = Err; - } catch (...) 
{ - RetErr = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - return RetErr; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, - size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (uint32_t{propName}) { - case UR_CONTEXT_INFO_NUM_DEVICES: - return ReturnValue(1); - case UR_CONTEXT_INFO_DEVICES: - return ReturnValue(hContext->getDevice()); - case UR_CONTEXT_INFO_REFERENCE_COUNT: - return ReturnValue(hContext->getReferenceCount()); - case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: - case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // These queries should be dealt with in context_impl.cpp by calling the - // queries of each device separately and building the intersection set. - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: - // 2D USM memcpy is supported. - return ReturnValue(true); - case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: - // 2D USM operations currently not supported. - return ReturnValue(false); - - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urContextRelease(ur_context_handle_t hContext) { - if (hContext->decrementReferenceCount() == 0) { - delete hContext; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urContextRetain(ur_context_handle_t hContext) { - assert(hContext->getReferenceCount() > 0); - - hContext->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast( - hContext->getDevice()->getNativeContext()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t, uint32_t, const ur_device_handle_t *, - const ur_context_native_properties_t *, ur_context_handle_t *) { - return UR_RESULT_ERROR_INVALID_OPERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( - ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, - void *pUserData) { - hContext->setExtendedDeleter(pfnDeleter, pUserData); - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp deleted file mode 100644 index be4607b154099..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp +++ /dev/null @@ -1,208 +0,0 @@ -//===--------- context.hpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -#include "common.hpp" -#include "device.hpp" -#include "platform.hpp" - -#include - -typedef void (*ur_context_extended_deleter_t)(void *UserData); - -/// UR context mapping to a HIP context object. -/// -/// There is no direct mapping between a HIP context and a UR context. 
-/// The main differences are described below: -/// -/// HIP context vs UR context -/// -/// One of the main differences between the UR API and the HIP driver API is -/// that the second modifies the state of the threads by assigning -/// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated -/// with a given device and control access to said device from the user side. -/// UR API context are objects that are passed to functions, and not bound -/// to threads. -/// The ur_context_handle_t_ object doesn't implement this behavior. It only -/// holds the HIP context data. The RAII object \ref ScopedContext implements -/// the active context behavior. -/// -/// Primary vs UserDefined context -/// -/// HIP has two different types of context, the Primary context, -/// which is usable by all threads on a given process for a given device, and -/// the aforementioned custom contexts. -/// The HIP documentation, and performance analysis, suggest using the Primary -/// context whenever possible. The Primary context is also used by the HIP -/// Runtime API. For UR applications to interop with HIP Runtime API, they have -/// to use the primary context - and make that active in the thread. The -/// `ur_context_handle_t_` object can be constructed with a `kind` parameter -/// that allows to construct a Primary or `UserDefined` context, so that -/// the UR object interface is always the same. -/// -/// Destructor callback -/// -/// Required to implement CP023, SYCL Extended Context Destruction, -/// the UR Context can store a number of callback functions that will be -/// called upon destruction of the UR Context. -/// See proposal for details. -/// https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md -/// -struct ur_context_handle_t_ { - - struct deleter_data { - ur_context_extended_deleter_t Function; - void *UserData; - - void operator()() { Function(UserData); } - }; - - using native_type = hipCtx_t; - - ur_device_handle_t DeviceId; - std::atomic_uint32_t RefCount; - - ur_context_handle_t_(ur_device_handle_t DevId) - : DeviceId{DevId}, RefCount{1} { - urDeviceRetain(DeviceId); - }; - - ~ur_context_handle_t_() { urDeviceRelease(DeviceId); } - - void invokeExtendedDeleters() { - std::lock_guard Guard(Mutex); - for (auto &Deleter : ExtendedDeleters) { - Deleter(); - } - } - - void setExtendedDeleter(ur_context_extended_deleter_t Function, - void *UserData) { - std::lock_guard Guard(Mutex); - ExtendedDeleters.emplace_back(deleter_data{Function, UserData}); - } - - ur_device_handle_t getDevice() const noexcept { return DeviceId; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - void addPool(ur_usm_pool_handle_t Pool); - - void removePool(ur_usm_pool_handle_t Pool); - - ur_usm_pool_handle_t getOwningURPool(umf_memory_pool_t *UMFPool); - - /// We need to keep track of USM mappings in AMD HIP, as certain extra - /// synchronization *is* actually required for correctness. - /// During kernel enqueue we must dispatch a prefetch for each kernel argument - /// that points to a USM mapping to ensure the mapping is correctly - /// populated on the device (https://github.com/intel/llvm/issues/7252). Thus, - /// we keep track of mappings in the context, and then check against them just - /// before the kernel is launched. 
The stream against which the kernel is - /// launched is not known until enqueue time, but the USM mappings can happen - /// at any time. Thus, they are tracked on the context used for the urUSM* - /// mapping. - /// - /// The three utility function are simple wrappers around a mapping from a - /// pointer to a size. - void addUSMMapping(void *Ptr, size_t Size) { - std::lock_guard Guard(Mutex); - assert(USMMappings.find(Ptr) == USMMappings.end() && - "mapping already exists"); - USMMappings[Ptr] = Size; - } - - void removeUSMMapping(const void *Ptr) { - std::lock_guard guard(Mutex); - auto It = USMMappings.find(Ptr); - if (It != USMMappings.end()) - USMMappings.erase(It); - } - - std::pair getUSMMapping(const void *Ptr) { - std::lock_guard Guard(Mutex); - auto It = USMMappings.find(Ptr); - // The simple case is the fast case... - if (It != USMMappings.end()) - return *It; - - // ... but in the failure case we have to fall back to a full scan to search - // for "offset" pointers in case the user passes in the middle of an - // allocation. We have to do some not-so-ordained-by-the-standard ordered - // comparisons of pointers here, but it'll work on all platforms we support. - uintptr_t PtrVal = (uintptr_t)Ptr; - for (std::pair Pair : USMMappings) { - uintptr_t BaseAddr = (uintptr_t)Pair.first; - uintptr_t EndAddr = BaseAddr + Pair.second; - if (PtrVal > BaseAddr && PtrVal < EndAddr) { - // If we've found something now, offset *must* be nonzero - assert(Pair.second); - return Pair; - } - } - return {nullptr, 0}; - } - -private: - std::mutex Mutex; - std::vector ExtendedDeleters; - std::unordered_map USMMappings; - std::set PoolHandles; -}; - -namespace { -/// RAII type to guarantee recovering original HIP context -/// Scoped context is used across all UR HIP plugin implementation -/// to activate the UR Context on the current thread, matching the -/// HIP driver semantics where the context used for the HIP Driver -/// API is the one active on the thread. -/// The implementation tries to avoid replacing the hipCtx_t if it cans -class ScopedContext { - hipCtx_t Original; - bool NeedToRecover; - -public: - ScopedContext(ur_device_handle_t hDevice) : NeedToRecover{false} { - - if (!hDevice) { - throw UR_RESULT_ERROR_INVALID_DEVICE; - } - - // FIXME when multi device context are supported in HIP adapter - hipCtx_t Desired = hDevice->getNativeContext(); - UR_CHECK_ERROR(hipCtxGetCurrent(&Original)); - if (Original != Desired) { - // Sets the desired context as the active one for the thread - UR_CHECK_ERROR(hipCtxSetCurrent(Desired)); - if (Original == nullptr) { - // No context is installed on the current thread - // This is the most common case. We can activate the context in the - // thread and leave it there until all the UR context referring to the - // same underlying HIP context are destroyed. This emulates - // the behaviour of the HIP runtime api, and avoids costly context - // switches. No action is required on this side of the if. 
- } else { - NeedToRecover = true; - } - } - } - - ~ScopedContext() { - if (NeedToRecover) { - UR_CHECK_ERROR(hipCtxSetCurrent(Original)); - } - } -}; -} // namespace diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp deleted file mode 100644 index 348123f29d18a..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp +++ /dev/null @@ -1,957 +0,0 @@ -//===--------- device.cpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "device.hpp" -#include "context.hpp" -#include "event.hpp" - -#include - -int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute) { - int Value; - UR_CHECK_ERROR(hipDeviceGetAttribute(&Value, Attribute, Device->get())); - return Value; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, - ur_device_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - static constexpr uint32_t MaxWorkItemDimensions = 3u; - - switch ((uint32_t)propName) { - case UR_DEVICE_INFO_TYPE: { - return ReturnValue(UR_DEVICE_TYPE_GPU); - } - case UR_DEVICE_INFO_VENDOR_ID: { -#if defined(__HIP_PLATFORM_AMD__) - uint32_t VendorId = 4098u; -#elif defined(__HIP_PLATFORM_NVIDIA__) - uint32_t VendorId = 4318u; -#else - uint32_t VendorId = 0u; -#endif - return ReturnValue(VendorId); - } - case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { - int ComputeUnits = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &ComputeUnits, hipDeviceAttributeMultiprocessorCount, hDevice->get())); - detail::ur::assertion(ComputeUnits >= 0); - return ReturnValue(static_cast(ComputeUnits)); - } - case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: { - return ReturnValue(MaxWorkItemDimensions); - } - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { - struct { - size_t sizes[MaxWorkItemDimensions]; - } return_sizes; - - int MaxX = 0, MaxY = 0, MaxZ = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxX, hipDeviceAttributeMaxBlockDimX, - hDevice->get())); - detail::ur::assertion(MaxX >= 0); - - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxY, hipDeviceAttributeMaxBlockDimY, - hDevice->get())); - detail::ur::assertion(MaxY >= 0); - - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxZ, hipDeviceAttributeMaxBlockDimZ, - hDevice->get())); - detail::ur::assertion(MaxZ >= 0); - - return_sizes.sizes[0] = size_t(MaxX); - return_sizes.sizes[1] = size_t(MaxY); - return_sizes.sizes[2] = size_t(MaxZ); - return ReturnValue(return_sizes); - } - - case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { - struct { - size_t sizes[MaxWorkItemDimensions]; - } return_sizes; - - int MaxX = 0, MaxY = 0, MaxZ = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxX, hipDeviceAttributeMaxGridDimX, - hDevice->get())); - detail::ur::assertion(MaxX >= 0); - - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxY, hipDeviceAttributeMaxGridDimY, - hDevice->get())); - detail::ur::assertion(MaxY >= 0); - - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxZ, hipDeviceAttributeMaxGridDimZ, - hDevice->get())); - detail::ur::assertion(MaxZ >= 0); - - return_sizes.sizes[0] = size_t(MaxX); - return_sizes.sizes[1] = size_t(MaxY); - return_sizes.sizes[2] = size_t(MaxZ); - return 
ReturnValue(return_sizes); - } - - case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { - int MaxWorkGroupSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxWorkGroupSize, - hipDeviceAttributeMaxThreadsPerBlock, - hDevice->get())); - - detail::ur::assertion(MaxWorkGroupSize >= 0); - - return ReturnValue(size_t(MaxWorkGroupSize)); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { - // Number of sub-groups = max block size / warp size + possible remainder - int MaxThreads = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MaxThreads, hipDeviceAttributeMaxThreadsPerBlock, hDevice->get())); - int WarpSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, - hDevice->get())); - int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize; - return ReturnValue(MaxWarps); - } - case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { - // Volta provides independent thread scheduling - // TODO: Revisit for previous generation GPUs - int Major = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get())); - bool IFP = (Major >= 7); - return ReturnValue(IFP); - } - case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { - int WarpSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, - hDevice->get())); - size_t Sizes[1] = {static_cast(WarpSize)}; - return ReturnValue(Sizes, 1); - } - case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { - int ClockFreq = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &ClockFreq, hipDeviceAttributeClockRate, hDevice->get())); - detail::ur::assertion(ClockFreq >= 0); - return ReturnValue(static_cast(ClockFreq) / 1000u); - } - case UR_DEVICE_INFO_ADDRESS_BITS: { - auto Bits = uint32_t{std::numeric_limits::digits}; - return ReturnValue(Bits); - } - case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: { - // Max size of memory object allocation in bytes. - // The minimum value is max(min(1024 × 1024 × - // 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), - // 32 × 1024 × 1024) for devices that are not of type - // CL_DEVICE_TYPE_CUSTOM. 
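// Editorial worked example, not from the original file: applying the minimum
// formula above to a hypothetical device with 8 GiB of global memory:
//   Global        = 8 GiB
//   QuarterGlobal = 8 GiB / 4                        = 2 GiB
//   MaxAlloc      = max(min(1 GiB, 2 GiB), 32 MiB)   = 1 GiB
// so such a device reports a 1 GiB maximum allocation size, matching the
// computation carried out just below.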
- - size_t Global = 0; - detail::ur::assertion(hipDeviceTotalMem(&Global, hDevice->get()) == - hipSuccess); - - auto QuarterGlobal = static_cast(Global / 4u); - - auto MaxAlloc = std::max(std::min(1024u * 1024u * 1024u, QuarterGlobal), - 32u * 1024u * 1024u); - - return ReturnValue(uint64_t{MaxAlloc}); - } - case UR_DEVICE_INFO_IMAGE_SUPPORTED: { - return ReturnValue(uint32_t{true}); - } - case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: { - // This call doesn't match to HIP as it doesn't have images, but instead - // surfaces and textures. No clear call in the HIP API to determine this, - // but some searching found as of SM 2.x 128 are supported. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: { - // This call doesn't match to HIP as it doesn't have images, but instead - // surfaces and textures. No clear call in the HIP API to determine this, - // but some searching found as of SM 2.x 128 are supported. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { - // Take the smaller of maximum surface and maximum texture height. - int TexHeight = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexHeight, hipDeviceAttributeMaxTexture2DHeight, hDevice->get())); - detail::ur::assertion(TexHeight >= 0); - int SurfHeight = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfHeight, hipDeviceAttributeMaxTexture2DHeight, hDevice->get())); - detail::ur::assertion(SurfHeight >= 0); - - int Min = std::min(TexHeight, SurfHeight); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { - // Take the smaller of maximum surface and maximum texture width. - int TexWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexWidth, hipDeviceAttributeMaxTexture2DWidth, hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfWidth, hipDeviceAttributeMaxTexture2DWidth, hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { - // Take the smaller of maximum surface and maximum texture height. - int TexHeight = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexHeight, hipDeviceAttributeMaxTexture3DHeight, hDevice->get())); - detail::ur::assertion(TexHeight >= 0); - int SurfHeight = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfHeight, hipDeviceAttributeMaxTexture3DHeight, hDevice->get())); - detail::ur::assertion(SurfHeight >= 0); - - int Min = std::min(TexHeight, SurfHeight); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { - // Take the smaller of maximum surface and maximum texture width. - int TexWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexWidth, hipDeviceAttributeMaxTexture3DWidth, hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfWidth, hipDeviceAttributeMaxTexture3DWidth, hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { - // Take the smaller of maximum surface and maximum texture depth. 
- int TexDepth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexDepth, hipDeviceAttributeMaxTexture3DDepth, hDevice->get())); - detail::ur::assertion(TexDepth >= 0); - int SurfDepth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfDepth, hipDeviceAttributeMaxTexture3DDepth, hDevice->get())); - detail::ur::assertion(SurfDepth >= 0); - - int Min = std::min(TexDepth, SurfDepth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { - // Take the smaller of maximum surface and maximum texture width. - int TexWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &TexWidth, hipDeviceAttributeMaxTexture1DWidth, hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &SurfWidth, hipDeviceAttributeMaxTexture1DWidth, hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: { - return ReturnValue(0lu); - } - case UR_DEVICE_INFO_MAX_SAMPLERS: { - // This call is kind of meaningless for HIP, as samplers don't exist. - // Closest thing is textures, which is 128. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: { - // __global__ function parameters are passed to the device via constant - // memory and are limited to 4 KB. - return ReturnValue(4000lu); - } - case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { - int MemBaseAddrAlign = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MemBaseAddrAlign, hipDeviceAttributeTextureAlignment, hDevice->get())); - // Multiply by 8 as clGetDeviceInfo returns this value in bits - MemBaseAddrAlign *= 8; - return ReturnValue(MemBaseAddrAlign); - } - case UR_DEVICE_INFO_HALF_FP_CONFIG: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { - uint64_t Config = - UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | - UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA | - UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; - return ReturnValue(Config); - } - case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { - uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | - UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA; - return ReturnValue(Config); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: { - return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: { - // The value is dohipmented for all existing GPUs in the HIP programming - // guidelines, section "H.3.2. Global Memory". - return ReturnValue(128u); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { - int CacheSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &CacheSize, hipDeviceAttributeL2CacheSize, hDevice->get())); - detail::ur::assertion(CacheSize >= 0); - // The L2 cache is global to the GPU. - return ReturnValue(static_cast(CacheSize)); - } - case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - size_t Bytes = 0; - // Runtime API has easy access to this value, driver API info is scarse. 
- UR_CHECK_ERROR(hipDeviceTotalMem(&Bytes, hDevice->get())); - return ReturnValue(uint64_t{Bytes}); - } - case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { - int ConstantMemory = 0; - - // hipDeviceGetAttribute takes a int*, however the size of the constant - // memory on AMD GPU may be larger than what can fit in the positive part - // of a signed integer, so use an unsigned integer and cast the pointer to - // int*. - UR_CHECK_ERROR(hipDeviceGetAttribute(&ConstantMemory, - hipDeviceAttributeTotalConstantMemory, - hDevice->get())); - detail::ur::assertion(ConstantMemory >= 0); - - return ReturnValue(static_cast(ConstantMemory)); - } - case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: { - // TODO: is there a way to retrieve this from HIP driver API? - // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX - // 1060 3GB - return ReturnValue(9u); - } - case UR_DEVICE_INFO_LOCAL_MEM_TYPE: { - return ReturnValue(UR_DEVICE_LOCAL_MEM_TYPE_LOCAL); - } - case UR_DEVICE_INFO_LOCAL_MEM_SIZE: { - // OpenCL's "local memory" maps most closely to HIP's "shared memory". - // HIP has its own definition of "local memory", which maps to OpenCL's - // "private memory". - int LocalMemSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &LocalMemSize, hipDeviceAttributeMaxSharedMemoryPerBlock, - hDevice->get())); - detail::ur::assertion(LocalMemSize >= 0); - return ReturnValue(static_cast(LocalMemSize)); - } - case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { - int EccEnabled = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &EccEnabled, hipDeviceAttributeEccEnabled, hDevice->get())); - - detail::ur::assertion((EccEnabled == 0) | (EccEnabled == 1)); - auto Result = static_cast(EccEnabled); - return ReturnValue(Result); - } - case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: { - int IsIntegrated = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &IsIntegrated, hipDeviceAttributeIntegrated, hDevice->get())); - - detail::ur::assertion((IsIntegrated == 0) | (IsIntegrated == 1)); - auto Result = static_cast(IsIntegrated); - return ReturnValue(Result); - } - case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: { - // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX - // 1060 3GB - return ReturnValue(1000lu); - } - case UR_DEVICE_INFO_ENDIAN_LITTLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_COMPILER_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_LINKER_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: { - auto Capability = ur_device_exec_capability_flags_t{ - UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL}; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: { - // The mandated minimum capability: - uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE | - UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: - case UR_DEVICE_INFO_QUEUE_PROPERTIES: { - // The mandated minimum capability: - uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_BUILT_IN_KERNELS: { - // An empty string is returned if no built-in kernels are supported by the - // device. 
- return ReturnValue(""); - } - case UR_DEVICE_INFO_PLATFORM: { - return ReturnValue(hDevice->getPlatform()); - } - case UR_DEVICE_INFO_NAME: { - static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u; - char Name[MAX_DEVICE_NAME_LENGTH]; - UR_CHECK_ERROR( - hipDeviceGetName(Name, MAX_DEVICE_NAME_LENGTH, hDevice->get())); - // On AMD GPUs hipDeviceGetName returns an empty string, so return the arch - // name instead, this is also what AMD OpenCL devices return. - if (strlen(Name) == 0) { - hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); - - return ReturnValue(Props.gcnArchName, strlen(Props.gcnArchName) + 1); - } - return ReturnValue(Name, strlen(Name) + 1); - } - case UR_DEVICE_INFO_VENDOR: { - return ReturnValue("AMD Corporation"); - } - case UR_DEVICE_INFO_DRIVER_VERSION: { - std::string Version; - UR_CHECK_ERROR(getHipVersionString(Version)); - return ReturnValue(Version.c_str()); - } - case UR_DEVICE_INFO_PROFILE: { - return ReturnValue("HIP"); - } - case UR_DEVICE_INFO_REFERENCE_COUNT: { - return ReturnValue(hDevice->getReferenceCount()); - } - case UR_DEVICE_INFO_VERSION: { - std::stringstream S; - - hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); -#if defined(__HIP_PLATFORM_NVIDIA__) - S << Props.major << "." << Props.minor; -#elif defined(__HIP_PLATFORM_AMD__) - S << Props.gcnArchName; -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - return ReturnValue(S.str().c_str()); - } - case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: { - return ReturnValue(""); - } - case UR_DEVICE_INFO_EXTENSIONS: { - // TODO: Remove comment when HIP support native asserts. - // DEVICELIB_ASSERT extension is set so fallback assert - // postprocessing is NOP. HIP 4.3 docs indicate support for - // native asserts are in progress - std::string SupportedExtensions = ""; - SupportedExtensions += "pi_ext_intel_devicelib_assert "; - SupportedExtensions += " "; - - hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); - - if (Props.arch.hasDoubles) { - SupportedExtensions += "cl_khr_fp64 "; - } - - return ReturnValue(SupportedExtensions.c_str()); - } - case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { - // The minimum value for the FULL profile is 1 MB. - return ReturnValue(1024lu); - } - case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_PARENT_DEVICE: { - return ReturnValue(nullptr); - } - case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { - if (pPropSizeRet) { - *pPropSizeRet = 0; - } - return UR_RESULT_SUCCESS; - } - - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_PARTITION_TYPE: { - if (pPropSizeRet) { - *pPropSizeRet = 0; - } - return UR_RESULT_SUCCESS; - } - - // Intel USM extensions - case UR_DEVICE_INFO_USM_HOST_SUPPORT: { - // from cl_intel_unified_shared_memory: "The host memory access capabilities - // apply to any host allocation." 
- // - // query if/how the device can access page-locked host memory, possibly - // through PCIe, using the same pointer as the host - ur_device_usm_access_capability_flags_t Value = {}; - // if (getAttribute(device, HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) { - // the device shares a unified address space with the host - if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >= 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } else { - // on GPU architectures with compute capability lower than 6.x, atomic - // operations from the GPU to CPU memory will not be atomic with respect - // to CPU initiated atomic operations - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The device memory access capabilities apply to any device allocation - // associated with this device." - // - // query how the device can access memory allocated on the device itself (?) - ur_device_usm_access_capability_flags_t Value = - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The single device shared memory access capabilities apply to any shared - // allocation associated with this device." - // - // query if/how the device can access managed memory associated to it - ur_device_usm_access_capability_flags_t Value = {}; - if (getAttribute(hDevice, hipDeviceAttributeManagedMemory)) { - // the device can allocate managed memory on this system - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; - } - if (getAttribute(hDevice, hipDeviceAttributeConcurrentManagedAccess)) { - // the device can coherently access managed memory concurrently with the - // CPU - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >= - 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The cross-device shared memory access capabilities apply to any shared - // allocation associated with this device, or to any shared memory - // allocation on another device that also supports the same cross-device - // shared memory access capability." 
- // - // query if/how the device can access managed memory associated to other - // devices - ur_device_usm_access_capability_flags_t Value = {}; - if (getAttribute(hDevice, hipDeviceAttributeManagedMemory)) { - // the device can allocate managed memory on this system - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS; - } - if (getAttribute(hDevice, hipDeviceAttributeConcurrentManagedAccess)) { - // all devices with the CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS - // attribute can coherently access managed memory concurrently with the - // CPU - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >= 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; - if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The shared system memory access capabilities apply to any allocations - // made by a system allocator, such as malloc or new." - // - // query if/how the device can access pageable host memory allocated by the - // system allocator - ur_device_usm_access_capability_flags_t Value = {}; - if (getAttribute(hDevice, hipDeviceAttributePageableMemoryAccess)) { - // the link between the device and the host does not support native - // atomic operations - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - return ReturnValue(Value); - } - - case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { - int Major = 0, Minor = 0; - UR_CHECK_ERROR(hipDeviceComputeCapability(&Major, &Minor, hDevice->get())); - std::string Result = std::to_string(Major) + "." + std::to_string(Minor); - return ReturnValue(Result.c_str()); - } - - case UR_DEVICE_INFO_ATOMIC_64: { - hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); - return ReturnValue(Props.arch.hasGlobalInt64Atomics && - Props.arch.hasSharedInt64Atomics); - } - - case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { - size_t FreeMemory = 0; - size_t TotalMemory = 0; - detail::ur::assertion(hipMemGetInfo(&FreeMemory, &TotalMemory) == - hipSuccess, - "failed hipMemGetInfo() API."); - return ReturnValue(FreeMemory); - } - - case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { - int Value = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &Value, hipDeviceAttributeMemoryClockRate, hDevice->get())); - detail::ur::assertion(Value >= 0); - // Convert kilohertz to megahertz when returning. 
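- // For illustration: a device reporting 1'600'000 kHz from
- // hipDeviceAttributeMemoryClockRate is returned here as 1600 MHz (illustrative
- // figure, not a value taken from the HIP documentation).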
- return ReturnValue(Value / 1000); - } - - case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: { - int Value = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &Value, hipDeviceAttributeMemoryBusWidth, hDevice->get())); - detail::ur::assertion(Value >= 0); - return ReturnValue(Value); - } - case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { - return ReturnValue(int32_t{1}); - } - - case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // SYCL2020 4.6.4.2 minimum mandated capabilities for - // atomic_fence/memory_scope_capabilities. - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - // SYCL2020 4.6.4.2 minimum mandated capabilities for - // atomic_fence_order_capabilities. - ur_memory_order_capability_flags_t Capabilities = - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_DEVICE_ID: { - int Value = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&Value, hipDeviceAttributePciDeviceId, - hDevice->get())); - detail::ur::assertion(Value >= 0); - return ReturnValue(Value); - } - case UR_DEVICE_INFO_UUID: { -#if ((HIP_VERSION_MAJOR == 5 && HIP_VERSION_MINOR >= 2) || \ - HIP_VERSION_MAJOR > 5) - hipUUID UUID = {}; - // Supported since 5.2+ - detail::ur::assertion(hipDeviceGetUuid(&UUID, hDevice->get()) == - hipSuccess); - std::array Name; - std::copy(UUID.bytes, UUID.bytes + 16, Name.begin()); - return ReturnValue(Name.data(), 16); -#endif - return UR_RESULT_ERROR_INVALID_VALUE; - } - case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { - // Maximum number of 32-bit registers available to a thread block. - // Note: This number is shared by all thread blocks simultaneously resident - // on a multiprocessor. - int MaxRegisters{-1}; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MaxRegisters, hipDeviceAttributeMaxRegistersPerBlock, hDevice->get())); - - detail::ur::assertion(MaxRegisters >= 0); - - return ReturnValue(static_cast(MaxRegisters)); - } - case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: - return ReturnValue(false); - case UR_DEVICE_INFO_IMAGE_SRGB: - return ReturnValue(false); - case UR_DEVICE_INFO_PCI_ADDRESS: { - constexpr size_t AddressBufferSize = 13; - char AddressBuffer[AddressBufferSize]; - UR_CHECK_ERROR( - hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, hDevice->get())); - // A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API - // is not guaranteed to use this format. In practice, it uses this format, - // at least in 5.3-5.5. To be on the safe side, we make sure the terminating - // \0 is set. 
- AddressBuffer[AddressBufferSize - 1] = '\0'; - detail::ur::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0); - return ReturnValue(AddressBuffer, - strnlen(AddressBuffer, AddressBufferSize - 1) + 1); - } - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: - return ReturnValue(false); - case UR_DEVICE_INFO_ESIMD_SUPPORT: - return ReturnValue(false); - - // TODO: Investigate if this information is available on HIP. - case UR_DEVICE_INFO_GPU_EU_COUNT: - case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - case UR_DEVICE_INFO_GPU_EU_SLICES: - case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: - case UR_DEVICE_INFO_BFLOAT16: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -/// \return UR_RESULT_SUCCESS if the function is executed successfully -/// HIP devices are always root devices so retain always returns success. -UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urDevicePartition(ur_device_handle_t, const ur_device_partition_properties_t *, - uint32_t, ur_device_handle_t *, uint32_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// \return UR_RESULT_SUCCESS always since HIP devices are always root -/// devices. -UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, - ur_device_type_t DeviceType, - uint32_t NumEntries, - ur_device_handle_t *phDevices, - uint32_t *pNumDevices) { - ur_result_t Err = UR_RESULT_SUCCESS; - const bool AskingForDefault = DeviceType == UR_DEVICE_TYPE_DEFAULT; - const bool AskingForGPU = DeviceType == UR_DEVICE_TYPE_GPU; - const bool AskingForAll = DeviceType == UR_DEVICE_TYPE_ALL; - const bool ReturnDevices = AskingForDefault || AskingForGPU || AskingForAll; - - size_t NumDevices = ReturnDevices ? hPlatform->Devices.size() : 0; - - try { - UR_ASSERT(pNumDevices || phDevices, UR_RESULT_ERROR_INVALID_VALUE); - - if (pNumDevices) { - *pNumDevices = NumDevices; - } - - if (ReturnDevices && phDevices) { - for (size_t i = 0; i < std::min(size_t(NumEntries), NumDevices); ++i) { - phDevices[i] = hPlatform->Devices[i].get(); - } - } - - return Err; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -/// Gets the native HIP handle of a UR device object -/// -/// \param[in] hDevice The UR device to get the native HIP object of. -/// \param[out] phNativeHandle Set to the native handle of the UR device object. 
-/// -/// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) { - *phNativeHandle = reinterpret_cast(hDevice->get()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t, ur_platform_handle_t, - const ur_device_native_properties_t *, ur_device_handle_t *) { - return UR_RESULT_ERROR_INVALID_OPERATION; -} - -/// \return UR_RESULT_SUCCESS If available, the first binary that is PTX -/// -UR_APIEXPORT ur_result_t UR_APICALL -urDeviceSelectBinary(ur_device_handle_t, const ur_device_binary_t *pBinaries, - uint32_t NumBinaries, uint32_t *pSelectedBinary) { - // Ignore unused parameter - UR_ASSERT(NumBinaries > 0, UR_RESULT_ERROR_INVALID_ARGUMENT); - - // Look for an image for the HIP target, and return the first one that is - // found -#if defined(__HIP_PLATFORM_AMD__) - const char *BinaryType = UR_DEVICE_BINARY_TARGET_AMDGCN; -#elif defined(__HIP_PLATFORM_NVIDIA__) - const char *BinaryType = UR_DEVICE_BINARY_TARGET_NVPTX64; -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - for (uint32_t i = 0; i < NumBinaries; i++) { - if (strcmp(pBinaries[i].pDeviceTargetSpec, BinaryType) == 0) { - *pSelectedBinary = i; - return UR_RESULT_SUCCESS; - } - } - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} - -ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, - uint64_t *pDeviceTimestamp, - uint64_t *pHostTimestamp) { - if (!pDeviceTimestamp && !pHostTimestamp) - return UR_RESULT_SUCCESS; - - ur_event_handle_t_::native_type Event; - ScopedContext Active(hDevice); - - if (pDeviceTimestamp) { - UR_CHECK_ERROR(hipEventCreateWithFlags(&Event, hipEventDefault)); - UR_CHECK_ERROR(hipEventRecord(Event)); - } - if (pHostTimestamp) { - using namespace std::chrono; - *pHostTimestamp = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); - } - - if (pDeviceTimestamp) { - UR_CHECK_ERROR(hipEventSynchronize(Event)); - float ElapsedTime = 0.0f; - UR_CHECK_ERROR(hipEventElapsedTime(&ElapsedTime, - ur_platform_handle_t_::EvBase, Event)); - *pDeviceTimestamp = (uint64_t)(ElapsedTime * (double)1e6); - } - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp deleted file mode 100644 index 155d1900aa0d3..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp +++ /dev/null @@ -1,46 +0,0 @@ -//===--------- device.hpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "common.hpp" - -#include - -/// UR device mapping to a hipDevice_t. -/// Includes an observer pointer to the platform, -/// and implements the reference counting semantics since -/// HIP objects are not refcounted. 
-struct ur_device_handle_t_ { -private: - using native_type = hipDevice_t; - - native_type HIPDevice; - std::atomic_uint32_t RefCount; - ur_platform_handle_t Platform; - hipCtx_t HIPContext; - -public: - ur_device_handle_t_(native_type HipDevice, hipCtx_t Context, - ur_platform_handle_t Platform) - : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), - HIPContext(Context) {} - - ~ur_device_handle_t_() { - UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); - } - - native_type get() const noexcept { return HIPDevice; }; - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - ur_platform_handle_t getPlatform() const noexcept { return Platform; }; - - hipCtx_t getNativeContext() { return HIPContext; }; -}; - -int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute); diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp deleted file mode 100644 index 263175cb37206..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp +++ /dev/null @@ -1,1473 +0,0 @@ -//===--------- enqueue.cpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "common.hpp" -#include "context.hpp" -#include "event.hpp" -#include "kernel.hpp" -#include "memory.hpp" -#include "queue.hpp" - -namespace { - -static size_t imageElementByteSize(hipArray_Format ArrayFormat) { - switch (ArrayFormat) { - case HIP_AD_FORMAT_UNSIGNED_INT8: - case HIP_AD_FORMAT_SIGNED_INT8: - return 1; - case HIP_AD_FORMAT_UNSIGNED_INT16: - case HIP_AD_FORMAT_SIGNED_INT16: - case HIP_AD_FORMAT_HALF: - return 2; - case HIP_AD_FORMAT_UNSIGNED_INT32: - case HIP_AD_FORMAT_SIGNED_INT32: - case HIP_AD_FORMAT_FLOAT: - return 4; - default: - detail::ur::die("Invalid image format."); - } - return 0; -} - -ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, - hipStream_t Stream, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList) { - if (!EventWaitList) { - return UR_RESULT_SUCCESS; - } - try { - ScopedContext Active(CommandQueue->getDevice()); - - auto Result = forLatestEvents( - EventWaitList, NumEventsInWaitList, - [Stream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getStream() == Stream) { - return UR_RESULT_SUCCESS; - } else { - UR_CHECK_ERROR(hipStreamWaitEvent(Stream, Event->get(), 0)); - return UR_RESULT_SUCCESS; - } - }); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -void simpleGuessLocalWorkSize(size_t *ThreadsPerBlock, - const size_t *GlobalWorkSize, - const size_t MaxThreadsPerBlock[3], - ur_kernel_handle_t Kernel) { - assert(ThreadsPerBlock != nullptr); - assert(GlobalWorkSize != nullptr); - assert(Kernel != nullptr); - - std::ignore = Kernel; - - ThreadsPerBlock[0] = std::min(MaxThreadsPerBlock[0], GlobalWorkSize[0]); - - // Find a local work group size that is a divisor of the global - // work group size to produce uniform work groups. 
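- // For example (illustrative numbers): GlobalWorkSize[0] = 1030 with
- // MaxThreadsPerBlock[0] = 1024 starts the search at 1024 and decrements down to
- // 515, the largest divisor of 1030 that still fits in one block.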
- while (GlobalWorkSize[0] % ThreadsPerBlock[0]) { - --ThreadsPerBlock[0]; - } -} -} // namespace - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(!(phEventWaitList == NULL && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_WRITE, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - UR_CHECK_ERROR( - hipMemcpyHtoDAsync(hBuffer->Mem.BufferMem.getWithOffset(offset), - const_cast(pSrc), size, HIPStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingWrite) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(!(phEventWaitList == NULL && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - UR_CHECK_ERROR(hipMemcpyDtoHAsync( - pDst, hBuffer->Mem.BufferMem.getWithOffset(offset), size, HIPStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingRead) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t err) { - Result = err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hQueue->getContext() == hKernel->getContext(), - UR_RESULT_ERROR_INVALID_QUEUE); - UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - if (*pGlobalWorkSize == 0) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); - } - - // Set 
the number of threads per block to the number of threads per warp - // by default unless user has provided a better number - size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; - size_t MaxWorkGroupSize = 0u; - size_t MaxThreadsPerBlock[3] = {}; - bool ProvidedLocalWorkGroupSize = (pLocalWorkSize != nullptr); - - { - ur_result_t Result = urDeviceGetInfo( - hQueue->Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr); - UR_ASSERT(Result == UR_RESULT_SUCCESS, Result); - - Result = - urDeviceGetInfo(hQueue->Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr); - UR_ASSERT(Result == UR_RESULT_SUCCESS, Result); - - // The MaxWorkGroupSize = 1024 for AMD GPU - // The MaxThreadsPerBlock = {1024, 1024, 1024} - - if (ProvidedLocalWorkGroupSize) { - auto isValid = [&](int dim) { - UR_ASSERT(pLocalWorkSize[dim] <= MaxThreadsPerBlock[dim], - UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); - // Checks that local work sizes are a divisor of the global work sizes - // which includes that the local work sizes are neither larger than the - // global work sizes and not 0. - UR_ASSERT(pLocalWorkSize != 0, UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); - UR_ASSERT((pGlobalWorkSize[dim] % pLocalWorkSize[dim]) == 0, - UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); - ThreadsPerBlock[dim] = pLocalWorkSize[dim]; - return UR_RESULT_SUCCESS; - }; - - for (size_t dim = 0; dim < workDim; dim++) { - auto err = isValid(dim); - if (err != UR_RESULT_SUCCESS) - return err; - } - } else { - simpleGuessLocalWorkSize(ThreadsPerBlock, pGlobalWorkSize, - MaxThreadsPerBlock, hKernel); - } - } - - UR_ASSERT(MaxWorkGroupSize >= size_t(ThreadsPerBlock[0] * ThreadsPerBlock[1] * - ThreadsPerBlock[2]), - UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); - - size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - - for (size_t i = 0; i < workDim; i++) { - BlocksPerGrid[i] = - (pGlobalWorkSize[i] + ThreadsPerBlock[i] - 1) / ThreadsPerBlock[i]; - } - - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr RetImplEvent{nullptr}; - - try { - ur_device_handle_t Dev = hQueue->getDevice(); - ScopedContext Active(Dev); - ur_context_handle_t Ctx = hQueue->getContext(); - - uint32_t StreamToken; - ur_stream_quard Guard; - hipStream_t HIPStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - hipFunction_t HIPFunc = hKernel->get(); - - hipDevice_t HIPDev = Dev->get(); - for (const void *P : hKernel->getPtrArgs()) { - auto [Addr, Size] = Ctx->getUSMMapping(P); - if (!Addr) - continue; - if (hipMemPrefetchAsync(Addr, Size, HIPDev, HIPStream) != hipSuccess) - return UR_RESULT_ERROR_INVALID_KERNEL_ARGS; - } - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - // Set the implicit global offset parameter if kernel has offset variant - if (hKernel->getWithOffsetParameter()) { - std::uint32_t hip_implicit_offset[3] = {0, 0, 0}; - if (pGlobalWorkOffset) { - for (size_t i = 0; i < workDim; i++) { - hip_implicit_offset[i] = - static_cast(pGlobalWorkOffset[i]); - if (pGlobalWorkOffset[i] != 0) { - HIPFunc = hKernel->getWithOffsetParameter(); - } - } - } - hKernel->setImplicitOffsetArg(sizeof(hip_implicit_offset), - hip_implicit_offset); - } - - auto ArgIndices = hKernel->getArgIndices(); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_KERNEL_LAUNCH, hQueue, HIPStream, StreamToken)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - // Set local mem max size if env var is 
present - static const char *LocalMemSzPtrUR = - std::getenv("UR_HIP_MAX_LOCAL_MEM_SIZE"); - static const char *LocalMemSzPtrPI = - std::getenv("SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE"); - static const char *LocalMemSzPtr = - LocalMemSzPtrUR ? LocalMemSzPtrUR - : (LocalMemSzPtrPI ? LocalMemSzPtrPI : nullptr); - - if (LocalMemSzPtr) { - int DeviceMaxLocalMem = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &DeviceMaxLocalMem, hipDeviceAttributeMaxSharedMemoryPerBlock, - HIPDev)); - - static const int EnvVal = std::atoi(LocalMemSzPtr); - if (EnvVal <= 0 || EnvVal > DeviceMaxLocalMem) { - setErrorMessage(LocalMemSzPtrUR ? "Invalid value specified for " - "UR_HIP_MAX_LOCAL_MEM_SIZE" - : "Invalid value specified for " - "SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - UR_CHECK_ERROR(hipFuncSetAttribute( - HIPFunc, hipFuncAttributeMaxDynamicSharedMemorySize, EnvVal)); - } - - UR_CHECK_ERROR(hipModuleLaunchKernel( - HIPFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2], - ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], - hKernel->getLocalSize(), HIPStream, ArgIndices.data(), nullptr)); - - hKernel->clearLocalSize(); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t err) { - Result = err; - } - return Result; -} - -/// Enqueues a wait on the given queue for all events. -/// See \ref enqueueEventWait -/// -/// Currently queues are represented by a single in-order stream, therefore -/// every command is an implicit barrier and so urEnqueueEventWait has the -/// same behavior as urEnqueueEventWaitWithBarrier. So urEnqueueEventWait can -/// just call urEnqueueEventWaitWithBarrier. -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); -} - -/// Enqueues a wait on the given queue for all specified events. -/// See \ref enqueueEventWaitWithBarrier -/// -/// If the events list is empty, the enqueued wait will wait on all previous -/// events in the queue. 
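-/// Roughly (see the body below): a temporary hipEvent_t is recorded on every
-/// other stream owned by the queue and the chosen compute stream waits on it via
-/// hipStreamWaitEvent before the barrier event itself is recorded.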
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(!(phEventWaitList == NULL && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST) - UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST) - - try { - ScopedContext Active(hQueue->getDevice()); - uint32_t StreamToken; - ur_stream_quard Guard; - hipStream_t HIPStream = hQueue->getNextComputeStream( - numEventsInWaitList, - reinterpret_cast(phEventWaitList), Guard, - &StreamToken); - { - std::lock_guard Guard(hQueue->BarrierMutex); - if (hQueue->BarrierEvent == nullptr) { - UR_CHECK_ERROR(hipEventCreate(&hQueue->BarrierEvent)); - } - if (numEventsInWaitList == 0) { // wait on all work - if (hQueue->BarrierTmpEvent == nullptr) { - UR_CHECK_ERROR(hipEventCreate(&hQueue->BarrierTmpEvent)); - } - hQueue->syncStreams( - [HIPStream, TmpEvent = hQueue->BarrierTmpEvent](hipStream_t S) { - if (HIPStream != S) { - UR_CHECK_ERROR(hipEventRecord(TmpEvent, S)); - UR_CHECK_ERROR(hipStreamWaitEvent(HIPStream, TmpEvent, 0)); - } - }); - } else { // wait just on given events - forLatestEvents( - reinterpret_cast(phEventWaitList), - numEventsInWaitList, - [HIPStream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getQueue()->hasBeenSynchronized( - Event->getComputeStreamToken())) { - return UR_RESULT_SUCCESS; - } else { - UR_CHECK_ERROR(hipStreamWaitEvent(HIPStream, Event->get(), 0)); - return UR_RESULT_SUCCESS; - } - }); - } - - UR_CHECK_ERROR(hipEventRecord(hQueue->BarrierEvent, HIPStream)); - for (unsigned int i = 0; i < hQueue->ComputeAppliedBarrier.size(); i++) { - hQueue->ComputeAppliedBarrier[i] = false; - } - for (unsigned int i = 0; i < hQueue->TransferAppliedBarrier.size(); i++) { - hQueue->TransferAppliedBarrier[i] = false; - } - } - - if (phEvent) { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, HIPStream, StreamToken); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -/// General 3D memory copy operation. -/// This function requires the corresponding HIP context to be at the top of -/// the context stack -/// If the source and/or destination is on the device, SrcPtr and/or DstPtr -/// must be a pointer to a hipDevPtr -static ur_result_t commonEnqueueMemBufferCopyRect( - hipStream_t HipStream, ur_rect_region_t Region, const void *SrcPtr, - const hipMemoryType SrcType, ur_rect_offset_t SrcOffset, size_t SrcRowPitch, - size_t SrcSlicePitch, void *DstPtr, const hipMemoryType DstType, - ur_rect_offset_t DstOffset, size_t DstRowPitch, size_t DstSlicePitch) { - - assert(SrcType == hipMemoryTypeDevice || SrcType == hipMemoryTypeHost); - assert(DstType == hipMemoryTypeDevice || DstType == hipMemoryTypeHost); - - SrcRowPitch = (!SrcRowPitch) ? Region.width : SrcRowPitch; - SrcSlicePitch = - (!SrcSlicePitch) ? (Region.height * SrcRowPitch) : SrcSlicePitch; - DstRowPitch = (!DstRowPitch) ? Region.width : DstRowPitch; - DstSlicePitch = - (!DstSlicePitch) ? 
(Region.height * DstRowPitch) : DstSlicePitch; - - HIP_MEMCPY3D Params; - - Params.WidthInBytes = Region.width; - Params.Height = Region.height; - Params.Depth = Region.depth; - - Params.srcMemoryType = SrcType; - Params.srcDevice = SrcType == hipMemoryTypeDevice - ? *static_cast(SrcPtr) - : 0; - Params.srcHost = SrcType == hipMemoryTypeHost ? SrcPtr : nullptr; - Params.srcXInBytes = SrcOffset.x; - Params.srcY = SrcOffset.y; - Params.srcZ = SrcOffset.z; - Params.srcPitch = SrcRowPitch; - Params.srcHeight = SrcSlicePitch / SrcRowPitch; - - Params.dstMemoryType = DstType; - Params.dstDevice = DstType == hipMemoryTypeDevice - ? *reinterpret_cast(DstPtr) - : 0; - Params.dstHost = DstType == hipMemoryTypeHost ? DstPtr : nullptr; - Params.dstXInBytes = DstOffset.x; - Params.dstY = DstOffset.y; - Params.dstZ = DstOffset.z; - Params.dstPitch = DstRowPitch; - Params.dstHeight = DstSlicePitch / DstRowPitch; - - UR_CHECK_ERROR(hipDrvMemcpy3DAsync(&Params, HipStream)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(!(phEventWaitList == NULL && numEventsInWaitList > 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), - UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); - UR_ASSERT(!(region.width == 0 || region.height == 0 || region.width == 0), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(!(bufferRowPitch != 0 && bufferRowPitch < region.width), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(!(hostRowPitch != 0 && hostRowPitch < region.width), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(!(bufferSlicePitch != 0 && - bufferSlicePitch < region.height * bufferRowPitch), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(!(bufferSlicePitch != 0 && bufferSlicePitch % bufferRowPitch != 0), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT( - !(hostSlicePitch != 0 && hostSlicePitch < region.height * hostRowPitch), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(!(hostSlicePitch != 0 && hostSlicePitch % hostRowPitch != 0), - UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = hBuffer->Mem.BufferMem.getVoid(); - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - HIPStream, region, &DevPtr, hipMemoryTypeDevice, bufferOrigin, - bufferRowPitch, bufferSlicePitch, pDst, hipMemoryTypeHost, hostOrigin, - hostRowPitch, hostSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingRead) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t 
hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = hBuffer->Mem.BufferMem.getVoid(); - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_WRITE_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - HIPStream, region, pSrc, hipMemoryTypeHost, hostOrigin, hostRowPitch, - hostSlicePitch, &DevPtr, hipMemoryTypeDevice, bufferOrigin, - bufferRowPitch, bufferSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingWrite) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(size + srcOffset <= hBufferSrc->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(size + dstOffset <= hBufferDst->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - ur_result_t Result = UR_RESULT_SUCCESS; - auto Stream = hQueue->getNextTransferStream(); - - if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, Stream, numEventsInWaitList, - phEventWaitList); - } - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY, hQueue, Stream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - auto Src = hBufferSrc->Mem.BufferMem.getWithOffset(srcOffset); - auto Dst = hBufferDst->Mem.BufferMem.getWithOffset(dstOffset); - - UR_CHECK_ERROR(hipMemcpyDtoDAsync(Dst, Src, size, Stream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_UNKNOWN; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - void *SrcPtr = hBufferSrc->Mem.BufferMem.getVoid(); - void *DstPtr = hBufferDst->Mem.BufferMem.getVoid(); - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - HIPStream, region, &SrcPtr, hipMemoryTypeDevice, srcOrigin, srcRowPitch, - srcSlicePitch, &DstPtr, hipMemoryTypeDevice, dstOrigin, dstRowPitch, - dstSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -// HIP has no memset functions that allow setting values more than 4 bytes. UR -// API lets you pass an arbitrary "pattern" to the buffer fill, which can be -// more than 4 bytes. We must break up the pattern into 1 byte values, and set -// the buffer using multiple strided calls. The first 4 patterns are set using -// hipMemsetD32Async then all subsequent 1 byte patterns are set using -// hipMemset2DAsync which is called for each pattern. -ur_result_t commonMemSetLargePattern(hipStream_t Stream, uint32_t PatternSize, - size_t Size, const void *pPattern, - hipDeviceptr_t Ptr) { - // Calculate the number of patterns, stride, number of times the pattern - // needs to be applied, and the number of times the first 32 bit pattern - // needs to be applied. 
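- // For example (illustrative sizes): an 8-byte pattern filling 64 bytes gives
- // NumberOfSteps = 8, Pitch = 8, Height = 8 and Count32 = 16; the 32-bit memset
- // stamps pattern bytes 0-3 over the whole range, then four strided 1-byte
- // memsets rewrite offsets 4..7 of every 8-byte repetition.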
- auto NumberOfSteps = PatternSize / sizeof(uint8_t); - auto Pitch = NumberOfSteps * sizeof(uint8_t); - auto Height = Size / NumberOfSteps; - auto Count32 = Size / sizeof(uint32_t); - - // Get 4-byte chunk of the pattern and call hipMemsetD32Async - auto Value = *(static_cast(pPattern)); - UR_CHECK_ERROR(hipMemsetD32Async(Ptr, Value, Count32, Stream)); - for (auto step = 4u; step < NumberOfSteps; ++step) { - // take 1 byte of the pattern - Value = *(static_cast(pPattern) + step); - - // offset the pointer to the part of the buffer we want to write to - auto OffsetPtr = reinterpret_cast(reinterpret_cast(Ptr) + - (step * sizeof(uint8_t))); - - // set all of the pattern chunks - UR_CHECK_ERROR(hipMemset2DAsync(OffsetPtr, Pitch, Value, sizeof(uint8_t), - Height, Stream)); - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(size + offset <= hBuffer->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - auto ArgsAreMultiplesOfPatternSize = - (offset % patternSize == 0) || (size % patternSize == 0); - - auto PatternIsValid = (pPattern != nullptr); - - auto PatternSizeIsValid = - ((patternSize & (patternSize - 1)) == 0) && // is power of two - (patternSize > 0) && (patternSize <= 128); // falls within valid range - - UR_ASSERT(ArgsAreMultiplesOfPatternSize && PatternIsValid && - PatternSizeIsValid, - UR_RESULT_ERROR_INVALID_VALUE); - std::ignore = ArgsAreMultiplesOfPatternSize; - std::ignore = PatternIsValid; - std::ignore = PatternSizeIsValid; - - std::unique_ptr RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - - auto Stream = hQueue->getNextTransferStream(); - ur_result_t Result = UR_RESULT_SUCCESS; - if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, Stream, numEventsInWaitList, - phEventWaitList); - } - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_FILL, hQueue, Stream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - auto DstDevice = hBuffer->Mem.BufferMem.getWithOffset(offset); - auto N = size / patternSize; - - // pattern size in bytes - switch (patternSize) { - case 1: { - auto Value = *static_cast(pPattern); - UR_CHECK_ERROR(hipMemsetD8Async(DstDevice, Value, N, Stream)); - break; - } - case 2: { - auto Value = *static_cast(pPattern); - UR_CHECK_ERROR(hipMemsetD16Async(DstDevice, Value, N, Stream)); - break; - } - case 4: { - auto Value = *static_cast(pPattern); - UR_CHECK_ERROR(hipMemsetD32Async(DstDevice, Value, N, Stream)); - break; - } - - default: { - Result = commonMemSetLargePattern(Stream, patternSize, size, pPattern, - DstDevice); - break; - } - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -/// General ND memory copy operation for images (where N > 1). 
-/// This function requires the corresponding HIP context to be at the top of -/// the context stack -/// If the source and/or destination is an array, SrcPtr and/or DstPtr -/// must be a pointer to a hipArray -static ur_result_t commonEnqueueMemImageNDCopy( - hipStream_t HipStream, ur_mem_type_t ImgType, const size_t *Region, - const void *SrcPtr, const hipMemoryType SrcType, const size_t *SrcOffset, - void *DstPtr, const hipMemoryType DstType, const size_t *DstOffset) { - UR_ASSERT(SrcType == hipMemoryTypeArray || SrcType == hipMemoryTypeHost, - UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(DstType == hipMemoryTypeArray || DstType == hipMemoryTypeHost, - UR_RESULT_ERROR_INVALID_VALUE); - - if (ImgType == UR_MEM_TYPE_IMAGE2D) { - hip_Memcpy2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = SrcType; - if (SrcType == hipMemoryTypeArray) { - CpyDesc.srcArray = - reinterpret_cast(const_cast(SrcPtr)); - CpyDesc.srcXInBytes = SrcOffset[0]; - CpyDesc.srcY = SrcOffset[1]; - } else { - CpyDesc.srcHost = SrcPtr; - } - CpyDesc.dstMemoryType = DstType; - if (DstType == hipMemoryTypeArray) { - CpyDesc.dstArray = - reinterpret_cast(const_cast(DstPtr)); - CpyDesc.dstXInBytes = DstOffset[0]; - CpyDesc.dstY = DstOffset[1]; - } else { - CpyDesc.dstHost = DstPtr; - } - CpyDesc.WidthInBytes = Region[0]; - CpyDesc.Height = Region[1]; - UR_CHECK_ERROR(hipMemcpyParam2DAsync(&CpyDesc, HipStream)); - return UR_RESULT_SUCCESS; - } - - if (ImgType == UR_MEM_TYPE_IMAGE3D) { - - HIP_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = SrcType; - if (SrcType == hipMemoryTypeArray) { - CpyDesc.srcArray = - reinterpret_cast(const_cast(SrcPtr)); - CpyDesc.srcXInBytes = SrcOffset[0]; - CpyDesc.srcY = SrcOffset[1]; - CpyDesc.srcZ = SrcOffset[2]; - } else { - CpyDesc.srcHost = SrcPtr; - } - CpyDesc.dstMemoryType = DstType; - if (DstType == hipMemoryTypeArray) { - CpyDesc.dstArray = reinterpret_cast(DstPtr); - CpyDesc.dstXInBytes = DstOffset[0]; - CpyDesc.dstY = DstOffset[1]; - CpyDesc.dstZ = DstOffset[2]; - } else { - CpyDesc.dstHost = DstPtr; - } - CpyDesc.WidthInBytes = Region[0]; - CpyDesc.Height = Region[1]; - CpyDesc.Depth = Region[2]; - UR_CHECK_ERROR(hipDrvMemcpy3DAsync(&CpyDesc, HipStream)); - return UR_RESULT_SUCCESS; - } - - return UR_RESULT_ERROR_INVALID_VALUE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, - void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - - if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - } - - hipArray *Array = hImage->Mem.SurfaceMem.getArray(); - - hipArray_Format Format; - size_t NumChannels; - getArrayDesc(Array, Format, NumChannels); - - int ElementByteSize = imageElementByteSize(Format); - - size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels; - size_t BytesToCopy = ElementByteSize * NumChannels * region.depth; - - auto ImgType = hImage->Mem.SurfaceMem.getImageType(); - - size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.height}; - size_t SrcOffset[3] = {ByteOffsetX, 
origin.y, origin.z}; - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - Array, hipMemoryTypeArray, SrcOffset, - pDst, hipMemoryTypeHost, nullptr); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - if (blockingRead) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool, - ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, - void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - - if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - } - - hipArray *Array = hImage->Mem.SurfaceMem.getArray(); - - hipArray_Format Format; - size_t NumChannels; - getArrayDesc(Array, Format, NumChannels); - - int ElementByteSize = imageElementByteSize(Format); - - size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels; - size_t BytesToCopy = ElementByteSize * NumChannels * region.depth; - - auto ImgType = hImage->Mem.SurfaceMem.getImageType(); - - size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.height}; - size_t DstOffset[3] = {ByteOffsetX, origin.y, origin.z}; - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - pSrc, hipMemoryTypeHost, nullptr, - Array, hipMemoryTypeArray, DstOffset); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(hImageSrc->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageDst->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageSrc->Mem.SurfaceMem.getImageType() == - hImageDst->Mem.SurfaceMem.getImageType(), - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - } - - hipArray *SrcArray = hImageSrc->Mem.SurfaceMem.getArray(); - hipArray_Format SrcFormat; - size_t SrcNumChannels; - getArrayDesc(SrcArray, SrcFormat, SrcNumChannels); - - hipArray *DstArray = hImageDst->Mem.SurfaceMem.getArray(); - hipArray_Format DstFormat; - size_t DstNumChannels; - getArrayDesc(DstArray, DstFormat, DstNumChannels); - - UR_ASSERT(SrcFormat == DstFormat, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(SrcNumChannels == DstNumChannels, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - - int ElementByteSize = imageElementByteSize(SrcFormat); - - size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * SrcNumChannels; - size_t SrcByteOffsetX = srcOrigin.x * ElementByteSize * DstNumChannels; - size_t BytesToCopy = ElementByteSize * SrcNumChannels * region.depth; - - auto ImgType = hImageSrc->Mem.SurfaceMem.getImageType(); - - size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.width}; - size_t SrcOffset[3] = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z}; - size_t DstOffset[3] = {DstByteOffsetX, dstOrigin.y, dstOrigin.z}; - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemImageNDCopy( - HIPStream, ImgType, AdjustedRegion, SrcArray, hipMemoryTypeArray, - SrcOffset, DstArray, hipMemoryTypeArray, DstOffset); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -/// Implements mapping on the host using a BufferRead operation. -/// Mapped pointers are stored in the ur_mem_handle_t object. -/// If the buffer uses pinned host memory a pointer to that memory is returned -/// and no read operation is done. 
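-/// For orientation (summarising the code below): a map requesting read or write
-/// access stages the range on the host through urEnqueueMemBufferRead, and the
-/// matching urEnqueueMemUnmap writes it back only if the map asked for writing.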
-/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { - UR_ASSERT(hBuffer->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(offset + size <= hBuffer->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_ERROR_INVALID_OPERATION; - const bool IsPinned = - hBuffer->Mem.BufferMem.MemAllocMode == - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - - // Currently no support for overlapping regions - if (hBuffer->Mem.BufferMem.getMapPtr() != nullptr) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - // Allocate a pointer in the host to store the mapped information - auto HostPtr = hBuffer->Mem.BufferMem.mapToPtr(size, offset, mapFlags); - *ppRetMap = hBuffer->Mem.BufferMem.getMapPtr(); - if (HostPtr) { - Result = UR_RESULT_SUCCESS; - } - - if (!IsPinned && - ((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) { - // Pinned host memory is already on host so it doesn't need to be read. - Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size, - HostPtr, numEventsInWaitList, - phEventWaitList, phEvent); - } else { - ScopedContext Active(hQueue->getDevice()); - - if (IsPinned) { - Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList, - nullptr); - } - - if (phEvent) { - try { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_MAP, hQueue, hQueue->getNextTransferStream()); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } catch (ur_result_t Error) { - Result = Error; - } - } - } - - return Result; -} - -/// Implements the unmap from the host, using a BufferWrite operation. -/// Requires the mapped pointer to be already registered in the given hMem. -/// If hMem uses pinned host memory, this will not do a write. -/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() != nullptr, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() == pMappedPtr, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - const bool IsPinned = - hMem->Mem.BufferMem.MemAllocMode == - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - - if (!IsPinned && ((hMem->Mem.BufferMem.getMapFlags() & UR_MAP_FLAG_WRITE) || - (hMem->Mem.BufferMem.getMapFlags() & - UR_MAP_FLAG_WRITE_INVALIDATE_REGION))) { - // Pinned host memory is only on host so it doesn't need to be written to. 
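- // Note that the write-back below is issued as a blocking write (third argument
- // `true`), so the mapped pointer can be released by unmap() right afterwards.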
- Result = urEnqueueMemBufferWrite( - hQueue, hMem, true, hMem->Mem.BufferMem.getMapOffset(), - hMem->Mem.BufferMem.getMapSize(), pMappedPtr, numEventsInWaitList, - phEventWaitList, phEvent); - } else { - ScopedContext Active(hQueue->getDevice()); - - if (IsPinned) { - Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList, - nullptr); - } - - if (phEvent) { - try { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_UNMAP, hQueue, hQueue->getNextTransferStream()); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } catch (ur_result_t Error) { - Result = Error; - } - } - } - - hMem->Mem.BufferMem.unmap(pMappedPtr); - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *ptr, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - uint32_t StreamToken; - ur_stream_quard Guard; - hipStream_t HIPStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - if (phEvent) { - EventPtr = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_FILL, hQueue, HIPStream, StreamToken)); - UR_CHECK_ERROR(EventPtr->start()); - } - - auto N = size / patternSize; - switch (patternSize) { - case 1: - UR_CHECK_ERROR(hipMemsetD8Async(reinterpret_cast(ptr), - *(const uint8_t *)pPattern & 0xFF, N, - HIPStream)); - break; - case 2: - UR_CHECK_ERROR(hipMemsetD16Async(reinterpret_cast(ptr), - *(const uint16_t *)pPattern & 0xFFFF, N, - HIPStream)); - break; - case 4: - UR_CHECK_ERROR(hipMemsetD32Async(reinterpret_cast(ptr), - *(const uint32_t *)pPattern & 0xFFFFFFFF, - N, HIPStream)); - break; - - default: - Result = commonMemSetLargePattern(HIPStream, patternSize, size, pPattern, - reinterpret_cast(ptr)); - break; - } - - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - - std::unique_ptr EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - if (phEvent) { - EventPtr = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_MEMCPY, hQueue, HIPStream)); - UR_CHECK_ERROR(EventPtr->start()); - } - UR_CHECK_ERROR( - hipMemcpyAsync(pDst, pSrc, size, hipMemcpyDefault, HIPStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - } - if (blocking) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - if (phEvent) { - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, 
ur_event_handle_t *phEvent) { - void *HIPDevicePtr = const_cast(pMem); - ur_device_handle_t Device = hQueue->getContext()->getDevice(); - - // If the device does not support managed memory access, we can't set - // mem_advise. - if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { - setErrorMessage("mem_advise ignored as device does not support " - " managed memory access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - hipPointerAttribute_t attribs; - // TODO: hipPointerGetAttributes will fail if pMem is non-HIP allocated - // memory, as it is neither registered as host memory, nor into the address - // space for the current device, meaning the pMem ptr points to a - // system-allocated memory. This means we may need to check system-alloacted - // memory and handle the failure more gracefully. - UR_CHECK_ERROR(hipPointerGetAttributes(&attribs, pMem)); - // async prefetch requires USM pointer (or hip SVM) to work. - if (!attribs.isManaged) { - setErrorMessage("Prefetch hint ignored as prefetch only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, - // so we can't perform this check for such cases. -#if HIP_VERSION_MAJOR >= 5 - unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, - HIP_POINTER_ATTRIBUTE_RANGE_SIZE, - (hipDeviceptr_t)HIPDevicePtr)); - UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); -#endif - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - if (phEvent) { - EventPtr = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_PREFETCH, hQueue, HIPStream)); - UR_CHECK_ERROR(EventPtr->start()); - } - UR_CHECK_ERROR( - hipMemPrefetchAsync(pMem, size, hQueue->getDevice()->get(), HIPStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t, ur_event_handle_t *phEvent) { - void *HIPDevicePtr = const_cast(pMem); -// HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, -// so we can't perform this check for such cases. 
-#if HIP_VERSION_MAJOR >= 5 - unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, - HIP_POINTER_ATTRIBUTE_RANGE_SIZE, - (hipDeviceptr_t)HIPDevicePtr)); - UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); -#endif - // TODO implement a mapping to hipMemAdvise once the expected behaviour - // of urEnqueueUSMAdvise is detailed in the USM extension - return urEnqueueEventsWait(hQueue, 0, nullptr, phEvent); -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t, void *, size_t, size_t, const void *, size_t, size_t, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// 2D Memcpy API -/// -/// \param hQueue is the queue to submit to -/// \param blocking is whether this operation should block the host -/// \param pDst is the location the data will be copied -/// \param dstPitch is the total width of the destination memory including -/// padding -/// \param pSrc is the data to be copied -/// \param srcPitch is the total width of the source memory including padding -/// \param width is width in bytes of each row to be copied -/// \param height is height the columns to be copied -/// \param numEventsInWaitList is the number of events to wait on -/// \param phEventWaitList is an array of events to wait on -/// \param phEvent is the event that represents this operation -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_MEMCPY_2D, hQueue, HIPStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - UR_CHECK_ERROR(hipMemcpy2DAsync(pDst, dstPitch, pSrc, srcPitch, width, - height, hipMemcpyDefault, HIPStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - if (blocking) { - UR_CHECK_ERROR(hipStreamSynchronize(HIPStream)); - } - } catch (ur_result_t Err) { - Result = Err; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - const void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, void *, size_t, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, void *, size_t, - uint32_t, const ur_event_handle_t *, 
ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp deleted file mode 100644 index 5963d91c48735..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp +++ /dev/null @@ -1,332 +0,0 @@ -//===--------- event.cpp - HIP Adapter ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "event.hpp" -#include "common.hpp" -#include "context.hpp" -#include "platform.hpp" - -ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, - ur_context_handle_t Context, - ur_queue_handle_t Queue, - hipStream_t Stream, uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, HasOwnership{true}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, - EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { - - bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; - - UR_CHECK_ERROR(hipEventCreateWithFlags( - &EvEnd, ProfilingEnabled ? hipEventDefault : hipEventDisableTiming)); - - if (ProfilingEnabled) { - UR_CHECK_ERROR(hipEventCreateWithFlags(&EvQueued, hipEventDefault)); - UR_CHECK_ERROR(hipEventCreateWithFlags(&EvStart, hipEventDefault)); - } - - if (Queue != nullptr) { - urQueueRetain(Queue); - } - urContextRetain(Context); -} - -ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, - hipEvent_t EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { - urContextRetain(Context); -} - -ur_event_handle_t_::~ur_event_handle_t_() { - if (Queue != nullptr) { - urQueueRelease(Queue); - } - urContextRelease(Context); -} - -ur_result_t ur_event_handle_t_::start() { - assert(!isStarted()); - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) { - // NOTE: This relies on the default stream to be unused. - UR_CHECK_ERROR(hipEventRecord(EvQueued, 0)); - UR_CHECK_ERROR(hipEventRecord(EvStart, Queue->get())); - } - } catch (ur_result_t Error) { - Result = Error; - } - - IsStarted = true; - return Result; -} - -bool ur_event_handle_t_::isCompleted() const noexcept { - if (!IsRecorded) { - return false; - } - if (!HasBeenWaitedOn) { - const hipError_t Result = hipEventQuery(EvEnd); - if (Result != hipSuccess && Result != hipErrorNotReady) { - UR_CHECK_ERROR(Result); - return false; - } - if (Result == hipErrorNotReady) { - return false; - } - } - return true; -} - -uint64_t ur_event_handle_t_::getQueuedTime() const { - float MilliSeconds = 0.0f; - assert(isStarted()); - - // hipEventSynchronize waits till the event is ready for call to - // hipEventElapsedTime. 
- UR_CHECK_ERROR(hipEventSynchronize(EvStart)); - UR_CHECK_ERROR(hipEventSynchronize(EvEnd)); - - UR_CHECK_ERROR(hipEventElapsedTime(&MilliSeconds, EvStart, EvEnd)); - return static_cast(MilliSeconds * 1.0e6); -} - -uint64_t ur_event_handle_t_::getStartTime() const { - float MiliSeconds = 0.0f; - assert(isStarted()); - - // hipEventSynchronize waits till the event is ready for call to - // hipEventElapsedTime. - UR_CHECK_ERROR(hipEventSynchronize(ur_platform_handle_t_::EvBase)); - UR_CHECK_ERROR(hipEventSynchronize(EvStart)); - - UR_CHECK_ERROR(hipEventElapsedTime(&MiliSeconds, - ur_platform_handle_t_::EvBase, EvStart)); - return static_cast(MiliSeconds * 1.0e6); -} - -uint64_t ur_event_handle_t_::getEndTime() const { - float MiliSeconds = 0.0f; - assert(isStarted() && isRecorded()); - - // hipEventSynchronize waits till the event is ready for call to - // hipEventElapsedTime. - UR_CHECK_ERROR(hipEventSynchronize(ur_platform_handle_t_::EvBase)); - UR_CHECK_ERROR(hipEventSynchronize(EvEnd)); - - UR_CHECK_ERROR( - hipEventElapsedTime(&MiliSeconds, ur_platform_handle_t_::EvBase, EvEnd)); - return static_cast(MiliSeconds * 1.0e6); -} - -ur_result_t ur_event_handle_t_::record() { - - if (isRecorded() || !isStarted()) { - return UR_RESULT_ERROR_INVALID_EVENT; - } - - ur_result_t Result = UR_RESULT_ERROR_INVALID_OPERATION; - - UR_ASSERT(Queue, UR_RESULT_ERROR_INVALID_QUEUE); - - try { - EventId = Queue->getNextEventId(); - if (EventId == 0) { - detail::ur::die( - "Unrecoverable program state reached in event identifier overflow"); - } - UR_CHECK_ERROR(hipEventRecord(EvEnd, Stream)); - Result = UR_RESULT_SUCCESS; - } catch (ur_result_t Error) { - Result = Error; - } - - if (Result == UR_RESULT_SUCCESS) { - IsRecorded = true; - } - - return Result; -} - -ur_result_t ur_event_handle_t_::wait() { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - UR_CHECK_ERROR(hipEventSynchronize(EvEnd)); - HasBeenWaitedOn = true; - } catch (ur_result_t Error) { - Result = Error; - } - - return Result; -} - -ur_result_t ur_event_handle_t_::release() { - if (!backendHasOwnership()) - return UR_RESULT_SUCCESS; - - assert(Queue != nullptr); - UR_CHECK_ERROR(hipEventDestroy(EvEnd)); - - if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) { - UR_CHECK_ERROR(hipEventDestroy(EvQueued)); - UR_CHECK_ERROR(hipEventDestroy(EvStart)); - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - UR_ASSERT(numEvents > 0, UR_RESULT_ERROR_INVALID_VALUE); - - try { - - auto Context = phEventWaitList[0]->getContext(); - ScopedContext Active(Context->getDevice()); - - auto WaitFunc = [Context](ur_event_handle_t Event) -> ur_result_t { - UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT); - UR_ASSERT(Event->getContext() == Context, - UR_RESULT_ERROR_INVALID_CONTEXT); - - return Event->wait(); - }; - return forLatestEvents(phEventWaitList, numEvents, WaitFunc); - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, - ur_event_info_t propName, - size_t propValueSize, - void *pPropValue, - size_t *pPropValueSizeRet) { - UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_SIZE); - - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - switch (propName) { - case UR_EVENT_INFO_COMMAND_QUEUE: - return ReturnValue(hEvent->getQueue()); - case UR_EVENT_INFO_COMMAND_TYPE: - return ReturnValue(hEvent->getCommandType()); - case UR_EVENT_INFO_REFERENCE_COUNT: - return ReturnValue(hEvent->getReferenceCount()); - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: - return ReturnValue(hEvent->getExecutionStatus()); - case UR_EVENT_INFO_CONTEXT: - return ReturnValue(hEvent->getContext()); - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -/// Obtain profiling information from UR HIP events -/// Timings from HIP are only elapsed time. -UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( - ur_event_handle_t hEvent, ur_profiling_info_t propName, - size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) { - - UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_VALUE); - - ur_queue_handle_t Queue = hEvent->getQueue(); - if (Queue == nullptr || !(Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE)) { - return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; - } - - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - switch (propName) { - case UR_PROFILING_INFO_COMMAND_QUEUED: - case UR_PROFILING_INFO_COMMAND_SUBMIT: - // Note: No user for this case - return ReturnValue(static_cast(hEvent->getQueuedTime())); - case UR_PROFILING_INFO_COMMAND_START: - return ReturnValue(static_cast(hEvent->getStartTime())); - case UR_PROFILING_INFO_COMMAND_END: - return ReturnValue(static_cast(hEvent->getEndTime())); - default: - break; - } - return {}; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t, - ur_execution_info_t, - ur_event_callback_t, - void *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - const auto RefCount = hEvent->incrementReferenceCount(); - - detail::ur::assertion(RefCount != 0, - "Reference count overflow detected in urEventRetain."); - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - detail::ur::assertion(hEvent->getReferenceCount() != 0, - "Reference count overflow detected in urEventRelease."); - - // decrement ref count. If it is 0, delete the event. - if (hEvent->decrementReferenceCount() == 0) { - std::unique_ptr event_ptr{hEvent}; - ur_result_t Result = UR_RESULT_ERROR_INVALID_EVENT; - try { - ScopedContext Active(hEvent->getContext()->getDevice()); - Result = hEvent->release(); - } catch (...) { - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - return Result; - } - - return UR_RESULT_SUCCESS; -} - -/// Gets the native HIP handle of a UR event object -/// -/// \param[in] hEvent The UR event to get the native HIP object of. -/// \param[out] phNativeEvent Set to the native handle of the UR event object. -/// -/// \return UR_RESULT_SUCCESS on success. UR_RESULT_ERROR_INVALID_EVENT if given -/// a user event. 
-UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - *phNativeEvent = reinterpret_cast(hEvent->get()); - return UR_RESULT_SUCCESS; -} - -/// Created a UR event object from a HIP event handle. -/// NOTE: The created UR object doesn't take ownership of the native handle. -/// -/// \param[in] hNativeEvent The native handle to create UR event object from. -/// \param[out] phEvent Set to the UR event object created from native handle. -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - std::ignore = pProperties; - - *phEvent = ur_event_handle_t_::makeWithNative( - hContext, reinterpret_cast(hNativeEvent)); - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp deleted file mode 100644 index 1e418519a8fde..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp +++ /dev/null @@ -1,185 +0,0 @@ -//===--------- event.hpp - HIP Adapter ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "common.hpp" -#include "queue.hpp" - -/// UR Event mapping to hipEvent_t -/// -struct ur_event_handle_t_ { -public: - using native_type = hipEvent_t; - - ur_result_t record(); - - ur_result_t wait(); - - ur_result_t start(); - - native_type get() const noexcept { return EvEnd; }; - - ur_queue_handle_t getQueue() const noexcept { return Queue; } - - hipStream_t getStream() const noexcept { return Stream; } - - uint32_t getComputeStreamToken() const noexcept { return StreamToken; } - - ur_command_t getCommandType() const noexcept { return CommandType; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - bool isRecorded() const noexcept { return IsRecorded; } - - bool isStarted() const noexcept { return IsStarted; } - - bool isCompleted() const noexcept; - - uint32_t getExecutionStatus() const noexcept { - - if (!isRecorded()) { - return UR_EVENT_STATUS_SUBMITTED; - } - - if (!isCompleted()) { - return UR_EVENT_STATUS_RUNNING; - } - return UR_EVENT_STATUS_COMPLETE; - } - - ur_context_handle_t getContext() const noexcept { return Context; }; - - uint32_t incrementReferenceCount() { return ++RefCount; } - - uint32_t decrementReferenceCount() { return --RefCount; } - - uint32_t getEventId() const noexcept { return EventId; } - - bool backendHasOwnership() const noexcept { return HasOwnership; } - - // Returns the counter time when the associated command(s) were enqueued - uint64_t getQueuedTime() const; - - // Returns the counter time when the associated command(s) started execution - uint64_t getStartTime() const; - - // Returns the counter time when the associated command(s) completed - uint64_t getEndTime() const; - - // construct a native HIP. This maps closely to the underlying HIP event. 
- static ur_event_handle_t - makeNative(ur_command_t Type, ur_queue_handle_t Queue, hipStream_t Stream, - uint32_t StreamToken = std::numeric_limits::max()) { - return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream, - StreamToken); - } - - static ur_event_handle_t makeWithNative(ur_context_handle_t context, - hipEvent_t eventNative) { - return new ur_event_handle_t_(context, eventNative); - } - - ur_result_t release(); - - ~ur_event_handle_t_(); - -private: - // This constructor is private to force programmers to use the makeNative / - // make_user static members in order to create a ur_event_handle_t for HIP. - ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, hipStream_t Stream, - uint32_t StreamToken); - - // This constructor is private to force programmers to use the - // makeWithNative for event interop - ur_event_handle_t_(ur_context_handle_t Context, hipEvent_t EventNative); - - ur_command_t CommandType; // The type of command associated with event. - - std::atomic_uint32_t RefCount; // Event reference count. - - bool HasOwnership; // Signifies if event owns the native type. - - bool HasBeenWaitedOn; // Signifies whether the event has been waited - // on through a call to wait(), which implies - // that it has completed. - - bool IsRecorded; // Signifies wether a native HIP event has been recorded - // yet. - bool IsStarted; // Signifies wether the operation associated with the - // UR event has started or not - // - - uint32_t StreamToken; - uint32_t EventId; // Queue identifier of the event. - - native_type EvEnd; // HIP event handle. If this ur_event_handle_t_ - // represents a user event, this will be nullptr. - - native_type EvStart; // HIP event handle associated with the start - - native_type EvQueued; // HIP event handle associated with the time - // the command was enqueued - - ur_queue_handle_t Queue; // ur_queue_handle_t associated with the event. If - // this is a user event, this will be nullptr. - - hipStream_t Stream; // hipStream_t associated with the event. If this is a - // user event, this will be uninitialized. - - ur_context_handle_t Context; // ur_context_handle_t associated with the event. - // If this is a native event, this will be the - // same context associated with the Queue member. -}; - -// Iterate over `EventWaitList` and apply the given callback `F` to the -// latest event on each queue therein. The callback must take a single -// ur_event_handle_t argument and return a ur_result_t. If the callback returns -// an error, the iteration terminates and the error is returned. -template -ur_result_t forLatestEvents(const ur_event_handle_t *EventWaitList, - size_t NumEventsInWaitList, Func &&F) { - - if (EventWaitList == nullptr || NumEventsInWaitList == 0) { - return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; - } - - // Fast path if we only have a single event - if (NumEventsInWaitList == 1) { - return F(EventWaitList[0]); - } - - std::vector Events{EventWaitList, - EventWaitList + NumEventsInWaitList}; - std::sort(Events.begin(), Events.end(), - [](ur_event_handle_t E0, ur_event_handle_t E1) { - // Tiered sort creating sublists of streams (smallest value first) - // in which the corresponding events are sorted into a sequence of - // newest first. 
- return E0->getStream() < E1->getStream() || - (E0->getStream() == E1->getStream() && - E0->getEventId() > E1->getEventId()); - }); - - hipStream_t LastSeenStream = 0; - for (size_t i = 0; i < Events.size(); i++) { - auto Event = Events[i]; - if (!Event || (i != 0 && Event->getStream() == LastSeenStream)) { - continue; - } - - LastSeenStream = Event->getStream(); - - auto Result = F(Event); - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - } - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/image.cpp deleted file mode 100644 index 743a430999822..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/image.cpp +++ /dev/null @@ -1,173 +0,0 @@ -//===--------- image.cpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ur/ur.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] const ur_usm_desc_t *pUSMDesc, - [[maybe_unused]] ur_usm_pool_handle_t pool, - [[maybe_unused]] size_t widthInBytes, [[maybe_unused]] size_t height, - [[maybe_unused]] size_t elementSizeBytes, [[maybe_unused]] void **ppMem, - [[maybe_unused]] size_t *pResultPitch) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_handle_t hImage) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_handle_t hImage) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] const ur_image_format_t *pImageFormat, - [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, - [[maybe_unused]] const ur_image_format_t *pImageFormat, - [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] ur_mem_handle_t *phMem, - [[maybe_unused]] ur_exp_image_handle_t *phImage) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - 
[[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, - [[maybe_unused]] const ur_image_format_t *pImageFormat, - [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] ur_sampler_handle_t hSampler, - [[maybe_unused]] ur_mem_handle_t *phMem, - [[maybe_unused]] ur_exp_image_handle_t *phImage) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pDst, - [[maybe_unused]] void *pSrc, - [[maybe_unused]] const ur_image_format_t *pImageFormat, - [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, - [[maybe_unused]] ur_rect_offset_t srcOffset, - [[maybe_unused]] ur_rect_offset_t dstOffset, - [[maybe_unused]] ur_rect_region_t copyExtent, - [[maybe_unused]] ur_rect_region_t hostExtent, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( - [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, - [[maybe_unused]] ur_image_info_t propName, - [[maybe_unused]] void *pPropValue, [[maybe_unused]] size_t *pPropSizeRet) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem, - [[maybe_unused]] uint32_t mipmapLevel, - [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesMipmapFreeExp([[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_image_mem_handle_t hMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, [[maybe_unused]] size_t size, - [[maybe_unused]] ur_exp_interop_mem_desc_t *pInteropMemDesc, - [[maybe_unused]] ur_exp_interop_mem_handle_t *phInteropMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] const ur_image_format_t *pImageFormat, - [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem, - [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_interop_mem_handle_t hInteropMem) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesImportExternalSemaphoreOpaqueFDExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, - [[maybe_unused]] ur_exp_interop_semaphore_handle_t *phInteropSemaphore) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - 
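
Note: every bindless-image and external-semaphore entry point in this file is a stub that uniformly returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE, so callers must treat that code as "feature absent on this adapter". A minimal caller-side sketch of such a probe follows; it is illustrative only (the helper name and probe strategy are assumptions, not part of this patch), and it uses only the entry points and types declared in this file.

// Hypothetical caller-side helper (not part of this change): probe whether the
// active adapter implements the experimental bindless-image allocation entry
// point, which this HIP adapter stubs out with
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
#include <ur_api.h>

bool adapterSupportsBindlessImages(ur_context_handle_t Ctx,
                                   ur_device_handle_t Dev,
                                   const ur_image_format_t *Format,
                                   const ur_image_desc_t *Desc) {
  ur_exp_image_mem_handle_t ProbeMem = nullptr;
  ur_result_t Res =
      urBindlessImagesImageAllocateExp(Ctx, Dev, Format, Desc, &ProbeMem);
  if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE)
    return false; // this HIP adapter takes this path for every bindless call
  if (Res == UR_RESULT_SUCCESS)
    (void)urBindlessImagesImageFreeExp(Ctx, Dev, ProbeMem); // release the probe
  return Res == UR_RESULT_SUCCESS;
}
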
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] ur_exp_interop_semaphore_handle_t hInteropSemaphore) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - [[maybe_unused]] ur_queue_handle_t hQueue, - [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - [[maybe_unused]] ur_queue_handle_t hQueue, - [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp deleted file mode 100644 index 2aaa25c936984..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp +++ /dev/null @@ -1,322 +0,0 @@ -//===--------- kernel.cpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "kernel.hpp" -#include "memory.hpp" -#include "sampler.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - ur_kernel_handle_t *phKernel) { - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr RetKernel{nullptr}; - - try { - ScopedContext Active(hProgram->getContext()->getDevice()); - - hipFunction_t HIPFunc; - UR_CHECK_ERROR( - hipModuleGetFunction(&HIPFunc, hProgram->get(), pKernelName)); - - std::string KernelNameWoffset = std::string(pKernelName) + "_with_offset"; - hipFunction_t HIPFuncWithOffsetParam; - hipError_t OffsetRes = hipModuleGetFunction( - &HIPFuncWithOffsetParam, hProgram->get(), KernelNameWoffset.c_str()); - - // If there is no kernel with global offset parameter we mark it as missing - if (OffsetRes == hipErrorNotFound) { - HIPFuncWithOffsetParam = nullptr; - } else { - UR_CHECK_ERROR(OffsetRes); - } - RetKernel = std::unique_ptr( - new ur_kernel_handle_t_{HIPFunc, HIPFuncWithOffsetParam, pKernelName, - hProgram, hProgram->getContext()}); - } catch (ur_result_t Err) { - Result = Err; - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phKernel = RetKernel.release(); - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { - size_t GlobalWorkSize[3] = {0, 0, 0}; - - int MaxBlockDimX{0}, MaxBlockDimY{0}, MaxBlockDimZ{0}; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MaxBlockDimX, hipDeviceAttributeMaxBlockDimX, hDevice->get())); - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MaxBlockDimY, hipDeviceAttributeMaxBlockDimY, hDevice->get())); - UR_CHECK_ERROR(hipDeviceGetAttribute( - &MaxBlockDimZ, hipDeviceAttributeMaxBlockDimZ, hDevice->get())); - - int max_grid_dimX{0}, max_grid_dimY{0}, max_grid_dimZ{0}; - UR_CHECK_ERROR(hipDeviceGetAttribute( - &max_grid_dimX, hipDeviceAttributeMaxGridDimX, hDevice->get())); - UR_CHECK_ERROR(hipDeviceGetAttribute( - &max_grid_dimY, hipDeviceAttributeMaxGridDimY, hDevice->get())); - UR_CHECK_ERROR(hipDeviceGetAttribute( - &max_grid_dimZ, hipDeviceAttributeMaxGridDimZ, hDevice->get())); - - GlobalWorkSize[0] = MaxBlockDimX * max_grid_dimX; - GlobalWorkSize[1] = MaxBlockDimY * max_grid_dimY; - GlobalWorkSize[2] = MaxBlockDimZ * max_grid_dimZ; - return ReturnValue(GlobalWorkSize, 3); - } - case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - int MaxThreads = 0; - UR_CHECK_ERROR(hipFuncGetAttribute( - &MaxThreads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get())); - return ReturnValue(size_t(MaxThreads)); - } - case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { - size_t group_size[3] = {0, 0, 0}; - // Returns the work-group size specified in the kernel source or IL. - // If the work-group size is not specified in the kernel source or IL, - // (0, 0, 0) is returned. - // https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html - - // TODO: can we extract the work group size from the PTX? - return ReturnValue(group_size, 3); - } - case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { - // OpenCL LOCAL == HIP SHARED - int Bytes = 0; - UR_CHECK_ERROR(hipFuncGetAttribute( - &Bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, hKernel->get())); - return ReturnValue(uint64_t(Bytes)); - } - case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - // Work groups should be multiples of the warp size - int WarpSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, - hDevice->get())); - return ReturnValue(static_cast(WarpSize)); - } - case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { - // OpenCL PRIVATE == HIP LOCAL - int Bytes = 0; - UR_CHECK_ERROR(hipFuncGetAttribute( - &Bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, hKernel->get())); - return ReturnValue(uint64_t(Bytes)); - } - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - UR_ASSERT(hKernel->getReferenceCount() > 0u, UR_RESULT_ERROR_INVALID_KERNEL); - - hKernel->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelRelease(ur_kernel_handle_t hKernel) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - UR_ASSERT(hKernel->getReferenceCount() != 0, UR_RESULT_ERROR_INVALID_KERNEL); - - // decrement ref count. 
If it is 0, delete the program. - if (hKernel->decrementReferenceCount() == 0) { - // no internal cuda resources to clean up. Just delete it. - delete hKernel; - return UR_RESULT_SUCCESS; - } - - return UR_RESULT_SUCCESS; -} - -// TODO(ur): Not implemented on hip atm. Also, need to add tests for this -// feature. -UR_APIEXPORT ur_result_t UR_APICALL -urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_value_properties_t *, const void *pArgValue) { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hKernel->setKernelArg(argIndex, argSize, pArgValue); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { - std::ignore = pProperties; - UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hKernel->setKernelLocalArg(argIndex, argSize); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, - ur_kernel_info_t propName, - size_t propSize, - void *pKernelInfo, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); - - switch (propName) { - case UR_KERNEL_INFO_FUNCTION_NAME: - return ReturnValue(hKernel->getName()); - case UR_KERNEL_INFO_NUM_ARGS: - return ReturnValue(hKernel->getNumArgs()); - case UR_KERNEL_INFO_REFERENCE_COUNT: - return ReturnValue(hKernel->getReferenceCount()); - case UR_KERNEL_INFO_CONTEXT: - return ReturnValue(hKernel->getContext()); - case UR_KERNEL_INFO_PROGRAM: - return ReturnValue(hKernel->getProgram()); - case UR_KERNEL_INFO_ATTRIBUTES: - return ReturnValue(""); - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - switch (propName) { - case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { - // Sub-group size is equivalent to warp size - int WarpSize = 0; - UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, - hDevice->get())); - return ReturnValue(static_cast(WarpSize)); - } - case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: { - // Number of sub-groups = max block size / warp size + possible remainder - int MaxThreads = 0; - UR_CHECK_ERROR(hipFuncGetAttribute( - &MaxThreads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get())); - int WarpSize = 0; - urKernelGetSubGroupInfo(hKernel, hDevice, - UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE, - sizeof(uint32_t), &WarpSize, nullptr); - int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize; - return ReturnValue(static_cast(MaxWarps)); - } - case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: { - // Return value of 0 => not specified - // TODO: Revisit if PTX is generated for compile-time work-group sizes - return ReturnValue(0); - } - case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: { - // Return value of 0 => unspecified or "auto" sub-group size - // Correct for now, since warp size may be 
read from special register - // TODO: Return warp size once default is primary sub-group size - // TODO: Revisit if we can recover [[sub_group_size]] attribute from PTX - return ReturnValue(0); - } - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( - ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { - hKernel->setKernelPtrArg(argIndex, sizeof(pArgValue), pArgValue); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( - ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - // Below sets kernel arg when zero-sized buffers are handled. - // In such case the corresponding memory is null. - if (hArgValue == nullptr) { - hKernel->setKernelArg(argIndex, 0, nullptr); - return UR_RESULT_SUCCESS; - } - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - if (hArgValue->MemType == ur_mem_handle_t_::Type::Surface) { - auto array = hArgValue->Mem.SurfaceMem.getArray(); - hipArray_Format Format; - size_t NumChannels; - getArrayDesc(array, Format, NumChannels); - if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 && - Format != HIP_AD_FORMAT_SIGNED_INT32 && - Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) { - detail::ur::die( - "UR HIP kernels only support images with channel types int32, " - "uint32, float, and half."); - } - hipSurfaceObject_t hipSurf = hArgValue->Mem.SurfaceMem.getSurface(); - hKernel->setKernelArg(argIndex, sizeof(hipSurf), (void *)&hipSurf); - } else - - { - void *HIPPtr = hArgValue->Mem.BufferMem.getVoid(); - hKernel->setKernelArg(argIndex, sizeof(void *), (void *)&HIPPtr); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( - ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - uint32_t SamplerProps = hArgValue->Props; - hKernel->setKernelArg(argIndex, sizeof(uint32_t), (void *)&SamplerProps); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -// A NOP for the HIP backend -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetExecInfo(ur_kernel_handle_t, ur_kernel_exec_info_t, size_t, - const ur_kernel_exec_info_properties_t *, const void *) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, ur_program_handle_t, - const ur_kernel_native_properties_t *, ur_kernel_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp deleted file mode 100644 index 6ac76e6b0df5c..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp +++ /dev/null @@ -1,208 +0,0 @@ -//===--------- kernel.hpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -#include -#include -#include -#include - -#include "program.hpp" - -/// Implementation of a UR Kernel for HIP -/// -/// UR Kernels are used to set kernel arguments, -/// creating a state on the Kernel object for a given -/// invocation. This is not the case of HIPFunction objects, -/// which are simply passed together with the arguments on the invocation. -/// The UR Kernel implementation for HIP stores the list of arguments, -/// argument sizes, and offsets to emulate the interface of UR Kernel, -/// saving the arguments for the later dispatch. -/// Note that in UR API, the Local memory is specified as a size per -/// individual argument, but in HIP only the total usage of shared -/// memory is required since it is not passed as a parameter. -/// A compiler pass converts the UR API local memory model into the -/// HIP shared model. This object simply calculates the total of -/// shared memory, and the initial offsets of each parameter. -struct ur_kernel_handle_t_ { - using native_type = hipFunction_t; - - native_type Function; - native_type FunctionWithOffsetParam; - std::string Name; - ur_context_handle_t Context; - ur_program_handle_t Program; - std::atomic_uint32_t RefCount; - - /// Structure that holds the arguments to the kernel. - /// Note earch argument size is known, since it comes - /// from the kernel signature. - /// This is not something can be queried from the HIP API - /// so there is a hard-coded size (\ref MAX_PARAM_BYTES) - /// and a storage. - struct arguments { - static constexpr size_t MAX_PARAM_BYTES = 4000u; - using args_t = std::array; - using args_size_t = std::vector; - using args_index_t = std::vector; - args_t Storage; - args_size_t ParamSizes; - args_index_t Indices; - args_size_t OffsetPerIndex; - std::set PtrArgs; - - std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0}; - - arguments() { - // Place the implicit offset index at the end of the indicies collection - Indices.emplace_back(&ImplicitOffsetArgs); - } - - /// Add an argument to the kernel. - /// If the argument existed before, it is replaced. - /// Otherwise, it is added. - /// Gaps are filled with empty arguments. - /// Implicit offset argument is kept at the back of the indices collection. 
- void addArg(size_t Index, size_t Size, const void *Arg, - size_t LocalSize = 0) { - if (Index + 2 > Indices.size()) { - // Move implicit offset argument Index with the end - Indices.resize(Index + 2, Indices.back()); - // Ensure enough space for the new argument - ParamSizes.resize(Index + 1); - OffsetPerIndex.resize(Index + 1); - } - ParamSizes[Index] = Size; - // calculate the insertion point on the array - size_t InsertPos = std::accumulate(std::begin(ParamSizes), - std::begin(ParamSizes) + Index, 0); - // Update the stored value for the argument - std::memcpy(&Storage[InsertPos], Arg, Size); - Indices[Index] = &Storage[InsertPos]; - OffsetPerIndex[Index] = LocalSize; - } - - void addLocalArg(size_t Index, size_t Size) { - size_t LocalOffset = this->getLocalSize(); - - // maximum required alignment is the size of the largest vector type - const size_t MaxAlignment = sizeof(double) * 16; - - // for arguments smaller than the maximum alignment simply align to the - // size of the argument - const size_t Alignment = std::min(MaxAlignment, Size); - - // align the argument - size_t AlignedLocalOffset = LocalOffset; - size_t Pad = LocalOffset % Alignment; - if (Pad != 0) { - AlignedLocalOffset += Alignment - Pad; - } - - addArg(Index, sizeof(size_t), (const void *)&AlignedLocalOffset, - Size + AlignedLocalOffset - LocalOffset); - } - - void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) { - assert(Size == sizeof(std::uint32_t) * 3); - std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size); - } - - void clearLocalSize() { - std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0); - } - - const args_index_t &getIndices() const noexcept { return Indices; } - - uint32_t getLocalSize() const { - return std::accumulate(std::begin(OffsetPerIndex), - std::end(OffsetPerIndex), 0); - } - } Args; - - ur_kernel_handle_t_(hipFunction_t Func, hipFunction_t FuncWithOffsetParam, - const char *Name, ur_program_handle_t Program, - ur_context_handle_t Ctxt) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Ctxt}, Program{Program}, RefCount{1} { - urProgramRetain(Program); - urContextRetain(Context); - } - - ur_kernel_handle_t_(hipFunction_t Func, const char *Name, - ur_program_handle_t Program, ur_context_handle_t Ctxt) - : ur_kernel_handle_t_{Func, nullptr, Name, Program, Ctxt} {} - - ~ur_kernel_handle_t_() { - urProgramRelease(Program); - urContextRelease(Context); - } - - ur_program_handle_t getProgram() const noexcept { return Program; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - native_type get() const noexcept { return Function; }; - - native_type getWithOffsetParameter() const noexcept { - return FunctionWithOffsetParam; - }; - - bool hasWithOffsetParameter() const noexcept { - return FunctionWithOffsetParam != nullptr; - } - - ur_context_handle_t getContext() const noexcept { return Context; }; - - const char *getName() const noexcept { return Name.c_str(); } - - /// Get the number of kernel arguments, excluding the implicit global offset. 
- /// Note this only returns the current known number of arguments, not the - /// real one required by the kernel, since this cannot be queried from - /// the HIP Driver API - uint32_t getNumArgs() const noexcept { return Args.Indices.size() - 1; } - - void setKernelArg(int Index, size_t Size, const void *Arg) { - Args.addArg(Index, Size, Arg); - } - - /// We track all pointer arguments to be able to issue prefetches at enqueue - /// time - void setKernelPtrArg(int Index, size_t Size, const void *PtrArg) { - Args.PtrArgs.insert(*static_cast(PtrArg)); - setKernelArg(Index, Size, PtrArg); - } - - bool isPtrArg(const void *ptr) { - return Args.PtrArgs.find(ptr) != Args.PtrArgs.end(); - } - - std::set &getPtrArgs() { return Args.PtrArgs; } - - void setKernelLocalArg(int Index, size_t Size) { - Args.addLocalArg(Index, Size); - } - - void setImplicitOffsetArg(size_t Size, std::uint32_t *ImplicitOffset) { - return Args.setImplicitOffset(Size, ImplicitOffset); - } - - const arguments::args_index_t &getArgIndices() const { - return Args.getIndices(); - } - - uint32_t getLocalSize() const noexcept { return Args.getLocalSize(); } - - void clearLocalSize() { Args.clearLocalSize(); } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp deleted file mode 100644 index 837a321be5346..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp +++ /dev/null @@ -1,501 +0,0 @@ -//===--------- memory.cpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "memory.hpp" -#include "context.hpp" -#include - -/// Decreases the reference count of the Mem object. -/// If this is zero, calls the relevant HIP Free function -/// \return UR_RESULT_SUCCESS unless deallocation error -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - - // Do nothing if there are other references - if (hMem->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - - // make sure memObj is released in case UR_CHECK_ERROR throws - std::unique_ptr uniqueMemObj(hMem); - - if (hMem->isSubBuffer()) { - return UR_RESULT_SUCCESS; - } - - ScopedContext Active(uniqueMemObj->getContext()->getDevice()); - - if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) { - switch (uniqueMemObj->Mem.BufferMem.MemAllocMode) { - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn: - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic: - UR_CHECK_ERROR(hipFree((void *)uniqueMemObj->Mem.BufferMem.Ptr)); - break; - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr: - UR_CHECK_ERROR(hipHostUnregister(uniqueMemObj->Mem.BufferMem.HostPtr)); - break; - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr: - UR_CHECK_ERROR(hipFreeHost(uniqueMemObj->Mem.BufferMem.HostPtr)); - }; - } - - else if (hMem->MemType == ur_mem_handle_t_::Type::Surface) { - UR_CHECK_ERROR( - hipDestroySurfaceObject(uniqueMemObj->Mem.SurfaceMem.getSurface())); - auto Array = uniqueMemObj->Mem.SurfaceMem.getArray(); - UR_CHECK_ERROR(hipFreeArray(Array)); - } - - } catch (ur_result_t Err) { - Result = Err; - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - if (Result != UR_RESULT_SUCCESS) { - // A reported HIP error is either an implementation or an asynchronous HIP - // error for which it is unclear if the function that reported it succeeded - // or not. Either way, the state of the program is compromised and likely - // unrecoverable. - detail::ur::die("Unrecoverable program state reached in urMemRelease"); - } - - return UR_RESULT_SUCCESS; -} - -/// Creates a UR Memory object using a HIP memory allocation. -/// Can trigger a manual copy depending on the mode. -/// \TODO Implement USE_HOST_PTR using hipHostRegister - See #9789 -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, - const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - // Validate flags - UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0, - UR_RESULT_ERROR_INVALID_ENUMERATION); - if (flags & - (UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER)) { - UR_ASSERT(pProperties && pProperties->pHost, - UR_RESULT_ERROR_INVALID_HOST_PTR); - } - // Need input memory object - UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - - // Currently, USE_HOST_PTR is not implemented using host register - // since this triggers a weird segfault after program ends. - // Setting this constant to true enables testing that behavior. - const bool EnableUseHostPtr = false; - const bool PerformInitialCopy = - (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) || - ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && !EnableUseHostPtr); - ur_result_t Result = UR_RESULT_SUCCESS; - ur_mem_handle_t RetMemObj = nullptr; - - try { - ScopedContext Active(hContext->getDevice()); - void *Ptr; - auto pHost = pProperties ? pProperties->pHost : nullptr; - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode = - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic; - - if ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && EnableUseHostPtr) { - UR_CHECK_ERROR(hipHostRegister(pHost, size, hipHostRegisterMapped)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr; - } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { - UR_CHECK_ERROR(hipHostMalloc(&pHost, size)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - } else { - UR_CHECK_ERROR(hipMalloc(&Ptr, size)); - if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn; - } - } - - if (Result == UR_RESULT_SUCCESS) { - ur_mem_handle_t parentBuffer = nullptr; - - auto DevPtr = - reinterpret_cast( - Ptr); - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, parentBuffer, flags, AllocMode, DevPtr, pHost, size}); - if (URMemObj != nullptr) { - RetMemObj = URMemObj.release(); - if (PerformInitialCopy) { - // Operates on the default stream of the current HIP context. - UR_CHECK_ERROR(hipMemcpyHtoD(DevPtr, pHost, size)); - // Synchronize with default stream implicitly used by hipMemcpyHtoD - // to make buffer data available on device before any other UR call - // uses it. - if (Result == UR_RESULT_SUCCESS) { - hipStream_t defaultStream = 0; - UR_CHECK_ERROR(hipStreamSynchronize(defaultStream)); - } - } - } else { - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - } - } catch (ur_result_t Err) { - Result = Err; - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - *phBuffer = RetMemObj; - - return Result; -} - -/// Implements a buffer partition in the HIP backend. -/// A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented -/// as an offset over an existing HIP allocation. -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( - ur_mem_handle_t hBuffer, ur_mem_flags_t flags, - ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { - UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0, - UR_RESULT_ERROR_INVALID_ENUMERATION); - UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(!hBuffer->isSubBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - // Default value for flags means UR_MEM_FLAG_READ_WRITE. - if (flags == 0) { - flags = UR_MEM_FLAG_READ_WRITE; - } - - UR_ASSERT(!(flags & - (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER | - UR_MEM_FLAG_ALLOC_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)), - UR_RESULT_ERROR_INVALID_VALUE); - if (hBuffer->MemFlags & UR_MEM_FLAG_WRITE_ONLY) { - UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_READ_ONLY)), - UR_RESULT_ERROR_INVALID_VALUE); - } - if (hBuffer->MemFlags & UR_MEM_FLAG_READ_ONLY) { - UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY)), - UR_RESULT_ERROR_INVALID_VALUE); - } - - UR_ASSERT(bufferCreateType == UR_BUFFER_CREATE_TYPE_REGION, - UR_RESULT_ERROR_INVALID_ENUMERATION); - - UR_ASSERT(pRegion->size != 0u, UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - - UR_ASSERT( - ((pRegion->origin + pRegion->size) <= hBuffer->Mem.BufferMem.getSize()), - UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - // Retained indirectly due to retaining parent buffer below. - ur_context_handle_t Context = hBuffer->Context; - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode = - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic; - - UR_ASSERT(hBuffer->Mem.BufferMem.Ptr != - ur_mem_handle_t_::MemImpl::BufferMem::native_type{0}, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_mem_handle_t_::MemImpl::BufferMem::native_type Ptr = - hBuffer->Mem.BufferMem.getWithOffset(pRegion->origin); - - void *HostPtr = nullptr; - if (hBuffer->Mem.BufferMem.HostPtr) { - HostPtr = - static_cast(hBuffer->Mem.BufferMem.HostPtr) + pRegion->origin; - } - - ReleaseGuard ReleaseGuard(hBuffer); - - std::unique_ptr RetMemObj{nullptr}; - try { - ScopedContext Active(Context->getDevice()); - - RetMemObj = std::unique_ptr{new ur_mem_handle_t_{ - Context, hBuffer, flags, AllocMode, Ptr, HostPtr, pRegion->size}}; - } catch (ur_result_t Err) { - *phMem = nullptr; - return Err; - } catch (...) { - *phMem = nullptr; - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - ReleaseGuard.dismiss(); - *phMem = RetMemObj.release(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, - ur_mem_info_t MemInfoType, - size_t propSize, - void *pMemInfo, - size_t *pPropSizeRet) { - - UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT, - UR_RESULT_ERROR_INVALID_ENUMERATION); - UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); - - ScopedContext Active(hMemory->getContext()->getDevice()); - - switch (MemInfoType) { - case UR_MEM_INFO_SIZE: { - try { - size_t AllocSize = 0; - UR_CHECK_ERROR(hipMemGetAddressRange(nullptr, &AllocSize, - hMemory->Mem.BufferMem.Ptr)); - return ReturnValue(AllocSize); - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_UNKNOWN; - } - } - case UR_MEM_INFO_CONTEXT: { - return ReturnValue(hMemory->getContext()); - } - - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } -} - -/// Gets the native HIP handle of a UR mem object -/// -/// \param[in] hMem The UR mem to get the native HIP object of. -/// \param[out] phNativeMem Set to the native handle of the UR mem object. -/// -/// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL -urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { -#if defined(__HIP_PLATFORM_NVIDIA__) - if (sizeof(ur_mem_handle_t_::MemImpl::BufferMem::native_type) > - sizeof(ur_native_handle_t)) { - // Check that all the upper bits that cannot be represented by - // ur_native_handle_t are empty. - // NOTE: The following shift might trigger a warning, but the check in the - // if above makes sure that this does not underflow. - ur_mem_handle_t_::MemImpl::BufferMem::native_type UpperBits = - hMem->Mem.BufferMem.get() >> (sizeof(ur_native_handle_t) * CHAR_BIT); - if (UpperBits) { - // Return an error if any of the remaining bits is non-zero. - return UR_RESULT_ERROR_INVALID_MEM_OBJECT; - } - } - *phNativeMem = - reinterpret_cast(hMem->Mem.BufferMem.get()); -#elif defined(__HIP_PLATFORM_AMD__) - *phNativeMem = - reinterpret_cast(hMem->Mem.BufferMem.get()); -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, const ur_mem_native_properties_t *, - ur_mem_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, const ur_image_format_t *, - const ur_image_desc_t *, const ur_mem_native_properties_t *, - ur_mem_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// \TODO Not implemented -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - void *pHost, ur_mem_handle_t *phMem) { - - // Need input memory object - UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0, - UR_RESULT_ERROR_INVALID_ENUMERATION); - if (flags & - (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)) { - UR_ASSERT(pHost, UR_RESULT_ERROR_INVALID_HOST_PTR); - } - - const bool PerformInitialCopy = - (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) || - ((flags & UR_MEM_FLAG_USE_HOST_POINTER)); - - UR_ASSERT(pImageDesc->stype == UR_STRUCTURE_TYPE_IMAGE_DESC, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->type <= UR_MEM_TYPE_IMAGE1D_BUFFER, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->numMipLevel == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->numSamples == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - if (!pHost) { - UR_ASSERT(pImageDesc->rowPitch == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->slicePitch == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - } - - ur_result_t Result = UR_RESULT_SUCCESS; - - // We only support RBGA channel order - // TODO: check SYCL CTS and spec. 
May also have to support BGRA - UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA, - UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - - // We have to use hipArray3DCreate, which has some caveats. The height and - // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc gives - // a minimum value of 1, so we need to convert the answer. - HIP_ARRAY3D_DESCRIPTOR ArrayDesc; - ArrayDesc.NumChannels = 4; // Only support 4 channel image - ArrayDesc.Flags = 0; // No flags required - ArrayDesc.Width = pImageDesc->width; - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - ArrayDesc.Height = 0; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = pImageDesc->depth; - } - - // We need to get this now in bytes for calculating the total image size later - size_t PixelTypeSizeBytes; - - switch (pImageFormat->channelType) { - - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_HALF; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_FLOAT; - PixelTypeSizeBytes = 4; - break; - default: - // urMemImageCreate given unsupported image_channel_data_type - return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; - } - - // When a dimension isn't used image_desc has the size set to 1 - size_t PixelSizeBytes = - PixelTypeSizeBytes * 4; // 4 is the only number of channels we support - size_t ImageSizeBytes = PixelSizeBytes * pImageDesc->width * - pImageDesc->height * pImageDesc->depth; - - ScopedContext Active(hContext->getDevice()); - hipArray *ImageArray; - UR_CHECK_ERROR(hipArray3DCreate(reinterpret_cast(&ImageArray), - &ArrayDesc)); - - try { - if (PerformInitialCopy) { - // We have to use a different copy function for each image dimensionality - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(hipMemcpyHtoA(ImageArray, 0, pHost, ImageSizeBytes)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - hip_Memcpy2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - HIP_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = 
hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - CpyDesc.Depth = pImageDesc->depth; - UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc)); - } - } - - // HIP_RESOURCE_DESC is a union of different structs, shown here - // We need to fill it as described here to use it for a surface or texture - // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and - // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array - // handle. - // HIP_RESOURCE_DESC::flags must be set to zero - - hipResourceDesc ImageResDesc; - ImageResDesc.res.array.array = ImageArray; - ImageResDesc.resType = hipResourceTypeArray; - - hipSurfaceObject_t Surface; - UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc)); - - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, ImageArray, Surface, flags, pImageDesc->type, pHost}); - - if (URMemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = URMemObj.release(); - } catch (ur_result_t Err) { - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return Err; - } catch (...) { - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return UR_RESULT_ERROR_UNKNOWN; - } - return Result; -} - -/// \TODO Not implemented -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t, - ur_image_info_t, size_t, - void *, size_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - UR_ASSERT(hMem->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - hMem->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp deleted file mode 100644 index aa58abcb8befb..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp +++ /dev/null @@ -1,198 +0,0 @@ -//===--------- memory.hpp - HIP Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "common.hpp" -#include - -/// UR Mem mapping to HIP memory allocations, both data and texture/surface. -/// \brief Represents non-SVM allocations on the HIP backend. -/// Keeps tracks of all mapped regions used for Map/Unmap calls. -/// Only one region can be active at the same time per allocation. -struct ur_mem_handle_t_ { - - // TODO: Move as much shared data up as possible - using ur_context = ur_context_handle_t_ *; - using ur_mem = ur_mem_handle_t_ *; - - // Context where the memory object is accessible - ur_context Context; - - /// Reference counting of the handler - std::atomic_uint32_t RefCount; - enum class Type { Buffer, Surface } MemType; - - // Original mem flags passed - ur_mem_flags_t MemFlags; - - /// A UR Memory object represents either plain memory allocations ("Buffers" - /// in OpenCL) or typed allocations ("Images" in OpenCL). - /// In HIP their API handlers are different. Whereas "Buffers" are allocated - /// as pointer-like structs, "Images" are stored in Textures or Surfaces. 
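// Editor's aside (illustration only, not part of the deleted header): the
// handle documented here is a tagged union — MemType records whether the
// BufferMem or the SurfaceMem member of MemImpl is live, and each entry point
// checks it (isBuffer()/isImage()) before touching Mem. In miniature, with
// hypothetical names:

#include <cstddef>

struct MiniMemHandle {
  enum class Type { Buffer, Surface } MemType;
  union Impl {
    struct Buf  { void *DevPtr; std::size_t Size; } BufferMem;   // plain allocation
    struct Surf { void *Array; void *SurfObj; } SurfaceMem;      // texture/surface pair
  } Mem;
  bool isBuffer() const noexcept { return MemType == Type::Buffer; }
  bool isImage() const noexcept { return MemType == Type::Surface; }
};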
- /// This union allows implementation to use either from the same handler. - union MemImpl { - // Handler for plain, pointer-based HIP allocations - struct BufferMem { - using native_type = hipDeviceptr_t; - - // If this allocation is a sub-buffer (i.e., a view on an existing - // allocation), this is the pointer to the parent handler structure - ur_mem Parent; - // HIP handler for the pointer - native_type Ptr; - - /// Pointer associated with this device on the host - void *HostPtr; - /// Size of the allocation in bytes - size_t Size; - /// Size of the active mapped region. - size_t MapSize; - /// Offset of the active mapped region. - size_t MapOffset; - /// Pointer to the active mapped region, if any - void *MapPtr; - /// Original flags for the mapped region - ur_map_flags_t MapFlags; - - /** AllocMode - * Classic: Just a normal buffer allocated on the device via hip malloc - * UseHostPtr: Use an address on the host for the device - * CopyIn: The data for the device comes from the host but the host - pointer is not available later for re-use - * AllocHostPtr: Uses pinned-memory allocation - */ - enum class AllocMode { - Classic, - UseHostPtr, - CopyIn, - AllocHostPtr - } MemAllocMode; - - native_type get() const noexcept { return Ptr; } - - native_type getWithOffset(size_t Offset) const noexcept { - return reinterpret_cast(reinterpret_cast(Ptr) + - Offset); - } - - void *getVoid() const noexcept { return reinterpret_cast(Ptr); } - - size_t getSize() const noexcept { return Size; } - - void *getMapPtr() const noexcept { return MapPtr; } - - size_t getMapSize() const noexcept { return MapSize; } - - size_t getMapOffset() const noexcept { return MapOffset; } - - /// Returns a pointer to data visible on the host that contains - /// the data on the device associated with this allocation. - /// The offset is used to index into the HIP allocation. - /// - void *mapToPtr(size_t Size, size_t Offset, - ur_map_flags_t Flags) noexcept { - assert(MapPtr == nullptr); - MapSize = Size; - MapOffset = Offset; - MapFlags = Flags; - if (HostPtr) { - MapPtr = static_cast(HostPtr) + Offset; - } else { - // TODO: Allocate only what is needed based on the offset - MapPtr = static_cast(malloc(this->getSize())); - } - return MapPtr; - } - - /// Detach the allocation from the host memory. - void unmap(void *) noexcept { - assert(MapPtr != nullptr); - - if (MapPtr != HostPtr) { - free(MapPtr); - } - MapPtr = nullptr; - MapSize = 0; - MapOffset = 0; - } - - ur_map_flags_t getMapFlags() const noexcept { - assert(MapPtr != nullptr); - return MapFlags; - } - } BufferMem; - - // Handler data for surface object (i.e. 
Images) - struct SurfaceMem { - hipArray *Array; - hipSurfaceObject_t SurfObj; - ur_mem_type_t ImageType; - - hipArray *getArray() const noexcept { return Array; } - - hipSurfaceObject_t getSurface() const noexcept { return SurfObj; } - - ur_mem_type_t getImageType() const noexcept { return ImageType; } - } SurfaceMem; - } Mem; - - /// Constructs the UR MEM handler for a non-typed allocation ("buffer") - ur_mem_handle_t_(ur_context Ctxt, ur_mem Parent, ur_mem_flags_t MemFlags, - MemImpl::BufferMem::AllocMode Mode, hipDeviceptr_t Ptr, - void *HostPtr, size_t Size) - : Context{Ctxt}, RefCount{1}, MemType{Type::Buffer}, MemFlags{MemFlags} { - Mem.BufferMem.Ptr = Ptr; - Mem.BufferMem.Parent = Parent; - Mem.BufferMem.HostPtr = HostPtr; - Mem.BufferMem.Size = Size; - Mem.BufferMem.MapSize = 0; - Mem.BufferMem.MapOffset = 0; - Mem.BufferMem.MapPtr = nullptr; - Mem.BufferMem.MapFlags = UR_MAP_FLAG_WRITE; - Mem.BufferMem.MemAllocMode = Mode; - if (isSubBuffer()) { - urMemRetain(Mem.BufferMem.Parent); - } else { - urContextRetain(Context); - } - }; - - /// Constructs the UR allocation for an Image object - ur_mem_handle_t_(ur_context Ctxt, hipArray *Array, hipSurfaceObject_t Surf, - ur_mem_flags_t MemFlags, ur_mem_type_t ImageType, void *) - : Context{Ctxt}, RefCount{1}, MemType{Type::Surface}, MemFlags{MemFlags} { - Mem.SurfaceMem.Array = Array; - Mem.SurfaceMem.ImageType = ImageType; - Mem.SurfaceMem.SurfObj = Surf; - urContextRetain(Context); - } - - ~ur_mem_handle_t_() { - if (isBuffer() && isSubBuffer()) { - urMemRelease(Mem.BufferMem.Parent); - return; - } - urContextRelease(Context); - } - - bool isBuffer() const noexcept { return MemType == Type::Buffer; } - - bool isSubBuffer() const noexcept { - return (isBuffer() && (Mem.BufferMem.Parent != nullptr)); - } - - bool isImage() const noexcept { return MemType == Type::Surface; } - - ur_context getContext() const noexcept { return Context; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp deleted file mode 100644 index 8f6fb0750dc81..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp +++ /dev/null @@ -1,169 +0,0 @@ -//===--------- platform.cpp - HIP Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "platform.hpp" - -hipEvent_t ur_platform_handle_t_::EvBase{nullptr}; - -UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGetInfo(ur_platform_handle_t, ur_platform_info_t propName, - size_t propSize, void *pPropValue, size_t *pSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); - - switch (propName) { - case UR_PLATFORM_INFO_NAME: - return ReturnValue("AMD HIP BACKEND"); - case UR_PLATFORM_INFO_VENDOR_NAME: - return ReturnValue("AMD Corporation"); - case UR_PLATFORM_INFO_PROFILE: - return ReturnValue("FULL PROFILE"); - case UR_PLATFORM_INFO_VERSION: { - std::string Version; - UR_CHECK_ERROR(getHipVersionString(Version)); - return ReturnValue(Version.c_str()); - } - case UR_PLATFORM_INFO_BACKEND: { - return ReturnValue(UR_PLATFORM_BACKEND_HIP); - } - case UR_PLATFORM_INFO_EXTENSIONS: { - return ReturnValue(""); - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - - return UR_RESULT_SUCCESS; -} - -/// Obtains the HIP platform. -/// There is only one HIP platform, and contains all devices on the system. -/// Triggers the HIP Driver initialization (hipInit) the first time, so this -/// must be the first UR API called. -/// -/// However because multiple devices in a context is not currently supported, -/// place each device in a separate platform. -UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, - ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - - try { - static std::once_flag InitFlag; - static uint32_t NumPlatforms = 1; - static std::vector PlatformIds; - - UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_VALUE); - - ur_result_t Result = UR_RESULT_SUCCESS; - - std::call_once( - InitFlag, - [](ur_result_t &Err) { - if (hipInit(0) != hipSuccess) { - NumPlatforms = 0; - return; - } - int NumDevices = 0; - Err = UR_RESULT_SUCCESS; - UR_CHECK_ERROR(hipGetDeviceCount(&NumDevices)); - if (NumDevices == 0) { - NumPlatforms = 0; - return; - } - try { - // make one platform per device - NumPlatforms = NumDevices; - PlatformIds.resize(NumDevices); - - for (int i = 0; i < NumDevices; ++i) { - hipDevice_t Device; - UR_CHECK_ERROR(hipDeviceGet(&Device, i)); - hipCtx_t Context; - UR_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Context, Device)); - PlatformIds[i].Devices.emplace_back( - new ur_device_handle_t_{Device, Context, &PlatformIds[i]}); - } - } catch (const std::bad_alloc &) { - // Signal out-of-memory situation - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); - Err = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } catch (ur_result_t CatchErr) { - // Clear and rethrow to allow retry - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); - Err = CatchErr; - throw CatchErr; - } catch (...) { - Err = UR_RESULT_ERROR_OUT_OF_RESOURCES; - throw; - } - }, - Result); - - if (pNumPlatforms != nullptr) { - *pNumPlatforms = NumPlatforms; - } - - if (phPlatforms != nullptr) { - for (unsigned i = 0; i < std::min(NumEntries, NumPlatforms); ++i) { - phPlatforms[i] = &PlatformIds[i]; - } - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGetApiVersion(ur_platform_handle_t, ur_api_version_t *pVersion) { - *pVersion = UR_API_VERSION_CURRENT; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( - ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - std::ignore = hPlatform; - std::ignore = phNativePlatform; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( - ur_native_handle_t hNativePlatform, - const ur_platform_native_properties_t *pProperties, - ur_platform_handle_t *phPlatform) { - std::ignore = hNativePlatform; - std::ignore = pProperties; - std::ignore = phPlatform; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -// Get CUDA plugin specific backend option. -// Current support is only for optimization options. -// Return empty string for cuda. -// TODO: Determine correct string to be passed. -UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, - const char **ppPlatformOption) { - using namespace std::literals; - if (pFrontendOption == "-O0"sv || pFrontendOption == "-O1"sv || - pFrontendOption == "-O2"sv || pFrontendOption == "-O3"sv || - pFrontendOption == ""sv) { - *ppPlatformOption = ""; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp deleted file mode 100644 index cde0e369f5c65..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp +++ /dev/null @@ -1,23 +0,0 @@ -//===--------- platform.hpp - HIP Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "common.hpp" -#include "device.hpp" - -#include - -/// A UR platform stores all known UR devices, -/// in the HIP plugin this is just a vector of -/// available devices since initialization is done -/// when devices are used. -/// -struct ur_platform_handle_t_ { - static hipEvent_t EvBase; // HIP event used as base counter - std::vector> Devices; -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp deleted file mode 100644 index f1bc9df222104..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp +++ /dev/null @@ -1,303 +0,0 @@ -//===--------- program.cpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "program.hpp" - -ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Ctxt) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Ctxt} { - urContextRetain(Context); -} - -ur_program_handle_t_::~ur_program_handle_t_() { urContextRelease(Context); } - -ur_result_t ur_program_handle_t_::setBinary(const char *Source, size_t Length) { - // Do not re-set program binary data which has already been set as that will - // delete the old binary data. - UR_ASSERT(Binary == nullptr && BinarySizeInBytes == 0, - UR_RESULT_ERROR_INVALID_OPERATION); - Binary = Source; - BinarySizeInBytes = Length; - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { - if (BuildOptions) { - this->BuildOptions = BuildOptions; - } - - constexpr const unsigned int NumberOfOptions = 4u; - - hipJitOption Options[NumberOfOptions]; - void *OptionVals[NumberOfOptions]; - - // Pass a buffer for info messages - Options[0] = hipJitOptionInfoLogBuffer; - OptionVals[0] = (void *)InfoLog; - // Pass the size of the info buffer - Options[1] = hipJitOptionInfoLogBufferSizeBytes; - OptionVals[1] = (void *)(long)MAX_LOG_SIZE; - // Pass a buffer for error message - Options[2] = hipJitOptionErrorLogBuffer; - OptionVals[2] = (void *)ErrorLog; - // Pass the size of the error buffer - Options[3] = hipJitOptionErrorLogBufferSizeBytes; - OptionVals[3] = (void *)(long)MAX_LOG_SIZE; - - UR_CHECK_ERROR(hipModuleLoadDataEx(&Module, static_cast(Binary), - NumberOfOptions, Options, OptionVals)); - - BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS; - - // If no exception, result is correct - return UR_RESULT_SUCCESS; -} - -/// Finds kernel names by searching for entry points in the PTX source, as the -/// HIP driver API doesn't expose an operation for this. -/// Note: This is currently only being used by the SYCL program class for the -/// has_kernel method, so an alternative would be to move the has_kernel -/// query to UR and use hipModuleGetFunction to check for a kernel. -ur_result_t getKernelNames(ur_program_handle_t) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// HIP will handle the PTX/HIPBIN binaries internally through hipModule_t -/// object. So, urProgramCreateWithIL and urProgramCreateWithBinary are -/// equivalent in terms of HIP adapter. See \ref urProgramCreateWithBinary. -UR_APIEXPORT ur_result_t UR_APICALL -urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, - size_t length, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - ur_device_handle_t hDevice = hContext->getDevice(); - const auto pBinary = reinterpret_cast(pIL); - - return urProgramCreateWithBinary(hContext, hDevice, length, pBinary, - pProperties, phProgram); -} - -/// HIP will handle the PTX/HIPBIN binaries internally through a call to -/// hipModuleLoadDataEx. So, urProgramCompile and urProgramBuild are equivalent -/// in terms of HIP adapter. \TODO Implement asynchronous compilation -UR_APIEXPORT ur_result_t UR_APICALL -urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, - const char *pOptions) { - return urProgramBuild(hContext, hProgram, pOptions); -} - -/// Loads the images from a UR program into a hipModule_t that can be -/// used later on to extract functions (kernels). -/// See \ref ur_program_handle_t for implementation details. 
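// Editor's illustration (not part of the deleted file): a minimal, standalone
// sketch of the hipModuleLoadDataEx pattern that buildProgram above relies on
// — load an AOT code object, capture the info/error logs, then pull a kernel
// out of the module by name. It assumes a HIP device/context is already
// current (cf. ScopedContext); `binary` and "my_kernel" are placeholders.

#include <hip/hip_runtime.h>
#include <cstdio>

static hipError_t loadModuleWithLogs(const void *binary, hipModule_t *outModule,
                                     hipFunction_t *outKernel) {
  constexpr unsigned int NumOptions = 4u;
  constexpr unsigned int LogSize = 8192u;
  static char InfoLog[LogSize], ErrorLog[LogSize];

  hipJitOption Options[NumOptions] = {
      hipJitOptionInfoLogBuffer, hipJitOptionInfoLogBufferSizeBytes,
      hipJitOptionErrorLogBuffer, hipJitOptionErrorLogBufferSizeBytes};
  void *OptionVals[NumOptions] = {InfoLog, (void *)(long)LogSize,
                                  ErrorLog, (void *)(long)LogSize};

  hipError_t Err =
      hipModuleLoadDataEx(outModule, binary, NumOptions, Options, OptionVals);
  if (Err != hipSuccess) {
    std::fprintf(stderr, "module load failed:\n%s\n", ErrorLog);
    return Err;
  }
  // Kernels are extracted from the loaded module by symbol name.
  return hipModuleGetFunction(outKernel, *outModule, "my_kernel");
}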
-UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, - ur_program_handle_t hProgram, - const char *pOptions) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hProgram->getContext()->getDevice()); - - hProgram->buildProgram(pOptions); - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(ur_context_handle_t, uint32_t, - const ur_program_handle_t *, - const char *, - ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// Created a UR program object from a HIP program handle. -/// TODO: Implement this. -/// NOTE: The created UR object takes ownership of the native handle. -/// -/// \param[in] hNativeProgram The native handle to create UR program object -/// from. \param[in] hContext The UR context of the program. \param[out] -/// phProgram Set to the UR program object created from native handle. -/// -/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, - const ur_program_native_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t, - ur_program_build_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - // Ignore unused parameter - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_BUILD_INFO_STATUS: { - return ReturnValue(hProgram->BuildStatus); - } - case UR_PROGRAM_BUILD_INFO_OPTIONS: - return ReturnValue(hProgram->BuildOptions.c_str()); - case UR_PROGRAM_BUILD_INFO_LOG: - return ReturnValue(hProgram->InfoLog, hProgram->MAX_LOG_SIZE); - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, - size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_INFO_REFERENCE_COUNT: - return ReturnValue(hProgram->getReferenceCount()); - case UR_PROGRAM_INFO_CONTEXT: - return ReturnValue(hProgram->Context); - case UR_PROGRAM_INFO_NUM_DEVICES: - return ReturnValue(1u); - case UR_PROGRAM_INFO_DEVICES: - return ReturnValue(&hProgram->Context->DeviceId, 1); - case UR_PROGRAM_INFO_SOURCE: - return ReturnValue(hProgram->Binary); - case UR_PROGRAM_INFO_BINARY_SIZES: - return ReturnValue(&hProgram->BinarySizeInBytes, 1); - case UR_PROGRAM_INFO_BINARIES: - return ReturnValue(&hProgram->Binary, 1); - case UR_PROGRAM_INFO_KERNEL_NAMES: - return getKernelNames(hProgram); - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramRetain(ur_program_handle_t hProgram) { - UR_ASSERT(hProgram->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_PROGRAM); - hProgram->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -/// Decreases the reference count of a ur_program_handle_t object. -/// When the reference count reaches 0, it unloads the module from -/// the context. -UR_APIEXPORT ur_result_t UR_APICALL -urProgramRelease(ur_program_handle_t hProgram) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. 
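// (The UR_ASSERT directly below guards against exactly that double-release.)
// Editor's aside: the retain/release discipline used throughout this adapter
// is an atomic counter that starts at 1 and tears the object down when a
// release drives it to 0. A trimmed, standalone sketch of that pattern — the
// names DummyHandle/releaseHandle are hypothetical, not adapter API:

#include <atomic>
#include <cassert>
#include <cstdint>

struct DummyHandle {
  std::atomic_uint32_t RefCount{1};
  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
  uint32_t getReferenceCount() const noexcept { return RefCount; }
};

inline void releaseHandle(DummyHandle *H) {
  assert(H->getReferenceCount() != 0 && "double release");
  if (H->decrementReferenceCount() == 0) {
    // Backend resources (e.g. the hipModule_t held by a program) would be
    // freed here before deleting the handle itself.
    delete H;
  }
}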
- UR_ASSERT(hProgram->getReferenceCount() != 0, - UR_RESULT_ERROR_INVALID_PROGRAM); - - // decrement ref count. If it is 0, delete the program. - if (hProgram->decrementReferenceCount() == 0) { - - std::unique_ptr ProgramPtr{hProgram}; - - ur_result_t Result = UR_RESULT_ERROR_INVALID_PROGRAM; - - try { - ScopedContext Active(hProgram->getContext()->getDevice()); - auto HIPModule = hProgram->get(); - if (HIPModule) { - UR_CHECK_ERROR(hipModuleUnload(HIPModule)); - Result = UR_RESULT_SUCCESS; - } else { - // no module to unload - Result = UR_RESULT_SUCCESS; - } - } catch (...) { - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - return Result; - } - - return UR_RESULT_SUCCESS; -} - -/// Gets the native HIP handle of a UR program object -/// -/// \param[in] hProgram The UR program to get the native HIP object of. -/// \param[out] phNativeProgram Set to the native handle of the UR program -/// object. -/// -/// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( - ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - *phNativeProgram = reinterpret_cast(hProgram->get()); - return UR_RESULT_SUCCESS; -} - -/// Loads images from a list of PTX or HIPBin binaries. -/// Note: No calls to HIP driver API in this function, only store binaries -/// for later. -/// -/// Note: Only supports one device -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *, - ur_program_handle_t *phProgram) { - UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY); - UR_ASSERT(hContext->getDevice()->get() == hDevice->get(), - UR_RESULT_ERROR_INVALID_CONTEXT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - std::unique_ptr RetProgram{ - new ur_program_handle_t_{hContext}}; - - // TODO: Set metadata here and use reqd_work_group_size information. - // See urProgramCreateWithBinary in CUDA adapter. - - auto pBinary_string = reinterpret_cast(pBinary); - if (size == 0) { - size = strlen(pBinary_string) + 1; - } - - UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE); - - Result = RetProgram->setBinary(pBinary_string, size); - UR_ASSERT(Result == UR_RESULT_SUCCESS, Result); - - *phProgram = RetProgram.release(); - - return Result; -} - -// This entry point is only used for native specialization constants (SPIR-V), -// and the HIP plugin is AOT only so this entry point is not supported. 
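// (Hence the stub below simply reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE.)
// Editor's illustration of the caller-facing flow the entry points above
// support: create a program from an AOT HIP code object, build it, and pull a
// kernel out of it. This is a hedged sketch of typical Unified Runtime usage,
// not adapter code; `blob`, `blobSize` and "my_kernel" are placeholders, and
// it assumes the UR headers (<ur_api.h>) are on the include path.

#include <ur_api.h>

static ur_result_t buildAndGetKernel(ur_context_handle_t hContext,
                                     ur_device_handle_t hDevice,
                                     const uint8_t *blob, size_t blobSize,
                                     ur_program_handle_t *phProg,
                                     ur_kernel_handle_t *phKernel) {
  ur_result_t Res = urProgramCreateWithBinary(hContext, hDevice, blobSize, blob,
                                              /*pProperties=*/nullptr, phProg);
  if (Res != UR_RESULT_SUCCESS)
    return Res;

  // For this adapter urProgramCompile forwards to urProgramBuild, which loads
  // the binary with hipModuleLoadDataEx (see above).
  Res = urProgramBuild(hContext, *phProg, /*pOptions=*/"");
  if (Res == UR_RESULT_SUCCESS)
    Res = urKernelCreate(*phProg, "my_kernel", phKernel);

  if (Res != UR_RESULT_SUCCESS) {
    urProgramRelease(*phProg);
    *phProg = nullptr;
  }
  // On success the caller releases the kernel first, then the program.
  return Res;
}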
-UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t, uint32_t, const ur_specialization_constant_info_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( - ur_device_handle_t hDevice, ur_program_handle_t hProgram, - const char *pFunctionName, void **ppFunctionPointer) { - // Check if device passed is the same the device bound to the context - UR_ASSERT(hDevice == hProgram->getContext()->getDevice(), - UR_RESULT_ERROR_INVALID_DEVICE); - - hipFunction_t Func; - hipError_t Ret = hipModuleGetFunction(&Func, hProgram->get(), pFunctionName); - *ppFunctionPointer = Func; - ur_result_t Result = UR_RESULT_SUCCESS; - - if (Ret != hipSuccess && Ret != hipErrorNotFound) - UR_CHECK_ERROR(Ret); - if (Ret == hipErrorNotFound) { - *ppFunctionPointer = 0; - Result = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; - } - - return Result; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp deleted file mode 100644 index 84e31e83b7cbc..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp +++ /dev/null @@ -1,46 +0,0 @@ -//===--------- program.hpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -#include - -#include "context.hpp" - -/// Implementation of UR Program on HIP Module object -struct ur_program_handle_t_ { - using native_type = hipModule_t; - native_type Module; - const char *Binary; - size_t BinarySizeInBytes; - std::atomic_uint32_t RefCount; - ur_context_handle_t Context; - - constexpr static size_t MAX_LOG_SIZE = 8192u; - - char ErrorLog[MAX_LOG_SIZE], InfoLog[MAX_LOG_SIZE]; - std::string BuildOptions; - ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; - - ur_program_handle_t_(ur_context_handle_t Ctxt); - ~ur_program_handle_t_(); - - ur_result_t setBinary(const char *Binary, size_t BinarySizeInBytes); - - ur_result_t buildProgram(const char *BuildOptions); - ur_context_handle_t getContext() const { return Context; }; - - native_type get() const noexcept { return Module; }; - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp deleted file mode 100644 index 20ff90a1e742f..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp +++ /dev/null @@ -1,299 +0,0 @@ -//===--------- queue.cpp - HIP Adapter ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "queue.hpp" -#include "context.hpp" -#include "event.hpp" - -void ur_queue_handle_t_::computeStreamWaitForBarrierIfNeeded( - hipStream_t Stream, uint32_t Stream_i) { - if (BarrierEvent && !ComputeAppliedBarrier[Stream_i]) { - UR_CHECK_ERROR(hipStreamWaitEvent(Stream, BarrierEvent, 0)); - ComputeAppliedBarrier[Stream_i] = true; - } -} - -void ur_queue_handle_t_::transferStreamWaitForBarrierIfNeeded( - hipStream_t Stream, uint32_t Stream_i) { - if (BarrierEvent && !TransferAppliedBarrier[Stream_i]) { - UR_CHECK_ERROR(hipStreamWaitEvent(Stream, BarrierEvent, 0)); - TransferAppliedBarrier[Stream_i] = true; - } -} - -hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { - uint32_t Stream_i; - uint32_t Token; - while (true) { - if (NumComputeStreams < ComputeStreams.size()) { - // the check above is for performance - so as not to lock mutex every time - std::lock_guard guard(ComputeStreamMutex); - // The second check is done after mutex is locked so other threads can not - // change NumComputeStreams after that - if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &ComputeStreams[NumComputeStreams++], Flags)); - } - } - Token = ComputeStreamIdx++; - Stream_i = Token % ComputeStreams.size(); - // if a stream has been reused before it was next selected round-robin - // fashion, we want to delay its next use and instead select another one - // that is more likely to have completed all the enqueued work. - if (DelayCompute[Stream_i]) { - DelayCompute[Stream_i] = false; - } else { - break; - } - } - if (StreamToken) { - *StreamToken = Token; - } - hipStream_t Res = ComputeStreams[Stream_i]; - computeStreamWaitForBarrierIfNeeded(Res, Stream_i); - return Res; -} - -hipStream_t ur_queue_handle_t_::getNextComputeStream( - uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, - ur_stream_quard &Guard, uint32_t *StreamToken) { - for (uint32_t i = 0; i < NumEventsInWaitList; i++) { - uint32_t Token = EventWaitList[i]->getComputeStreamToken(); - if (EventWaitList[i]->getQueue() == this && canReuseStream(Token)) { - std::unique_lock ComputeSyncGuard(ComputeStreamSyncMutex); - // redo the check after lock to avoid data races on - // LastSyncComputeStreams - if (canReuseStream(Token)) { - uint32_t Stream_i = Token % DelayCompute.size(); - DelayCompute[Stream_i] = true; - if (StreamToken) { - *StreamToken = Token; - } - Guard = ur_stream_quard{std::move(ComputeSyncGuard)}; - hipStream_t Res = EventWaitList[i]->getStream(); - computeStreamWaitForBarrierIfNeeded(Res, Stream_i); - return Res; - } - } - } - Guard = {}; - return getNextComputeStream(StreamToken); -} - -hipStream_t ur_queue_handle_t_::getNextTransferStream() { - if (TransferStreams.empty()) { // for example in in-order queue - return getNextComputeStream(); - } - if (NumTransferStreams < TransferStreams.size()) { - // the check above is for performance - so as not to lock mutex every time - std::lock_guard Guard(TransferStreamMutex); - // The second check is done after mutex is locked so other threads can not - // change NumTransferStreams after that - if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &TransferStreams[NumTransferStreams++], Flags)); - } - } - uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size(); - hipStream_t Res = TransferStreams[Stream_i]; 
- transferStreamWaitForBarrierIfNeeded(Res, Stream_i); - return Res; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps, ur_queue_handle_t *phQueue) { - try { - std::unique_ptr QueueImpl{nullptr}; - - if (hContext->getDevice() != hDevice) { - *phQueue = nullptr; - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - unsigned int Flags = 0; - - const bool IsOutOfOrder = - pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE - : false; - - std::vector ComputeHipStreams( - IsOutOfOrder ? ur_queue_handle_t_::DefaultNumComputeStreams : 1); - std::vector TransferHipStreams( - IsOutOfOrder ? ur_queue_handle_t_::DefaultNumTransferStreams : 0); - - QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ - std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext, - hDevice, Flags, pProps ? pProps->flags : 0}); - - *phQueue = QueueImpl.release(); - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propValueSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); - switch (propName) { - case UR_QUEUE_INFO_CONTEXT: - return ReturnValue(hQueue->Context); - case UR_QUEUE_INFO_DEVICE: - return ReturnValue(hQueue->Device); - case UR_QUEUE_INFO_REFERENCE_COUNT: - return ReturnValue(hQueue->getReferenceCount()); - case UR_QUEUE_INFO_FLAGS: - return ReturnValue(hQueue->URFlags); - case UR_QUEUE_INFO_EMPTY: { - bool IsReady = hQueue->allOf([](hipStream_t S) -> bool { - const hipError_t Ret = hipStreamQuery(S); - if (Ret == hipSuccess) - return true; - - try { - UR_CHECK_ERROR(Ret); - } catch (...) { - return false; - } - - return false; - }); - return ReturnValue(IsReady); - } - default: - break; - } - return {}; -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - UR_ASSERT(hQueue->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_QUEUE); - - hQueue->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - if (hQueue->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - - try { - std::unique_ptr QueueImpl(hQueue); - - if (!hQueue->backendHasOwnership()) - return UR_RESULT_SUCCESS; - - ScopedContext Active(hQueue->getContext()->getDevice()); - - hQueue->forEachStream([](hipStream_t S) { - UR_CHECK_ERROR(hipStreamSynchronize(S)); - UR_CHECK_ERROR(hipStreamDestroy(S)); - }); - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - // set default result to a negative result (avoid false-positve tests) - ur_result_t Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - - try { - - ScopedContext Active(hQueue->getContext()->getDevice()); - - hQueue->syncStreams([&Result](hipStream_t S) { - UR_CHECK_ERROR(hipStreamSynchronize(S)); - Result = UR_RESULT_SUCCESS; - }); - - } catch (ur_result_t Err) { - Result = Err; - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - return Result; -} - -// There is no HIP counterpart for queue flushing and we don't run into the -// same problem of having to flush cross-queue dependencies as some of the -// other plugins, so it can be left as no-op. -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t) { - return UR_RESULT_SUCCESS; -} - -/// Gets the native HIP handle of a UR queue object -/// -/// \param[in] hQueue The UR queue to get the native HIP object of. -/// \param[out] phNativeQueue Set to the native handle of the UR queue object. -/// -/// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, - ur_native_handle_t *phNativeQueue) { - ScopedContext Active(hQueue->getContext()->getDevice()); - *phNativeQueue = - reinterpret_cast(hQueue->getNextComputeStream()); - return UR_RESULT_SUCCESS; -} - -/// Created a UR queue object from a HIP queue handle. -/// NOTE: The created UR object doesn't takes ownership of the native handle. -/// -/// \param[in] hNativeQueue The native handle to create UR queue object from. -/// \param[in] hContext is the UR context of the queue. -/// \param[out] phQueue Set to the UR queue object created from native handle. -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, - ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, - ur_queue_handle_t *phQueue) { - (void)hDevice; - - unsigned int HIPFlags; - hipStream_t HIPStream = reinterpret_cast(hNativeQueue); - - UR_CHECK_ERROR(hipStreamGetFlags(HIPStream, &HIPFlags)); - - ur_queue_flags_t Flags = 0; - if (HIPFlags == hipStreamDefault) - Flags = UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - else if (HIPFlags == hipStreamNonBlocking) - Flags = UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; - else - detail::ur::die("Unknown hip stream"); - - std::vector ComputeHIPStreams(1, HIPStream); - std::vector TransferHIPStreams(0); - - // Create queue and set num_compute_streams to 1, as computeHIPStreams has - // valid stream - *phQueue = - new ur_queue_handle_t_{std::move(ComputeHIPStreams), - std::move(TransferHIPStreams), - hContext, - hContext->getDevice(), - HIPFlags, - Flags, - /*backend_owns*/ pProperties->isNativeHandleOwned}; - (*phQueue)->NumComputeStreams = 1; - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp deleted file mode 100644 index 8b3c049638623..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp +++ /dev/null @@ -1,242 +0,0 @@ -//===--------- queue.hpp - HIP Adapter ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "common.hpp" - -using ur_stream_quard = std::unique_lock; - -/// UR queue mapping on to hipStream_t objects. 
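// Editor's aside (illustration only, not part of the deleted header): the
// queue below owns pools of compute and transfer streams that are created
// lazily and handed out round-robin (see getNextComputeStream and
// getNextTransferStream in queue.cpp above). Stripped of barriers and stream
// reuse, the selection logic reduces to the following standalone sketch; the
// names and the hipStreamNonBlocking flag choice are illustrative only:

#include <hip/hip_runtime.h>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <vector>

class StreamPool {
  std::vector<hipStream_t> Streams;
  std::mutex Mutex;
  std::atomic_uint32_t Idx{0};
  unsigned int NumCreated = 0;

public:
  explicit StreamPool(std::size_t PoolSize) : Streams(PoolSize) {}

  hipStream_t next() {
    if (NumCreated < Streams.size()) {   // cheap check to avoid the lock
      std::lock_guard<std::mutex> Guard(Mutex);
      if (NumCreated < Streams.size())   // re-check now that we hold the lock
        (void)hipStreamCreateWithFlags(&Streams[NumCreated++],
                                       hipStreamNonBlocking);
    }
    // Tokens increase monotonically; modulo maps them onto the fixed pool.
    return Streams[Idx++ % Streams.size()];
  }
};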
-/// -struct ur_queue_handle_t_ { - using native_type = hipStream_t; - static constexpr int DefaultNumComputeStreams = 64; - static constexpr int DefaultNumTransferStreams = 16; - - std::vector ComputeStreams; - std::vector TransferStreams; - // DelayCompute keeps track of which streams have been recently reused and - // their next use should be delayed. If a stream has been recently reused it - // will be skipped the next time it would be selected round-robin style. When - // skipped, its delay flag is cleared. - std::vector DelayCompute; - // keep track of which streams have applied barrier - std::vector ComputeAppliedBarrier; - std::vector TransferAppliedBarrier; - ur_context_handle_t Context; - ur_device_handle_t Device; - hipEvent_t BarrierEvent = nullptr; - hipEvent_t BarrierTmpEvent = nullptr; - std::atomic_uint32_t RefCount; - std::atomic_uint32_t EventCount; - std::atomic_uint32_t ComputeStreamIdx; - std::atomic_uint32_t TransferStreamIdx; - unsigned int NumComputeStreams; - unsigned int NumTransferStreams; - unsigned int LastSyncComputeStreams; - unsigned int LastSyncTransferStreams; - unsigned int Flags; - ur_queue_flags_t URFlags; - // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be - // locked at the same time, ComputeStreamSyncMutex should be locked first - // to avoid deadlocks - std::mutex ComputeStreamSyncMutex; - std::mutex ComputeStreamMutex; - std::mutex TransferStreamMutex; - std::mutex BarrierMutex; - bool HasOwnership; - - ur_queue_handle_t_(std::vector &&ComputeStreams, - std::vector &&TransferStreams, - ur_context_handle_t Context, ur_device_handle_t Device, - unsigned int Flags, ur_queue_flags_t URFlags, - bool BackendOwns = true) - : ComputeStreams{std::move(ComputeStreams)}, - TransferStreams{std::move(TransferStreams)}, - DelayCompute(this->ComputeStreams.size(), false), - ComputeAppliedBarrier(this->ComputeStreams.size()), - TransferAppliedBarrier(this->TransferStreams.size()), Context{Context}, - Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0}, - TransferStreamIdx{0}, NumComputeStreams{0}, NumTransferStreams{0}, - LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), HasOwnership{BackendOwns} { - urContextRetain(Context); - urDeviceRetain(Device); - } - - ~ur_queue_handle_t_() { - urContextRelease(Context); - urDeviceRelease(Device); - } - - void computeStreamWaitForBarrierIfNeeded(hipStream_t Stream, - uint32_t Stream_i); - void transferStreamWaitForBarrierIfNeeded(hipStream_t Stream, - uint32_t Stream_i); - - // getNextCompute/TransferStream() functions return streams from - // appropriate pools in round-robin fashion - native_type getNextComputeStream(uint32_t *StreamToken = nullptr); - // this overload tries select a stream that was used by one of dependencies. - // If that is not possible returns a new stream. 
If a stream is reused it - // returns a lock that needs to remain locked as long as the stream is in use - native_type getNextComputeStream(uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_stream_quard &Guard, - uint32_t *StreamToken = nullptr); - native_type getNextTransferStream(); - native_type get() { return getNextComputeStream(); }; - - bool hasBeenSynchronized(uint32_t StreamToken) { - // stream token not associated with one of the compute streams - if (StreamToken == std::numeric_limits::max()) { - return false; - } - return LastSyncComputeStreams > StreamToken; - } - - bool canReuseStream(uint32_t StreamToken) { - // stream token not associated with one of the compute streams - if (StreamToken == std::numeric_limits::max()) { - return false; - } - // If the command represented by the stream token was not the last command - // enqueued to the stream we can not reuse the stream - we need to allow for - // commands enqueued after it and the one we are about to enqueue to run - // concurrently - bool IsLastCommand = - (ComputeStreamIdx - StreamToken) <= ComputeStreams.size(); - // If there was a barrier enqueued to the queue after the command - // represented by the stream token we should not reuse the stream, as we can - // not take that stream into account for the bookkeeping for the next - // barrier - such a stream would not be synchronized with. Performance-wise - // it does not matter that we do not reuse the stream, as the work - // represented by the stream token is guaranteed to be complete by the - // barrier before any work we are about to enqueue to the stream will start, - // so the event does not need to be synchronized with. - return IsLastCommand && !hasBeenSynchronized(StreamToken); - } - - template bool allOf(T &&F) { - { - std::lock_guard ComputeGuard(ComputeStreamMutex); - unsigned int End = std::min( - static_cast(ComputeStreams.size()), NumComputeStreams); - if (!std::all_of(ComputeStreams.begin(), ComputeStreams.begin() + End, F)) - return false; - } - { - std::lock_guard TransferGuard(TransferStreamMutex); - unsigned int End = - std::min(static_cast(TransferStreams.size()), - NumTransferStreams); - if (!std::all_of(TransferStreams.begin(), TransferStreams.begin() + End, - F)) - return false; - } - return true; - } - - template void forEachStream(T &&F) { - { - std::lock_guard ComputeGuard(ComputeStreamMutex); - unsigned int End = std::min( - static_cast(ComputeStreams.size()), NumComputeStreams); - for (unsigned int i = 0; i < End; i++) { - F(ComputeStreams[i]); - } - } - { - std::lock_guard TransferGuard(TransferStreamMutex); - unsigned int End = - std::min(static_cast(TransferStreams.size()), - NumTransferStreams); - for (unsigned int i = 0; i < End; i++) { - F(TransferStreams[i]); - } - } - } - - template void syncStreams(T &&F) { - auto SyncCompute = [&F, &Streams = ComputeStreams, &Delay = DelayCompute]( - unsigned int Start, unsigned int Stop) { - for (unsigned int i = Start; i < Stop; i++) { - F(Streams[i]); - Delay[i] = false; - } - }; - auto SyncTransfer = [&F, &Streams = TransferStreams](unsigned int Start, - unsigned int Stop) { - for (unsigned int i = Start; i < Stop; i++) { - F(Streams[i]); - } - }; - { - unsigned int Size = static_cast(ComputeStreams.size()); - std::lock_guard ComputeSyncGuard(ComputeStreamSyncMutex); - std::lock_guard ComputeGuard(ComputeStreamMutex); - unsigned int Start = LastSyncComputeStreams; - unsigned int End = NumComputeStreams < Size ? 
NumComputeStreams - : ComputeStreamIdx.load(); - if (End - Start >= Size) { - SyncCompute(0, Size); - } else { - Start %= Size; - End %= Size; - if (Start < End) { - SyncCompute(Start, End); - } else { - SyncCompute(Start, Size); - SyncCompute(0, End); - } - } - if (ResetUsed) { - LastSyncComputeStreams = End; - } - } - { - unsigned int Size = static_cast(TransferStreams.size()); - if (!Size) { - return; - } - std::lock_guard TransferGuard(TransferStreamMutex); - unsigned int Start = LastSyncTransferStreams; - unsigned int End = NumTransferStreams < Size ? NumTransferStreams - : TransferStreamIdx.load(); - if (End - Start >= Size) { - SyncTransfer(0, Size); - } else { - Start %= Size; - End %= Size; - if (Start < End) { - SyncTransfer(Start, End); - } else { - SyncTransfer(Start, Size); - SyncTransfer(0, End); - } - } - if (ResetUsed) { - LastSyncTransferStreams = End; - } - } - } - - ur_context_handle_t getContext() const { return Context; }; - - ur_device_handle_t getDevice() const { return Device; }; - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - uint32_t getNextEventId() noexcept { return ++EventCount; } - - bool backendHasOwnership() const noexcept { return HasOwnership; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp deleted file mode 100644 index e6a92be054f81..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp +++ /dev/null @@ -1,80 +0,0 @@ -//===--------- sampler.cpp - HIP Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "sampler.hpp"
-#include "common.hpp"
-
-ur_result_t urSamplerCreate(ur_context_handle_t hContext,
-                            const ur_sampler_desc_t *pDesc,
-                            ur_sampler_handle_t *phSampler) {
-  std::unique_ptr<ur_sampler_handle_t_> RetImplSampl{
-      new ur_sampler_handle_t_(hContext)};
-
-  if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
-    RetImplSampl->Props |= pDesc->normalizedCoords;
-    RetImplSampl->Props |= pDesc->filterMode << 1;
-    RetImplSampl->Props |= pDesc->addressingMode << 2;
-  } else {
-    // Set default values
-    RetImplSampl->Props |= true; // Normalized Coords
-    RetImplSampl->Props |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2;
-  }
-
-  *phSampler = RetImplSampl.release();
-  return UR_RESULT_SUCCESS;
-}
-
-ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler,
-                             ur_sampler_info_t propName, size_t propValueSize,
-                             void *pPropValue, size_t *pPropSizeRet) {
-  UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);
-
-  switch (propName) {
-  case UR_SAMPLER_INFO_REFERENCE_COUNT:
-    return ReturnValue(hSampler->getReferenceCount());
-  case UR_SAMPLER_INFO_CONTEXT:
-    return ReturnValue(hSampler->Context);
-  case UR_SAMPLER_INFO_NORMALIZED_COORDS: {
-    bool NormCoordsProp = static_cast<bool>(hSampler->Props);
-    return ReturnValue(NormCoordsProp);
-  }
-  case UR_SAMPLER_INFO_FILTER_MODE: {
-    auto FilterProp =
-        static_cast<ur_sampler_filter_mode_t>((hSampler->Props >> 1) & 0x1);
-    return ReturnValue(FilterProp);
-  }
-  case UR_SAMPLER_INFO_ADDRESSING_MODE: {
-    auto AddressingProp =
-        static_cast<ur_sampler_addressing_mode_t>(hSampler->Props >> 2);
-    return ReturnValue(AddressingProp);
-  }
-  default:
-    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
-  }
-  return {};
-}
-
-ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) {
-  hSampler->incrementReferenceCount();
-  return UR_RESULT_SUCCESS;
-}
-
-ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) {
-  // double delete or someone is messing with the ref count.
-  // either way, cannot safely proceed.
-  detail::ur::assertion(
-      hSampler->getReferenceCount() != 0,
-      "Reference count overflow detected in urSamplerRelease.");
-
-  // decrement ref count. If it is 0, delete the sampler.
-  if (hSampler->decrementReferenceCount() == 0) {
-    delete hSampler;
-  }
-
-  return UR_RESULT_SUCCESS;
-}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp
deleted file mode 100644
index 0226ae9b750cd..0000000000000
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//===--------- sampler.hpp - HIP Adapter ----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <ur/ur.hpp>
-
-#include "context.hpp"
-
-/// Implementation of samplers for HIP
-///
-/// Sampler property layout:
-/// | 31 30 ... 6 5 |      4 3 2      |     1      |        0         |
-/// |      N/A      | addressing mode | fiter mode | normalize coords |
-struct ur_sampler_handle_t_ {
-  std::atomic_uint32_t RefCount;
-  uint32_t Props;
-  ur_context_handle_t Context;
-
-  ur_sampler_handle_t_(ur_context_handle_t Context)
-      : RefCount(1), Props(0), Context(Context) {}
-
-  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
-
-  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
-
-  uint32_t getReferenceCount() const noexcept { return RefCount; }
-};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
deleted file mode 100644
index 3bea2c7c427ee..0000000000000
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===--------- ur_interface_loader.cpp - Unified Runtime -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <ur_api.h>
-#include <ur_ddi.h>
-
-namespace {
-
-// TODO - this is a duplicate of what is in the L0 plugin
-// We should move this to somewhere common
-ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) {
-  if (pDdiTable == nullptr) {
-    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-  }
-  // Pre 1.0 we enforce that loader and adapter must have the same version.
-  // Post 1.0 only a major version match should be required.
-  if (version != UR_API_VERSION_CURRENT) {
-    return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
-  }
-  return UR_RESULT_SUCCESS;
-}
-} // namespace
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable(
-    ur_api_version_t version, ur_platform_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnCreateWithNativeHandle = nullptr;
-  pDdiTable->pfnGet = urPlatformGet;
-  pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion;
-  pDdiTable->pfnGetInfo = urPlatformGetInfo;
-  pDdiTable->pfnGetNativeHandle = nullptr;
-  pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable(
-    ur_api_version_t version, ur_context_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnCreate = urContextCreate;
-  pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle;
-  pDdiTable->pfnGetInfo = urContextGetInfo;
-  pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle;
-  pDdiTable->pfnRelease = urContextRelease;
-  pDdiTable->pfnRetain = urContextRetain;
-  pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable(
-    ur_api_version_t version, ur_event_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle;
-  pDdiTable->pfnGetInfo = urEventGetInfo;
-  pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle;
-  pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo;
-  pDdiTable->pfnRelease = urEventRelease;
-  pDdiTable->pfnRetain = urEventRetain;
-  pDdiTable->pfnSetCallback = urEventSetCallback;
-  pDdiTable->pfnWait = urEventWait;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable(
-    ur_api_version_t version, ur_program_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnBuild = urProgramBuild;
-  pDdiTable->pfnCompile = urProgramCompile;
-  pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary;
-  pDdiTable->pfnCreateWithIL = urProgramCreateWithIL;
-  pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle;
-  pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo;
-  pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer;
-  pDdiTable->pfnGetInfo = urProgramGetInfo;
-  pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle;
-  pDdiTable->pfnLink = urProgramLink;
-  pDdiTable->pfnRelease = urProgramRelease;
-  pDdiTable->pfnRetain = urProgramRetain;
-  pDdiTable->pfnSetSpecializationConstants =
-      urProgramSetSpecializationConstants;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
-    ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnCreate = urKernelCreate;
-  pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle;
-  pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo;
-  pDdiTable->pfnGetInfo = urKernelGetInfo;
-  pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle;
-  pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo;
-  pDdiTable->pfnRelease = urKernelRelease;
-  pDdiTable->pfnRetain = urKernelRetain;
-  pDdiTable->pfnSetArgLocal = urKernelSetArgLocal;
-  pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj;
-  pDdiTable->pfnSetArgPointer = urKernelSetArgPointer;
-  pDdiTable->pfnSetArgSampler = urKernelSetArgSampler;
-  pDdiTable->pfnSetArgValue = urKernelSetArgValue;
-  pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
-  pDdiTable->pfnSetSpecializationConstants = nullptr;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
-    ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnCreate = urSamplerCreate;
-  pDdiTable->pfnCreateWithNativeHandle = nullptr;
-  pDdiTable->pfnGetInfo = urSamplerGetInfo;
-  pDdiTable->pfnGetNativeHandle = nullptr;
-  pDdiTable->pfnRelease = urSamplerRelease;
-  pDdiTable->pfnRetain = urSamplerRetain;
-  return UR_RESULT_SUCCESS;
-}
-
-UR_DLLEXPORT ur_result_t UR_APICALL
-urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) {
-  auto result = validateProcInputs(version, pDdiTable);
-  if (UR_RESULT_SUCCESS != result) {
-    return result;
-  }
-  pDdiTable->pfnBufferCreate = urMemBufferCreate;
-  pDdiTable->pfnBufferPartition = urMemBufferPartition;
-  pDdiTable->pfnBufferCreateWithNativeHandle =
-      urMemBufferCreateWithNativeHandle;
-  pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle;
-  pDdiTable->pfnGetInfo = urMemGetInfo;
-  pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle;
-  pDdiTable->pfnImageCreate = urMemImageCreate;
-  pDdiTable->pfnImageGetInfo = urMemImageGetInfo;
-  pDdiTable->pfnRelease = urMemRelease;
-  pDdiTable->pfnRetain = urMemRetain;
-  return UR_RESULT_SUCCESS;
-} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMFill = urEnqueueUSMFill; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnReadHostPipe = urEnqueueReadHostPipe; - pDdiTable->pfnWriteHostPipe = urEnqueueWriteHostPipe; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ur_global_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnRetain = urQueueRetain; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL -urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - 
pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ur_device_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnPartition = urDevicePartition; - pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; - } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - - return retVal; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; - } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - - return retVal; -} - -#if defined(__cplusplus) -} // extern "C" -#endif diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp deleted file mode 100644 index c4941ffce0885..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp +++ /dev/null @@ -1,462 +0,0 @@ -//===--------- usm.cpp - HIP Adapter --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "adapter.hpp" -#include "common.hpp" -#include "context.hpp" -#include "device.hpp" -#include "platform.hpp" -#include "usm.hpp" - -/// USM: Implements USM Host allocations using HIP Pinned Memory -UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, - ur_usm_pool_handle_t hPool, size_t size, void **ppMem) { - uint32_t alignment; - UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment); - } - - return umfPoolMallocHelper(hPool, ppMem, size, alignment); -} - -/// USM: Implements USM device allocations using a normal HIP device pointer -UR_APIEXPORT ur_result_t UR_APICALL -urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool, - size_t size, void **ppMem) { - uint32_t alignment; - UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMDeviceAllocImpl(ppMem, hContext, hDevice, nullptr, size, - alignment); - } - - return umfPoolMallocHelper(hPool, ppMem, size, alignment); -} - -/// USM: Implements USM Shared allocations using HIP Managed Memory -UR_APIEXPORT ur_result_t UR_APICALL -urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool, - size_t size, void **ppMem) { - uint32_t alignment; - UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMSharedAllocImpl(ppMem, hContext, hDevice, nullptr, nullptr, size, - alignment); - } - - return umfPoolMallocHelper(hPool, ppMem, size, alignment); -} - -UR_APIEXPORT ur_result_t UR_APICALL USMFreeImpl(ur_context_handle_t hContext, - void *pMem) { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - ScopedContext Active(hContext->getDevice()); - hipPointerAttribute_t hipPointerAttributeType; - UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); - unsigned int Type = hipPointerAttributeType.memoryType; - UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - if (Type == hipMemoryTypeDevice) { - UR_CHECK_ERROR(hipFree(pMem)); - } - if (Type == hipMemoryTypeHost) { - UR_CHECK_ERROR(hipHostFree(pMem)); - } - } catch (ur_result_t Error) { - Result = Error; - } - return Result; -} - -/// USM: Frees the given USM pointer associated with the context. 
-UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, - void *pMem) { - if (auto Pool = umfPoolByPtr(pMem)) { - return umf::umf2urResult(umfPoolFree(Pool, pMem)); - } else { - return USMFreeImpl(hContext, pMem); - } -} - -ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_device_mem_flags_t *, - size_t Size, - [[maybe_unused]] uint32_t Alignment) { - try { - ScopedContext Active(Context->getDevice()); - UR_CHECK_ERROR(hipMalloc(ResultPtr, Size)); - } catch (ur_result_t Err) { - return Err; - } - - assert(checkUSMImplAlignment(Alignment, ResultPtr)); - return UR_RESULT_SUCCESS; -} - -ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_host_mem_flags_t *, - ur_usm_device_mem_flags_t *, size_t Size, - [[maybe_unused]] uint32_t Alignment) { - try { - ScopedContext Active(Context->getDevice()); - UR_CHECK_ERROR(hipMallocManaged(ResultPtr, Size, hipMemAttachGlobal)); - } catch (ur_result_t Err) { - return Err; - } - - assert(checkUSMImplAlignment(Alignment, ResultPtr)); - return UR_RESULT_SUCCESS; -} - -ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_usm_host_mem_flags_t *, size_t Size, - [[maybe_unused]] uint32_t Alignment) { - try { - ScopedContext Active(Context->getDevice()); - UR_CHECK_ERROR(hipHostMalloc(ResultPtr, Size)); - } catch (ur_result_t Err) { - return Err; - } - - assert(checkUSMImplAlignment(Alignment, ResultPtr)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propValueSize, - void *pPropValue, size_t *pPropValueSizeRet) { - ur_result_t Result = UR_RESULT_SUCCESS; - hipPointerAttribute_t hipPointerAttributeType; - - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - - try { - ScopedContext Active(hContext->getDevice()); - switch (propName) { - case UR_USM_ALLOC_INFO_TYPE: { - unsigned int Value; - // do not throw if hipPointerGetAttribute returns hipErrorInvalidValue - hipError_t Ret = hipPointerGetAttributes(&hipPointerAttributeType, pMem); - if (Ret == hipErrorInvalidValue) { - // pointer not known to the HIP subsystem - return ReturnValue(UR_USM_TYPE_UNKNOWN); - } - // Direct usage of the function, instead of UR_CHECK_ERROR, so we can get - // the line offset. 
- checkErrorUR(Ret, __func__, __LINE__ - 5, __FILE__); - Value = hipPointerAttributeType.isManaged; - if (Value) { - // pointer to managed memory - return ReturnValue(UR_USM_TYPE_SHARED); - } - UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); - Value = hipPointerAttributeType.memoryType; - UR_ASSERT(Value == hipMemoryTypeDevice || Value == hipMemoryTypeHost, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - if (Value == hipMemoryTypeDevice) { - // pointer to device memory - return ReturnValue(UR_USM_TYPE_DEVICE); - } - if (Value == hipMemoryTypeHost) { - // pointer to host memory - return ReturnValue(UR_USM_TYPE_HOST); - } - // should never get here -#ifdef _MSC_VER - __assume(0); -#else - __builtin_unreachable(); -#endif - return ReturnValue(UR_USM_TYPE_UNKNOWN); - } - case UR_USM_ALLOC_INFO_BASE_PTR: - case UR_USM_ALLOC_INFO_SIZE: - return UR_RESULT_ERROR_INVALID_VALUE; - case UR_USM_ALLOC_INFO_DEVICE: { - // get device index associated with this pointer - UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); - - int DeviceIdx = hipPointerAttributeType.device; - - // currently each device is in its own platform, so find the platform at - // the same index - std::vector Platforms; - Platforms.resize(DeviceIdx + 1); - ur_adapter_handle_t AdapterHandle = &adapter; - Result = urPlatformGet(&AdapterHandle, 1, DeviceIdx + 1, Platforms.data(), - nullptr); - - // get the device from the platform - ur_device_handle_t Device = Platforms[DeviceIdx]->Devices[0].get(); - return ReturnValue(Device); - } - case UR_USM_ALLOC_INFO_POOL: { - auto UMFPool = umfPoolByPtr(pMem); - if (!UMFPool) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - ur_usm_pool_handle_t Pool = hContext->getOwningURPool(UMFPool); - if (!Pool) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - return ReturnValue(Pool); - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } catch (ur_result_t Error) { - Result = Error; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - UR_ASSERT(!HostPtr, UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(Size > 0, UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - UR_ASSERT(!HostPtr, UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_SUCCESS; -} - -umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx, - ur_device_handle_t Dev) { - Context = Ctx; - Device = Dev; - // There isn't a way to query this in cuda, and there isn't much info on - // cuda's approach to alignment or transfer granularity between host and - // device. Within UMF this is only used to influence alignment, and since we - // discard that in our alloc implementations it seems we can safely ignore - // this as well, for now. 
- MinPageSize = 0; - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align, - void **Ptr) { - auto Res = allocateImpl(Ptr, Size, Align); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::free(void *Ptr, size_t Size) { - (void)Size; - - auto Res = USMFreeImpl(Context, Ptr); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -void USMMemoryProvider::get_last_native_error(const char **ErrMsg, - int32_t *ErrCode) { - (void)ErrMsg; - *ErrCode = static_cast(getLastStatusRef()); -} - -umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) { - (void)Ptr; - *PageSize = MinPageSize; - - return UMF_RESULT_SUCCESS; -} - -ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size, - Alignment); -} - -ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size, - Alignment); -} - -ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment); -} - -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc) - : Context(Context) { - const void *pNext = PoolDesc->pNext; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = static_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; - } - break; - } - default: { - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); - } - } - pNext = BaseDesc->pNext; - } - - auto MemProvider = - umf::memoryProviderMakeUnique(Context, nullptr) - .second; - - HostMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) - .second; - - auto Device = Context->DeviceId; - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - DeviceMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) - .second; - - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - SharedMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) - .second; - Context->addPool(this); -} - -bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { - return DeviceMemPool.get() == umf_pool || SharedMemPool.get() == umf_pool || - HostMemPool.get() == umf_pool; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( - ur_context_handle_t Context, ///< [in] handle of the context object - ur_usm_pool_desc_t - *PoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - // Without pool tracking we can't free pool allocations. 
-#ifdef UMF_ENABLE_POOL_TRACKING - if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - try { - *Pool = reinterpret_cast( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -#else - std::ignore = Context; - std::ignore = PoolDesc; - std::ignore = Pool; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -#endif -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( - ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( - ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - Pool->Context->removePool(Pool); - delete Pool; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( - ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t propName, ///< [in] name of the pool property to query - size_t propSize, ///< [in] size in bytes of the pool property value provided - void *pPropValue, ///< [out][optional][typename(propName, propSize)] value - ///< of the pool property - size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool - ///< property value -) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(hPool->getReferenceCount()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(hPool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -} - -bool checkUSMAlignment(uint32_t &alignment, const ur_usm_desc_t *pUSMDesc) { - alignment = pUSMDesc ? pUSMDesc->align : 0u; - return (!pUSMDesc || - (alignment == 0 || ((alignment & (alignment - 1)) == 0))); -} - -bool checkUSMImplAlignment(uint32_t Alignment, void **ResultPtr) { - return Alignment == 0 || - reinterpret_cast(*ResultPtr) % Alignment == 0; -} - -ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem, - size_t size, uint32_t alignment) { - auto UMFPool = hPool->DeviceMemPool.get(); - *ppMem = umfPoolAlignedMalloc(UMFPool, size, alignment); - if (*ppMem == nullptr) { - auto umfErr = umfPoolGetLastAllocationError(UMFPool); - return umf::umf2urResult(umfErr); - } - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp deleted file mode 100644 index be540679122b8..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp +++ /dev/null @@ -1,137 +0,0 @@ -//===--------- usm.hpp - HIP Adapter --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===-----------------------------------------------------------------===//
-
-#include "common.hpp"
-
-#include <umf_helpers.hpp>
-#include <umf_pools/disjoint_pool_config_parser.hpp>
-
-usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig();
-
-struct ur_usm_pool_handle_t_ {
-  std::atomic_uint32_t RefCount = 1;
-
-  ur_context_handle_t Context = nullptr;
-
-  usm::DisjointPoolAllConfigs DisjointPoolConfigs =
-      usm::DisjointPoolAllConfigs();
-
-  umf::pool_unique_handle_t DeviceMemPool;
-  umf::pool_unique_handle_t SharedMemPool;
-  umf::pool_unique_handle_t HostMemPool;
-
-  ur_usm_pool_handle_t_(ur_context_handle_t Context,
-                        ur_usm_pool_desc_t *PoolDesc);
-
-  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
-
-  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
-
-  uint32_t getReferenceCount() const noexcept { return RefCount; }
-
-  bool hasUMFPool(umf_memory_pool_t *umf_pool);
-};
-
-// Exception type to pass allocation errors
-class UsmAllocationException {
-  const ur_result_t Error;
-
-public:
-  UsmAllocationException(ur_result_t Err) : Error{Err} {}
-  ur_result_t getError() const { return Error; }
-};
-
-// Implements memory allocation via driver API for USM allocator interface
-class USMMemoryProvider {
-private:
-  ur_result_t &getLastStatusRef() {
-    static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS;
-    return LastStatus;
-  }
-
-protected:
-  ur_context_handle_t Context;
-  ur_device_handle_t Device;
-  size_t MinPageSize;
-
-  // Internal allocation routine which must be implemented for each allocation
-  // type
-  virtual ur_result_t allocateImpl(void **ResultPtr, size_t Size,
-                                   uint32_t Alignment) = 0;
-
-public:
-  umf_result_t initialize(ur_context_handle_t Ctx, ur_device_handle_t Dev);
-  umf_result_t alloc(size_t Size, size_t Align, void **Ptr);
-  umf_result_t free(void *Ptr, size_t Size);
-  void get_last_native_error(const char **ErrMsg, int32_t *ErrCode);
-  umf_result_t get_min_page_size(void *, size_t *);
-  umf_result_t get_recommended_page_size(size_t, size_t *) {
-    return UMF_RESULT_ERROR_NOT_SUPPORTED;
-  };
-  umf_result_t purge_lazy(void *, size_t) {
-    return UMF_RESULT_ERROR_NOT_SUPPORTED;
-  };
-  umf_result_t purge_force(void *, size_t) {
-    return UMF_RESULT_ERROR_NOT_SUPPORTED;
-  };
-  virtual const char *get_name() = 0;
-
-  virtual ~USMMemoryProvider() = default;
-};
-
-// Allocation routines for shared memory type
-class USMSharedMemoryProvider final : public USMMemoryProvider {
-public:
-  const char *get_name() override { return "USMSharedMemoryProvider"; }
-
-protected:
-  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
-                           uint32_t Alignment) override;
-};
-
-// Allocation routines for device memory type
-class USMDeviceMemoryProvider final : public USMMemoryProvider {
-public:
-  const char *get_name() override { return "USMSharedMemoryProvider"; }
-
-protected:
-  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
-                           uint32_t Alignment) override;
-};
-
-// Allocation routines for host memory type
-class USMHostMemoryProvider final : public USMMemoryProvider {
-public:
-  const char *get_name() override { return "USMSharedMemoryProvider"; }
-
-protected:
-  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
-                           uint32_t Alignment) override;
-};
-
-ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
-                               ur_device_handle_t Device,
-                               ur_usm_device_mem_flags_t *Flags, size_t Size,
-                               uint32_t Alignment);
-
-ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
-                               ur_device_handle_t Device,
-                               ur_usm_host_mem_flags_t *,
-                               ur_usm_device_mem_flags_t *, size_t Size,
-                               uint32_t Alignment);
-
-ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
-                             ur_usm_host_mem_flags_t *Flags, size_t Size,
-                             uint32_t Alignment);
-
-bool checkUSMAlignment(uint32_t &alignment, const ur_usm_desc_t *pUSMDesc);
-
-bool checkUSMImplAlignment(uint32_t Alignment, void **ResultPtr);
-
-ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem,
-                                size_t size, uint32_t alignment);
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp
deleted file mode 100644
index cd59c16e85ec1..0000000000000
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//===--------- usm_p2p.cpp - HIP Adapter-----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "common.hpp"
-
-UR_APIEXPORT ur_result_t UR_APICALL
-urUsmP2PEnablePeerAccessExp(ur_device_handle_t, ur_device_handle_t) {
-  detail::ur::die(
-      "urUsmP2PEnablePeerAccessExp is not implemented for HIP adapter.");
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL
-urUsmP2PDisablePeerAccessExp(ur_device_handle_t, ur_device_handle_t) {
-  detail::ur::die(
-      "urUsmP2PDisablePeerAccessExp is not implemented for HIP adapter.");
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
-    ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, size_t propSize,
-    void *pPropValue, size_t *pPropSizeRet) {
-  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
-  // Zero return value indicates that all of the queries currently return false.
-  return ReturnValue(uint32_t{0});
-}