diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt
index 29ac21230f025..0b94e09460b9b 100644
--- a/sycl/plugins/hip/CMakeLists.txt
+++ b/sycl/plugins/hip/CMakeLists.txt
@@ -120,6 +120,7 @@ add_sycl_plugin(hip
     "../unified_runtime/ur/adapters/hip/sampler.hpp"
     "../unified_runtime/ur/adapters/hip/ur_interface_loader.cpp"
     "../unified_runtime/ur/adapters/hip/usm.cpp"
+    "../unified_runtime/ur/adapters/hip/usm.hpp"
     "../unified_runtime/ur/adapters/hip/usm_p2p.cpp"
     "${sycl_inc_dir}/sycl/detail/pi.h"
     "${sycl_inc_dir}/sycl/detail/pi.hpp"
@@ -130,6 +131,8 @@ add_sycl_plugin(hip
     ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime
   LIBRARIES
     UnifiedRuntime-Headers
+    UnifiedRuntimeCommon
+    UnifiedMallocFramework
   HEADER
     ${CMAKE_CURRENT_SOURCE_DIR}/include/features.hpp
 )
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 71f1d47eca7cb..dd404e1afc16d 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -223,11 +223,13 @@ if ("hip" IN_LIST SYCL_ENABLE_PLUGINS)
     "ur/adapters/hip/sampler.hpp"
     "ur/adapters/hip/ur_interface_loader.cpp"
     "ur/adapters/hip/usm.cpp"
+    "ur/adapters/hip/usm.hpp"
     "ur/adapters/hip/usm_p2p.cpp"
   INCLUDE_DIRS
     ${sycl_inc_dir}
   LIBRARIES
     UnifiedRuntime-Headers
+    UnifiedRuntimeCommon
     Threads::Threads
   )
 
@@ -236,6 +238,13 @@ if ("hip" IN_LIST SYCL_ENABLE_PLUGINS)
     SOVERSION "0"
   )
 
+  if(UMF_ENABLE_POOL_TRACKING)
+    target_compile_definitions("ur_adapter_hip" PRIVATE
+      UMF_ENABLE_POOL_TRACKING)
+  else()
+    message(WARNING "HIP adapter USM pools are disabled, set UMF_ENABLE_POOL_TRACKING to enable them")
+  endif()
+
   if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD")
     target_link_libraries(ur_adapter_hip PUBLIC rocmdrv)
     # Set HIP define to select AMD platform
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp
index 34d61856a3882..17a752e7a91f9 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp
@@ -7,6 +7,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "context.hpp"
+#include "usm.hpp"
+
+void ur_context_handle_t_::addPool(ur_usm_pool_handle_t Pool) {
+  std::lock_guard<std::mutex> Lock(Mutex);
+  PoolHandles.insert(Pool);
+}
+
+void ur_context_handle_t_::removePool(ur_usm_pool_handle_t Pool) {
+  std::lock_guard<std::mutex> Lock(Mutex);
+  PoolHandles.erase(Pool);
+}
+
+ur_usm_pool_handle_t
+ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) {
+  std::lock_guard<std::mutex> Lock(Mutex);
+  for (auto &Pool : PoolHandles) {
+    if (Pool->hasUMFPool(UMFPool)) {
+      return Pool;
+    }
+  }
+  return nullptr;
+}
 
 /// Create a UR HIP context.
 ///
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp
index 5c086efc58ea3..be4607b154099 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp
@@ -7,12 +7,15 @@
 //===----------------------------------------------------------------------===//
 #pragma once
 
+#include <set>
 #include <unordered_map>
 
 #include "common.hpp"
 #include "device.hpp"
 #include "platform.hpp"
 
+#include <umf/memory_pool.h>
+
 typedef void (*ur_context_extended_deleter_t)(void *UserData);
 
 /// UR context mapping to a HIP context object.
@@ -95,6 +98,12 @@ struct ur_context_handle_t_ {
 
   uint32_t getReferenceCount() const noexcept { return RefCount; }
 
+  void addPool(ur_usm_pool_handle_t Pool);
+
+  void removePool(ur_usm_pool_handle_t Pool);
+
+  ur_usm_pool_handle_t getOwningURPool(umf_memory_pool_t *UMFPool);
+
   /// We need to keep track of USM mappings in AMD HIP, as certain extra
   /// synchronization *is* actually required for correctness.
   /// During kernel enqueue we must dispatch a prefetch for each kernel argument
@@ -150,6 +159,7 @@ struct ur_context_handle_t_ {
   std::mutex Mutex;
   std::vector<deleter_data> ExtendedDeleters;
   std::unordered_map<const void *, size_t> USMMappings;
+  std::set<ur_usm_pool_handle_t> PoolHandles;
 };
 
 namespace {
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp
index 155d1900aa0d3..e6aaeafc0aaa5 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp
@@ -24,6 +24,8 @@ struct ur_device_handle_t_ {
   ur_platform_handle_t Platform;
   hipCtx_t HIPContext;
 
+  size_t MaxAllocSize{0};
+
 public:
   ur_device_handle_t_(native_type HipDevice, hipCtx_t Context,
                       ur_platform_handle_t Platform)
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
index 1e0b68a568b48..3bea2c7c427ee 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
@@ -239,10 +239,10 @@ urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) {
   pDdiTable->pfnFree = urUSMFree;
   pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo;
   pDdiTable->pfnHostAlloc = urUSMHostAlloc;
-  pDdiTable->pfnPoolCreate = nullptr;
-  pDdiTable->pfnPoolRetain = nullptr;
-  pDdiTable->pfnPoolRelease = nullptr;
-  pDdiTable->pfnPoolGetInfo = nullptr;
+  pDdiTable->pfnPoolCreate = urUSMPoolCreate;
+  pDdiTable->pfnPoolRetain = urUSMPoolRetain;
+  pDdiTable->pfnPoolRelease = urUSMPoolRelease;
+  pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo;
   pDdiTable->pfnSharedAlloc = urUSMSharedAlloc;
   return UR_RESULT_SUCCESS;
 }
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp
index be5e0bc32fe22..0f077d0667aa3 100644
--- a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp
@@ -13,108 +13,133 @@
 #include "context.hpp"
 #include "device.hpp"
 #include "platform.hpp"
+#include "usm.hpp"
 
 /// USM: Implements USM Host allocations using HIP Pinned Memory
-UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(
-    ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
-    [[maybe_unused]] ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
-
-  UR_ASSERT(!pUSMDesc || (pUSMDesc->align == 0 ||
-                          ((pUSMDesc->align & (pUSMDesc->align - 1)) == 0)),
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
+               ur_usm_pool_handle_t hPool, size_t size, void **ppMem) {
+  uint32_t alignment;
+  UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc),
             UR_RESULT_ERROR_INVALID_VALUE);
 
-  ur_result_t Result = UR_RESULT_SUCCESS;
-  try {
-    ScopedContext Active(hContext->getDevice());
-    Result = UR_CHECK_ERROR(hipHostMalloc(ppMem, size));
-  } catch (ur_result_t Error) {
-    return Error;
+  if (!hPool) {
+    return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment);
   }
 
-  if (Result == UR_RESULT_SUCCESS) {
-    assert((!pUSMDesc || pUSMDesc->align == 0 ||
-            reinterpret_cast<std::uintptr_t>(*ppMem) % pUSMDesc->align == 0));
-    hContext->addUSMMapping(*ppMem, size);
-  }
-  return Result;
+  return umfPoolMallocHelper(hPool, ppMem, size, alignment);
 }
 
 /// USM: Implements USM device allocations using a normal HIP device pointer
-UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
-    ur_context_handle_t hContext, ur_device_handle_t,
-    const ur_usm_desc_t *pUSMDesc, [[maybe_unused]] ur_usm_pool_handle_t pool,
-    size_t size, void **ppMem) {
-  UR_ASSERT(!pUSMDesc || (pUSMDesc->align == 0 ||
-                          ((pUSMDesc->align & (pUSMDesc->align - 1)) == 0)),
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
+                 const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool,
+                 size_t size, void **ppMem) {
+  uint32_t alignment;
+  UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc),
             UR_RESULT_ERROR_INVALID_VALUE);
 
-  ur_result_t Result = UR_RESULT_SUCCESS;
-  try {
-    ScopedContext Active(hContext->getDevice());
-    Result = UR_CHECK_ERROR(hipMalloc(ppMem, size));
-  } catch (ur_result_t Error) {
-    return Error;
+  if (!hPool) {
+    return USMDeviceAllocImpl(ppMem, hContext, hDevice, nullptr, size,
+                              alignment);
   }
 
-  if (Result == UR_RESULT_SUCCESS) {
-    assert((!pUSMDesc || pUSMDesc->align == 0 ||
-            reinterpret_cast<std::uintptr_t>(*ppMem) % pUSMDesc->align == 0));
-    hContext->addUSMMapping(*ppMem, size);
-  }
-  return Result;
+  return umfPoolMallocHelper(hPool, ppMem, size, alignment);
 }
 
 /// USM: Implements USM Shared allocations using HIP Managed Memory
-UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
-    ur_context_handle_t hContext, ur_device_handle_t,
-    const ur_usm_desc_t *pUSMDesc, [[maybe_unused]] ur_usm_pool_handle_t pool,
-    size_t size, void **ppMem) {
-  UR_ASSERT(!pUSMDesc || (pUSMDesc->align == 0 ||
-                          ((pUSMDesc->align & (pUSMDesc->align - 1)) == 0)),
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
+                 const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool,
+                 size_t size, void **ppMem) {
+  uint32_t alignment;
+  UR_ASSERT(checkUSMAlignment(alignment, pUSMDesc),
            UR_RESULT_ERROR_INVALID_VALUE);
 
-  ur_result_t Result = UR_RESULT_SUCCESS;
-  try {
-    ScopedContext Active(hContext->getDevice());
-    Result = UR_CHECK_ERROR(hipMallocManaged(ppMem, size, hipMemAttachGlobal));
-  } catch (ur_result_t Error) {
-    Result = Error;
+  if (!hPool) {
+    return USMSharedAllocImpl(ppMem, hContext, hDevice, nullptr, nullptr, size,
+                              alignment);
   }
 
-  if (Result == UR_RESULT_SUCCESS) {
-    assert((!pUSMDesc || pUSMDesc->align == 0 ||
-            reinterpret_cast<std::uintptr_t>(*ppMem) % pUSMDesc->align == 0));
-    hContext->addUSMMapping(*ppMem, size);
-  }
-  return Result;
+  return umfPoolMallocHelper(hPool, ppMem, size, alignment);
 }
 
-/// USM: Frees the given USM pointer associated with the context.
-UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
-                                              void *pMem) {
+UR_APIEXPORT ur_result_t UR_APICALL USMFreeImpl(ur_context_handle_t hContext,
+                                                void *pMem) {
   ur_result_t Result = UR_RESULT_SUCCESS;
   try {
     ScopedContext Active(hContext->getDevice());
-    unsigned int Type;
     hipPointerAttribute_t hipPointerAttributeType;
     Result = UR_CHECK_ERROR(
         hipPointerGetAttributes(&hipPointerAttributeType, pMem));
-    Type = hipPointerAttributeType.memoryType;
+    unsigned int Type = hipPointerAttributeType.memoryType;
     UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost,
               UR_RESULT_ERROR_INVALID_MEM_OBJECT);
     if (Type == hipMemoryTypeDevice) {
       Result = UR_CHECK_ERROR(hipFree(pMem));
     }
     if (Type == hipMemoryTypeHost) {
-      Result = UR_CHECK_ERROR(hipFreeHost(pMem));
+      Result = UR_CHECK_ERROR(hipHostFree(pMem));
     }
   } catch (ur_result_t Error) {
-    return Error;
+    Result = Error;
   }
-  hContext->removeUSMMapping(pMem);
   return Result;
 }
 
+/// USM: Frees the given USM pointer associated with the context.
+UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
+                                              void *pMem) {
+  if (auto Pool = umfPoolByPtr(pMem)) {
+    return umf::umf2urResult(umfPoolFree(Pool, pMem));
+  } else {
+    return USMFreeImpl(hContext, pMem);
+  }
+}
+
+ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                               ur_device_handle_t, ur_usm_device_mem_flags_t *,
+                               size_t Size, uint32_t Alignment) {
+  try {
+    ScopedContext Active(Context->getDevice());
+    UR_CHECK_ERROR(hipMalloc(ResultPtr, Size));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+
+  assert(checkUSMImplAlignment(Alignment, ResultPtr));
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                               ur_device_handle_t, ur_usm_host_mem_flags_t *,
+                               ur_usm_device_mem_flags_t *, size_t Size,
+                               uint32_t Alignment) {
+  try {
+    ScopedContext Active(Context->getDevice());
+    UR_CHECK_ERROR(hipMallocManaged(ResultPtr, Size, hipMemAttachGlobal));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+
+  assert(checkUSMImplAlignment(Alignment, ResultPtr));
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                             ur_usm_host_mem_flags_t *, size_t Size,
+                             uint32_t Alignment) {
+  try {
+    ScopedContext Active(Context->getDevice());
+    UR_CHECK_ERROR(hipHostMalloc(ResultPtr, Size));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+
+  assert(checkUSMImplAlignment(Alignment, ResultPtr));
+  return UR_RESULT_SUCCESS;
+}
+
 UR_APIEXPORT ur_result_t UR_APICALL
 urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
                      ur_usm_alloc_info_t propName, size_t propValueSize,
@@ -184,6 +209,17 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
     ur_device_handle_t Device = Platforms[DeviceIdx]->Devices[0].get();
     return ReturnValue(Device);
   }
+  case UR_USM_ALLOC_INFO_POOL: {
+    auto UMFPool = umfPoolByPtr(pMem);
+    if (!UMFPool) {
+      return UR_RESULT_ERROR_INVALID_VALUE;
+    }
+    ur_usm_pool_handle_t Pool = hContext->getOwningURPool(UMFPool);
+    if (!Pool) {
+      return UR_RESULT_ERROR_INVALID_VALUE;
+    }
+    return ReturnValue(Pool);
+  }
   default:
     return UR_RESULT_ERROR_INVALID_ENUMERATION;
   }
@@ -207,3 +243,220 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context,
   UR_ASSERT(!HostPtr, UR_RESULT_ERROR_INVALID_VALUE);
   return UR_RESULT_SUCCESS;
 }
+
+umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,
+                                           ur_device_handle_t Dev) {
+  Context = Ctx;
+  Device = Dev;
+  // There isn't a way to query this in HIP, and there isn't much info on
+  // HIP's approach to alignment or transfer granularity between host and
+  // device. Within UMF this is only used to influence alignment, and since we
+  // discard that in our alloc implementations it seems we can safely ignore
+  // this as well, for now.
+  MinPageSize = 0;
+
+  return UMF_RESULT_SUCCESS;
+}
+
+enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
+                                           void **Ptr) {
+  auto Res = allocateImpl(Ptr, Size, Align);
+  if (Res != UR_RESULT_SUCCESS) {
+    getLastStatusRef() = Res;
+    return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
+  }
+
+  return UMF_RESULT_SUCCESS;
+}
+
+enum umf_result_t USMMemoryProvider::free(void *Ptr, size_t Size) {
+  (void)Size;
+
+  auto Res = USMFreeImpl(Context, Ptr);
+  if (Res != UR_RESULT_SUCCESS) {
+    getLastStatusRef() = Res;
+    return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
+  }
+
+  return UMF_RESULT_SUCCESS;
+}
+
+void USMMemoryProvider::get_last_native_error(const char **ErrMsg,
+                                              int32_t *ErrCode) {
+  (void)ErrMsg;
+  *ErrCode = static_cast<int32_t>(getLastStatusRef());
+}
+
+umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) {
+  (void)Ptr;
+  *PageSize = MinPageSize;
+
+  return UMF_RESULT_SUCCESS;
+}
+
+ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
+                                                  uint32_t Alignment) {
+  return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size,
+                            Alignment);
+}
+
+ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
+                                                  uint32_t Alignment) {
+  return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size,
+                            Alignment);
+}
+
+ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
+                                                uint32_t Alignment) {
+  return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment);
+}
+
+ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
+                                             ur_usm_pool_desc_t *PoolDesc)
+    : Context(Context) {
+  const void *pNext = PoolDesc->pNext;
+  while (pNext != nullptr) {
+    const ur_base_desc_t *BaseDesc = static_cast<const ur_base_desc_t *>(pNext);
+    switch (BaseDesc->stype) {
+    case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: {
+      const ur_usm_pool_limits_desc_t *Limits =
+          reinterpret_cast<const ur_usm_pool_limits_desc_t *>(BaseDesc);
+      for (auto &config : DisjointPoolConfigs.Configs) {
+        config.MaxPoolableSize = Limits->maxPoolableSize;
+        config.SlabMinSize = Limits->minDriverAllocSize;
+      }
+      break;
+    }
+    default: {
+      throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT);
+    }
+    }
+    pNext = BaseDesc->pNext;
+  }
+
+  auto MemProvider =
+      umf::memoryProviderMakeUnique<USMHostMemoryProvider>(Context, nullptr)
+          .second;
+
+  HostMemPool =
+      umf::poolMakeUnique<usm::DisjointPool, 1>(
+          {std::move(MemProvider)},
+          this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host])
+          .second;
+
+  auto Device = Context->DeviceId;
+  MemProvider =
+      umf::memoryProviderMakeUnique<USMDeviceMemoryProvider>(Context, Device)
+          .second;
+  DeviceMemPool =
+      umf::poolMakeUnique<usm::DisjointPool, 1>(
+          {std::move(MemProvider)},
+          this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device])
+          .second;
+
+  MemProvider =
+      umf::memoryProviderMakeUnique<USMSharedMemoryProvider>(Context, Device)
+          .second;
+  SharedMemPool =
+      umf::poolMakeUnique<usm::DisjointPool, 1>(
+          {std::move(MemProvider)},
+          this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared])
+          .second;
+  Context->addPool(this);
+}
+
+bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) {
+  return DeviceMemPool.get() == umf_pool || SharedMemPool.get() == umf_pool ||
+         HostMemPool.get() == umf_pool;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate(
+    ur_context_handle_t Context, ///< [in] handle of the context object
+    ur_usm_pool_desc_t
+        *PoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with
+                   ///< ::ur_usm_pool_limits_desc_t
+    ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool
+) {
+  // Without pool tracking we can't free pool allocations.
+#ifdef UMF_ENABLE_POOL_TRACKING
+  if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) {
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  }
+  try {
+    *Pool = reinterpret_cast<ur_usm_pool_handle_t>(
+        new ur_usm_pool_handle_t_(Context, PoolDesc));
+  } catch (const UsmAllocationException &Ex) {
+    return Ex.getError();
+  }
+  return UR_RESULT_SUCCESS;
+#else
+  std::ignore = Context;
+  std::ignore = PoolDesc;
+  std::ignore = Pool;
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+#endif
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain(
+    ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+  Pool->incrementReferenceCount();
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease(
+    ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+  if (Pool->decrementReferenceCount() > 0) {
+    return UR_RESULT_SUCCESS;
+  }
+  Pool->Context->removePool(Pool);
+  delete Pool;
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo(
+    ur_usm_pool_handle_t hPool,  ///< [in] handle of the USM memory pool
+    ur_usm_pool_info_t propName, ///< [in] name of the pool property to query
+    size_t propSize, ///< [in] size in bytes of the pool property value provided
+    void *pPropValue, ///< [out][optional][typename(propName, propSize)] value
+                      ///< of the pool property
+    size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool
+                         ///< property value
+) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+
+  switch (propName) {
+  case UR_USM_POOL_INFO_REFERENCE_COUNT: {
+    return ReturnValue(hPool->getReferenceCount());
+  }
+  case UR_USM_POOL_INFO_CONTEXT: {
+    return ReturnValue(hPool->Context);
+  }
+  default: {
+    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+  }
+  }
+}
+
+bool checkUSMAlignment(uint32_t &alignment, const ur_usm_desc_t *pUSMDesc) {
+  alignment = pUSMDesc ? pUSMDesc->align : 0u;
+  return (!pUSMDesc ||
+          (alignment == 0 || ((alignment & (alignment - 1)) == 0)));
+}
+
+bool checkUSMImplAlignment(uint32_t Alignment, void **ResultPtr) {
+  return Alignment == 0 ||
+         reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0;
+}
+
+ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem,
+                                size_t size, uint32_t alignment) {
+  auto UMFPool = hPool->DeviceMemPool.get();
+  *ppMem = umfPoolAlignedMalloc(UMFPool, size, alignment);
+  if (*ppMem == nullptr) {
+    auto umfErr = umfPoolGetLastAllocationError(UMFPool);
+    return umf::umf2urResult(umfErr);
+  }
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp
new file mode 100644
index 0000000000000..be540679122b8
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.hpp
@@ -0,0 +1,137 @@
+//===--------- usm.hpp - HIP Adapter --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+
+#include
+#include
+
+usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig();
+
+struct ur_usm_pool_handle_t_ {
+  std::atomic_uint32_t RefCount = 1;
+
+  ur_context_handle_t Context = nullptr;
+
+  usm::DisjointPoolAllConfigs DisjointPoolConfigs =
+      usm::DisjointPoolAllConfigs();
+
+  umf::pool_unique_handle_t DeviceMemPool;
+  umf::pool_unique_handle_t SharedMemPool;
+  umf::pool_unique_handle_t HostMemPool;
+
+  ur_usm_pool_handle_t_(ur_context_handle_t Context,
+                        ur_usm_pool_desc_t *PoolDesc);
+
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+
+  bool hasUMFPool(umf_memory_pool_t *umf_pool);
+};
+
+// Exception type to pass allocation errors
+class UsmAllocationException {
+  const ur_result_t Error;
+
+public:
+  UsmAllocationException(ur_result_t Err) : Error{Err} {}
+  ur_result_t getError() const { return Error; }
+};
+
+// Implements memory allocation via driver API for USM allocator interface
+class USMMemoryProvider {
+private:
+  ur_result_t &getLastStatusRef() {
+    static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS;
+    return LastStatus;
+  }
+
+protected:
+  ur_context_handle_t Context;
+  ur_device_handle_t Device;
+  size_t MinPageSize;
+
+  // Internal allocation routine which must be implemented for each allocation
+  // type
+  virtual ur_result_t allocateImpl(void **ResultPtr, size_t Size,
+                                   uint32_t Alignment) = 0;
+
+public:
+  umf_result_t initialize(ur_context_handle_t Ctx, ur_device_handle_t Dev);
+  umf_result_t alloc(size_t Size, size_t Align, void **Ptr);
+  umf_result_t free(void *Ptr, size_t Size);
+  void get_last_native_error(const char **ErrMsg, int32_t *ErrCode);
+  umf_result_t get_min_page_size(void *, size_t *);
+  umf_result_t get_recommended_page_size(size_t, size_t *) {
+    return UMF_RESULT_ERROR_NOT_SUPPORTED;
+  };
+  umf_result_t purge_lazy(void *, size_t) {
+    return UMF_RESULT_ERROR_NOT_SUPPORTED;
+  };
+  umf_result_t purge_force(void *, size_t) {
+    return UMF_RESULT_ERROR_NOT_SUPPORTED;
+  };
+  virtual const char *get_name() = 0;
+
+  virtual ~USMMemoryProvider() = default;
+};
+
+// Allocation routines for shared memory type
+class USMSharedMemoryProvider final : public USMMemoryProvider {
+public:
+  const char *get_name() override { return "USMSharedMemoryProvider"; }
+
+protected:
+  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
+                           uint32_t Alignment) override;
+};
+
+// Allocation routines for device memory type
+class USMDeviceMemoryProvider final : public USMMemoryProvider {
+public:
+  const char *get_name() override { return "USMDeviceMemoryProvider"; }
+
+protected:
+  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
+                           uint32_t Alignment) override;
+};
+
+// Allocation routines for host memory type
+class USMHostMemoryProvider final : public USMMemoryProvider {
+public:
+  const char *get_name() override { return "USMHostMemoryProvider"; }
+
+protected:
+  ur_result_t allocateImpl(void **ResultPtr, size_t Size,
+                           uint32_t Alignment) override;
+};
+
+ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                               ur_device_handle_t Device,
+                               ur_usm_device_mem_flags_t *Flags, size_t Size,
+                               uint32_t Alignment);
+
+ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                               ur_device_handle_t Device,
+                               ur_usm_host_mem_flags_t *,
+                               ur_usm_device_mem_flags_t *, size_t Size,
+                               uint32_t Alignment);
+
+ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context,
+                             ur_usm_host_mem_flags_t *Flags, size_t Size,
+                             uint32_t Alignment);
+
+bool checkUSMAlignment(uint32_t &alignment, const ur_usm_desc_t *pUSMDesc);
+
+bool checkUSMImplAlignment(uint32_t Alignment, void **ResultPtr);
+
+ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem,
+                                size_t size, uint32_t alignment);
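
Note (not part of the patch): a minimal, hypothetical sketch of how the entry points wired up above would be driven through the UR API, assuming the adapter was built with UMF_ENABLE_POOL_TRACKING and that Context and Device are previously created ur_context_handle_t / ur_device_handle_t values; error checking is omitted for brevity.

  // Create a USM pool on the context (dispatches to urUSMPoolCreate above).
  ur_usm_pool_desc_t PoolDesc{UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr,
                              /*flags=*/0};
  ur_usm_pool_handle_t Pool = nullptr;
  urUSMPoolCreate(Context, &PoolDesc, &Pool);

  // Pooled device allocation: a non-null pool handle routes the request
  // through umfPoolMallocHelper instead of USMDeviceAllocImpl.
  ur_usm_desc_t USMDesc{UR_STRUCTURE_TYPE_USM_DESC, nullptr,
                        UR_USM_ADVICE_FLAG_DEFAULT, /*align=*/0};
  void *Ptr = nullptr;
  urUSMDeviceAlloc(Context, Device, &USMDesc, Pool, 1024, &Ptr);

  // urUSMFree looks up the owning UMF pool via umfPoolByPtr and frees through
  // it; pointers that were not pooled fall back to USMFreeImpl.
  urUSMFree(Context, Ptr);
  urUSMPoolRelease(Pool);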