diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 4d0da9995908a..31c6a86f3300d 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -149,9 +149,11 @@ // 14.40 Add HIP _pi_mem_advice alises to match the PI_MEM_ADVICE_CUDA* ones. // 14.41 Added piextCommandBufferMemBufferFill & piextCommandBufferFillUSM // 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM +// 15.43 Changed the signature of piextMemGetNativeHandle to also take a +// pi_device -#define _PI_H_VERSION_MAJOR 14 -#define _PI_H_VERSION_MINOR 42 +#define _PI_H_VERSION_MAJOR 15 +#define _PI_H_VERSION_MINOR 43 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -1424,8 +1426,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition( /// Gets the native handle of a PI mem object. /// /// \param mem is the PI mem to get the native handle of. +/// \param dev is the PI device that the native allocation will be resident on /// \param nativeHandle is the native handle of mem. -__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, +__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle); /// Creates PI mem object from a native handle. diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 4b8163b03efbd..db1db3cbb5261 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -228,8 +228,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, HostPtr, RetImage); } -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, + pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 266c72a3b3587..126ada92348f6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -236,8 +236,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, HostPtr, RetImage); } -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, + pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 79e047850af88..0fc36a231be6c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -243,8 +243,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, HostPtr, RetImage); } -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, + pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index c7e71f9791d35..48ce104a94e90 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -240,8 +240,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, HostPtr, RetImage); } -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, + pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 3e7f3aea4dfed..c09be92f89406 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -222,8 +222,9 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, HostPtr, RetImage); } -pi_result piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, + pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index af56733f4b72f..c990359e39e96 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -56,14 +56,14 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime") - # commit 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a - # Merge: edb281f3 5fc41099 + set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # commit d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7 + # Merge: 40517d2b fc1f3066 # Author: Kenneth Benzie (Benie) - # Date: Tue Jan 30 12:31:44 2024 +0000 - # Merge pull request #1168 from Seanst98/sean/unique-addr-mode-per-dim-adapters - # [Bindless][CUDA] Unique addressing modes per dimension - set(UNIFIED_RUNTIME_TAG 3225b822b5d8cbfa85d7fc1bd5a5bf96e5bb8c1a) + # Date: Wed Jan 31 10:38:07 2024 +0000 + # Merge pull request #1226 from hdelan/get-native-mem-on-device2 + # [UR] Add extra param to urMemGetNativeHandle + set(UNIFIED_RUNTIME_TAG d216eb44d5c9fe3433eecdd09b10e3e79ac25bd7) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index df841b786bfb1..c19c93a6af53a 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3081,13 +3081,14 @@ inline pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, return PI_SUCCESS; } -inline pi_result piextMemGetNativeHandle(pi_mem Mem, +inline pi_result piextMemGetNativeHandle(pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) { PI_ASSERT(Mem, PI_ERROR_INVALID_MEM_OBJECT); ur_mem_handle_t UrMem = reinterpret_cast(Mem); + ur_device_handle_t UrDev = reinterpret_cast(Dev); ur_native_handle_t NativeMem{}; - HANDLE_ERRORS(urMemGetNativeHandle(UrMem, &NativeMem)); + HANDLE_ERRORS(urMemGetNativeHandle(UrMem, UrDev, &NativeMem)); *NativeHandle = reinterpret_cast(NativeMem); diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index adbeb652bf613..b9742b8697fa8 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -235,9 +235,9 @@ __SYCL_EXPORT pi_result piMemBufferPartition( BufferCreateInfo, RetMem); } -__SYCL_EXPORT pi_result -piextMemGetNativeHandle(pi_mem Mem, pi_native_handle *NativeHandle) { - return pi2ur::piextMemGetNativeHandle(Mem, NativeHandle); +__SYCL_EXPORT pi_result piextMemGetNativeHandle( + pi_mem Mem, pi_device Dev, pi_native_handle *NativeHandle) { + return pi2ur::piextMemGetNativeHandle(Mem, Dev, NativeHandle); } __SYCL_EXPORT pi_result diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 04c055465a9cf..835c732a40bf9 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -84,7 +84,11 @@ buffer_impl::getNativeVector(backend BackendName) const { } pi_native_handle Handle; - Plugin->call(NativeMem, &Handle); + // When doing buffer interop we don't know what device the memory should be + // resident on, so pass nullptr for Device param. Buffer interop may not be + // supported by all backends. + Plugin->call(NativeMem, /*Dev*/ nullptr, + &Handle); Handles.push_back(Handle); } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 0daa53587ed4d..e59fb94a09f65 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -144,7 +144,11 @@ void memBufferCreateHelper(const PluginPtr &Plugin, pi_context Ctx, // Always use call_nocheck here, because call may throw an exception, // and this lambda will be called from destructor, which in combination // rewards us with UB. - Plugin->call_nocheck(*RetMem, &Ptr); + // When doing buffer interop we don't know what device the memory should + // be resident on, so pass nullptr for Device param. Buffer interop may + // not be supported by all backends. + Plugin->call_nocheck( + *RetMem, /*Dev*/ nullptr, &Ptr); emitMemAllocEndTrace(MemObjID, (uintptr_t)(Ptr), Size, 0 /* guard zone */, CorrID); }}; @@ -167,7 +171,11 @@ void memReleaseHelper(const PluginPtr &Plugin, pi_mem Mem) { // Do not make unnecessary PI calls without instrumentation enabled if (xptiTraceEnabled()) { pi_native_handle PtrHandle = 0; - Plugin->call(Mem, &PtrHandle); + // When doing buffer interop we don't know what device the memory should be + // resident on, so pass nullptr for Device param. Buffer interop may not be + // supported by all backends. + Plugin->call(Mem, /*Dev*/ nullptr, + &PtrHandle); Ptr = (uintptr_t)(PtrHandle); } #endif diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 0b0ab39199370..cd479493bbae3 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -34,8 +34,8 @@ pi_native_handle interop_handle::getNativeMem(detail::Requirement *Req) const { auto Plugin = MQueue->getPlugin(); pi_native_handle Handle; - Plugin->call(Iter->second, - &Handle); + Plugin->call( + Iter->second, MDevice->getHandleRef(), &Handle); return Handle; } diff --git a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp new file mode 100644 index 0000000000000..1f98b2da24c33 --- /dev/null +++ b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp @@ -0,0 +1,136 @@ +// FIXME: the rocm include path and link path are highly platform dependent, +// we should set this with some variable instead. +// RUN: %{build} -o %t.out -I/opt/rocm/include -L/opt/rocm/lib -lamdhip64 +// RUN: %{run} %t.out +// REQUIRES: hip + +#include +#include + +#define __HIP_PLATFORM_AMD__ + +#include + +using namespace sycl; +using namespace sycl::access; + +static constexpr size_t BUFFER_SIZE = 1024; + +template class Modifier; + +template class Init; + +template +void checkBufferValues(BufferT Buffer, ValueT Value) { + auto Acc = Buffer.get_host_access(); + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) { + if (Acc[Idx] != Value) { + std::cerr << "buffer[" << Idx << "] = " << Acc[Idx] + << ", expected val = " << Value << '\n'; + exit(1); + } + } +} + +template +void copy(buffer &Src, buffer &Dst, queue &Q) { + Q.submit([&](handler &CGH) { + auto SrcA = Src.template get_access(CGH); + auto DstA = Dst.template get_access(CGH); + + auto Func = [=](interop_handle IH) { + auto HipStream = IH.get_native_queue(); + auto SrcMem = IH.get_native_mem(SrcA); + auto DstMem = IH.get_native_mem(DstA); + + if (hipMemcpyWithStream(DstMem, SrcMem, sizeof(DataT) * SrcA.get_count(), + hipMemcpyDefault, HipStream) != hipSuccess) { + throw; + } + + if (hipStreamSynchronize(HipStream) != hipSuccess) { + throw; + } + + if (Q.get_backend() != IH.get_backend()) + throw; + }; + CGH.host_task(Func); + }); +} + +template void modify(buffer &B, queue &Q) { + Q.submit([&](handler &CGH) { + auto Acc = B.template get_access(CGH); + + auto Kernel = [=](item<1> Id) { Acc[Id] += 1; }; + + CGH.parallel_for>(Acc.get_count(), Kernel); + }); +} + +template +void init(buffer &B1, buffer &B2, queue &Q) { + Q.submit([&](handler &CGH) { + auto Acc1 = B1.template get_access(CGH); + auto Acc2 = B2.template get_access(CGH); + + CGH.parallel_for>(BUFFER_SIZE, [=](item<1> Id) { + Acc1[Id] = B1Init; + Acc2[Id] = B2Init; + }); + }); +} + +// Check that a single host-interop-task with a buffer will work. +void test_ht_buffer(queue &Q) { + buffer Buffer{BUFFER_SIZE}; + + Q.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + auto Func = [=](interop_handle IH) { /*A no-op */ }; + CGH.host_task(Func); + }); +} + +// A test that uses HIP interop to copy data from buffer A to buffer B, by +// getting HIP ptrs and calling the hipMemcpyWithStream. Then run a SYCL +// kernel that modifies the data in place for B, e.g. increment one, then copy +// back to buffer A. Run it on a loop, to ensure the dependencies and the +// reference counting of the objects is not leaked. +void test_ht_kernel_dependencies(queue &Q) { + static constexpr int COUNT = 4; + buffer Buffer1{BUFFER_SIZE}; + buffer Buffer2{BUFFER_SIZE}; + + // Init the buffer with a'priori invalid data. + init(Buffer1, Buffer2, Q); + + // Repeat a couple of times. + for (size_t Idx = 0; Idx < COUNT; ++Idx) { + copy(Buffer1, Buffer2, Q); + modify(Buffer2, Q); + copy(Buffer2, Buffer1, Q); + } + + checkBufferValues(Buffer1, COUNT - 1); + checkBufferValues(Buffer2, COUNT - 1); +} + +void tests(queue &Q) { + test_ht_buffer(Q); + test_ht_kernel_dependencies(Q); +} + +int main() { + queue Q([](sycl::exception_list ExceptionList) { + if (ExceptionList.size() != 1) { + std::cerr << "Should be one exception in exception list" << std::endl; + std::abort(); + } + std::rethrow_exception(*ExceptionList.begin()); + }); + tests(Q); + std::cout << "Test PASSED" << std::endl; + return 0; +} diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index c1f0a58f82274..31eac5598f588 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -613,7 +613,7 @@ mock_piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, return PI_SUCCESS; } -inline pi_result mock_piextMemGetNativeHandle(pi_mem mem, +inline pi_result mock_piextMemGetNativeHandle(pi_mem mem, pi_device dev, pi_native_handle *nativeHandle) { *nativeHandle = reinterpret_cast(mem); return PI_SUCCESS;