From a5bb5d4d0c266174b9e724d41aa0cd197e3585e4 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 12 Feb 2024 16:26:47 +0000 Subject: [PATCH 01/26] Implemented native USM Fill and removed piextUSMEnqueueMemset symbol --- sycl/include/sycl/detail/cg.hpp | 2 +- sycl/include/sycl/detail/pi.def | 2 +- sycl/include/sycl/detail/pi.h | 27 +++++++------- sycl/include/sycl/handler.hpp | 10 +++--- sycl/plugins/cuda/pi_cuda.cpp | 12 +++---- sycl/plugins/hip/pi_hip.cpp | 12 +++---- sycl/plugins/level_zero/pi_level_zero.cpp | 23 ++++++------ sycl/plugins/native_cpu/pi_native_cpu.cpp | 12 +++---- sycl/plugins/opencl/pi_opencl.cpp | 12 +++---- sycl/plugins/unified_runtime/pi2ur.hpp | 14 ++++---- .../unified_runtime/pi_unified_runtime.cpp | 36 +++++++++---------- sycl/source/detail/graph_impl.hpp | 6 ++-- sycl/source/detail/memory_manager.cpp | 20 +++++------ sycl/source/detail/memory_manager.hpp | 6 ++-- sycl/source/detail/queue_impl.cpp | 4 +-- sycl/source/detail/scheduler/commands.cpp | 4 +-- sycl/source/handler.cpp | 10 ++++++ .../DiscardEvents/discard_events_usm.cpp | 14 ++++---- .../discard_events_usm_ooo_queue.cpp | 14 ++++---- sycl/test/abi/pi_cuda_symbol_check.dump | 2 +- sycl/test/abi/pi_hip_symbol_check.dump | 2 +- sycl/test/abi/pi_level_zero_symbol_check.dump | 2 +- sycl/test/abi/pi_opencl_symbol_check.dump | 4 +-- sycl/test/abi/sycl_symbols_linux.dump | 7 ++-- sycl/test/abi/sycl_symbols_windows.dump | 7 ++-- sycl/tools/xpti_helpers/usm_analyzer.hpp | 14 ++++---- sycl/unittests/SYCL2020/GetNativeOpenCL.cpp | 8 ++--- sycl/unittests/helpers/PiMockPlugin.hpp | 11 +++--- sycl/unittests/queue/USM.cpp | 6 ++-- sycl/unittests/queue/Wait.cpp | 5 +-- .../scheduler/InOrderQueueHostTaskDeps.cpp | 7 ++-- .../unittests/xpti_trace/QueueApiFailures.cpp | 20 +++++------ 32 files changed, 173 insertions(+), 162 deletions(-) diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/include/sycl/detail/cg.hpp index 5c2ffdc712c00..08edae99c506f 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/include/sycl/detail/cg.hpp @@ -299,7 +299,7 @@ class CGFillUSM : public CG { MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - int getFill() { return MPattern[0]; } + const std::vector &getPattern() { return MPattern; } }; /// "Prefetch USM" command group class. diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 46a200e001231..ee7e4f3de4a90 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -128,7 +128,7 @@ _PI_API(piextUSMHostAlloc) _PI_API(piextUSMDeviceAlloc) _PI_API(piextUSMSharedAlloc) _PI_API(piextUSMFree) -_PI_API(piextUSMEnqueueMemset) +_PI_API(piextUSMEnqueueFill) _PI_API(piextUSMEnqueueMemcpy) _PI_API(piextUSMEnqueuePrefetch) _PI_API(piextUSMEnqueueMemAdvise) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 3c9076e09f66b..f693d116157dc 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -156,9 +156,10 @@ // piextEnqueueCooperativeKernelLaunch. // 15.46 Add piextGetGlobalVariablePointer // 15.47 Added PI_ERROR_FEATURE_UNSUPPORTED. +// 16.48 Replaced piextUSMEnqueueMemset with piextUSMEnqueueFill -#define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 47 +#define _PI_H_VERSION_MAJOR 16 +#define _PI_H_VERSION_MINOR 48 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -2020,22 +2021,22 @@ __SYCL_EXPORT pi_result piextUSMPitchedAlloc( /// \param ptr is the memory to be freed __SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); -/// USM Memset API +/// USM Fill API /// /// \param queue is the queue to submit to -/// \param ptr is the ptr to memset -/// \param value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// \param count is the size in bytes to memset +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill /// \param num_events_in_waitlist is the number of events to wait on /// \param events_waitlist is an array of events to wait on /// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memcpy API /// diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 672f8da32c91c..4451bb7b6800c 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2796,14 +2796,9 @@ class __SYCL_EXPORT handler { /// device copyable. /// \param Count is the number of times to fill Pattern into Ptr. template void fill(void *Ptr, const T &Pattern, size_t Count) { - throwIfActionIsCreated(); - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); static_assert(is_device_copyable::value, "Pattern must be device copyable"); - parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { - T *CastedPtr = static_cast(Ptr); - CastedPtr[Index] = Pattern; - }); + this->fill_impl(Ptr, &Pattern, sizeof(T), Count); } /// Prevents any commands submitted afterward to this queue from executing @@ -3613,6 +3608,9 @@ class __SYCL_EXPORT handler { }); } + // Implementation of USM fill using command for native fill. + void fill_impl(void *Dest, const void *Value, size_t ValueSize, size_t Count); + // Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy. void ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index e6d395e758568..cc3ffa656a661 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -902,12 +902,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 2fbde10b77123..4a1fc086ceaf9 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 26a1b104b3335..8019b5af76865 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -929,23 +929,22 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -/// USM Memset API +/// USM Fill API /// /// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset +/// @param Ptr is the ptr to fill +/// \param Pattern is the ptr with the bytes of the pattern to set +/// \param PatternSize is the size in bytes of the pattern to set +/// @param Count is the size in bytes to fill /// @param NumEventsInWaitlist is the number of events to wait on /// @param EventsWaitlist is an array of events to wait on /// @param Event is the event that represents this operation -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index 5174456e95a77..af7b7333da608 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e8a168b60445e..9ffcb0ce65f57 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -864,12 +864,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, - size_t Count, pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 14b7b4723c0dc..443a9c5a76cb4 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3856,11 +3856,12 @@ inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, return PI_SUCCESS; } -inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { +inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); if (!Ptr) { return PI_ERROR_INVALID_VALUE; @@ -3872,8 +3873,7 @@ inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - size_t PatternSize = 1; - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, + HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, Pattern, Count, NumEventsInWaitList, UrEventsWaitList, UREvent)); diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 8701d23027682..5d465cc10407e 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -437,24 +437,24 @@ __SYCL_EXPORT pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, ParamValueSizeRet); } -/// USM Memset API +/// USM Fill API /// -/// @param Queue is the queue to submit to -/// @param Ptr is the ptr to memset -/// @param Value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored -/// @param Count is the size in bytes to memset -/// @param NumEventsInWaitlist is the number of events to wait on -/// @param EventsWaitlist is an array of events to wait on -/// @param Event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, - pi_int32 Value, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueMemset( - Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pattern is the ptr with the bytes of the pattern to set +/// \param patternSize is the size in bytes of the pattern to set +/// \param count is the size in bytes to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, + size_t PatternSize, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, + NumEventsInWaitlist, EventsWaitlist, Event); } __SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( @@ -1472,7 +1472,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { _PI_API(piEnqueueMemBufferMap) _PI_API(piEnqueueMemUnmap) _PI_API(piEnqueueMemBufferFill) - _PI_API(piextUSMEnqueueMemset) + _PI_API(piextUSMEnqueueFill) _PI_API(piEnqueueMemBufferCopyRect) _PI_API(piEnqueueMemBufferCopy) _PI_API(piextUSMEnqueueMemcpy) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 73064b021edc5..9c725a4c73613 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -454,8 +454,10 @@ class node_impl { sycl::detail::CGFillUSM *FillUSM = static_cast(MCommandGroup.get()); Stream << "Dst: " << FillUSM->getDst() - << " Length: " << FillUSM->getLength() - << " Pattern: " << FillUSM->getFill() << "\\n"; + << " Length: " << FillUSM->getLength() << " Pattern: "; + for (auto byte : FillUSM->getPattern()) + Stream << byte; + Stream << "\\n"; } break; case sycl::detail::CG::CGTYPE::PrefetchUSM: diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index c0af141935bd5..e799533572a68 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -942,7 +942,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(!SrcQueue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); + "Host queue not supported in copy_usm."); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -978,7 +978,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1001,14 +1001,14 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), - DepEvents.data(), OutEvent); + Plugin->call( + Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length, + DepEvents.size(), DepEvents.data(), OutEvent); } // TODO: This function will remain until ABI-breaking change void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent) { MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, @@ -1677,7 +1677,8 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, std::vector Deps, + size_t Len, const std::vector &Pattern, + std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { if (!DstMem) @@ -1685,10 +1686,9 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( PI_ERROR_INVALID_VALUE); const PluginPtr &Plugin = Context->getPlugin(); - // Pattern is interpreted as an unsigned char so pattern size is always 1. - size_t PatternSize = 1; + Plugin->call( - CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(), + CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), Deps.data(), OutSyncPoint); } diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 1d2800bf9dadc..858e7232380ac 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -172,14 +172,14 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent); static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); // TODO: This function will remain until ABI-breaking change static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent); @@ -319,7 +319,7 @@ class __SYCL_EXPORT MemoryManager { static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, + size_t Len, const std::vector &Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 321cc48b29769..28a5b61a5c82e 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -121,11 +121,11 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - + const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, - Count, Value); + Count, Pattern); } void report(const code_location &CodeLoc) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index efc553cdb97e2..85b867319b1db 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2847,7 +2847,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), - Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps), + Fill->getLength(), Fill->getPattern(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; @@ -3041,7 +3041,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { case CG::CGTYPE::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), - Fill->getFill(), std::move(RawEvents), Event, + Fill->getPattern(), std::move(RawEvents), Event, MEvent); return PI_SUCCESS; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5b40f1b3b07c3..83de1c17bb888 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -971,6 +971,16 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { setType(detail::CG::AdviseUSM); } +void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, + size_t Count) { + throwIfActionIsCreated(); + MDstPtr = Dest; + MPattern.resize(ValueSize); + std::memcpy(MPattern.data(), Value, ValueSize); + MLength = Count * ValueSize; + setType(detail::CG::FillUSM); +} + void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 48ab65c68896c..269706b10321d 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -20,8 +20,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -47,7 +46,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -57,7 +56,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -65,8 +64,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -92,7 +90,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index 96d53a632beb6..fb7555bab9d9f 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -22,8 +22,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -58,7 +57,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -69,7 +68,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -79,8 +78,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Q.fill don't use piEnqueueMemBufferFill -// CHECK: ---> piEnqueueKernelLaunch( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -115,7 +113,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueMemset( +// CHECK: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index ec83769469dcd..a81c70cb3a17c 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -151,11 +151,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 975e3315c0197..e748e4054e816 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -151,11 +151,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 336e1cd3cdd8e..97c23450e95ba 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -150,11 +150,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index fa7c7a2dc0525..0d3d00c76c52e 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -81,8 +81,8 @@ piSamplerGetInfo piSamplerRelease piSamplerRetain piTearDown -piextCommandBufferAdviseUSM piextBindlessImageSamplerCreate +piextCommandBufferAdviseUSM piextCommandBufferCreate piextCommandBufferFillUSM piextCommandBufferFinalize @@ -150,11 +150,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index dffee1588a04a..57cf0f03051f1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3941,7 +3941,7 @@ _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptr _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj +_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3954,8 +3954,8 @@ _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPN _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_ +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13make_platformEmNS0_7backendE _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE @@ -4165,6 +4165,7 @@ _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm +_ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2f180817725d4..a3035337d2fcf 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1058,7 +1058,7 @@ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -1109,8 +1109,9 @@ ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z +?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 9cdf27dd98ddc..1eee3474c51ed 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -214,8 +214,8 @@ class USMAnalyzer { USMAnalyzer::handleUSMSharedAlloc); ArgHandlerPreCall.set_piextUSMFree(USMAnalyzer::handleUSMFree); ArgHandlerPreCall.set_piMemBufferCreate(USMAnalyzer::handleMemBufferCreate); - ArgHandlerPreCall.set_piextUSMEnqueueMemset( - USMAnalyzer::handleUSMEnqueueMemset); + ArgHandlerPreCall.set_piextUSMEnqueueFill( + USMAnalyzer::handleUSMEnqueueFill); ArgHandlerPreCall.set_piextUSMEnqueueMemcpy( USMAnalyzer::handleUSMEnqueueMemcpy); ArgHandlerPreCall.set_piextUSMEnqueuePrefetch( @@ -350,11 +350,11 @@ class USMAnalyzer { } } - static void handleUSMEnqueueMemset(const pi_plugin &, - std::optional, pi_queue, - void *ptr, pi_int32, size_t numBytes, - pi_uint32, const pi_event *, pi_event *) { - CheckPointerValidness("input parameter", ptr, numBytes, "memset"); + static void handleUSMEnqueueFill(const pi_plugin &, std::optional, + pi_queue, void *ptr, const void *, size_t, + size_t numBytes, pi_uint32, const pi_event *, + pi_event *) { + CheckPointerValidness("input parameter", ptr, numBytes, "fill"); } static void handleUSMEnqueueMemcpy(const pi_plugin &, diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index 27967973c1363..3860833256048 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -75,9 +75,9 @@ pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, return PI_SUCCESS; } -static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, - pi_event *event) { +static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32, + const pi_event *, pi_event *event) { *event = reinterpret_cast(new int{}); return PI_SUCCESS; } @@ -97,7 +97,7 @@ TEST(GetNative, GetNativeHandle) { Mock.redefineBefore(redefinedMemRetain); Mock.redefineBefore( redefinedMemBufferCreate); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); context Context(Plt); diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index be777f23df239..ce1ed45c7aa6d 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1199,11 +1199,12 @@ inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { return PI_SUCCESS; } -inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, - pi_int32 value, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +inline pi_result mock_piextUSMEnqueueFill(pi_queue queue, void *ptr, + const void *pattern, + size_t patternSize, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 30cdd2b37393e..03ff8c538d7ed 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -48,8 +48,8 @@ pi_result redefinedUSMEnqueueMemcpyAfter(pi_queue, pi_bool, void *, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, pi_int32, size_t, - pi_uint32, const pi_event *, +pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, const void *, size_t, + size_t, pi_uint32, const pi_event *, pi_event *Event) { // Set MEMSET to the event produced by the original USMEnqueueMemcpy MEMSET = *Event; @@ -64,7 +64,7 @@ TEST(USM, NoOpPreservesDependencyChain) { redefinedEnqueueEventsWaitAfter); Mock.redefineAfter( redefinedUSMEnqueueMemcpyAfter); - Mock.redefineAfter( + Mock.redefineAfter( redefinedUSMEnqueueMemsetAfter); context Ctx{Plt.get_devices()[0]}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index 8b2d72055d847..f9ec3fb6083ac 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -37,7 +37,8 @@ pi_result redefinedQueueCreateEx(pi_context context, pi_device device, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -88,7 +89,7 @@ TEST(QueueWait, QueueWaitTest) { Mock.redefineBefore( redefinedQueueCreateEx); Mock.redefineBefore(redefinedQueueFinish); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); Mock.redefineBefore(redefinedEventsWait); Mock.redefineBefore( diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 8693ff5e4c52b..07d36d479d3c0 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -61,7 +61,8 @@ inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, ExecutedCommands.push_back({CommandType::KERNEL, EventsCount}); return PI_SUCCESS; } -inline pi_result customextUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, +inline pi_result customextUSMEnqueueMemset(pi_queue, void *, const void *, + size_t, size_t, pi_uint32 EventsCount, const pi_event *, pi_event *) { ExecutedCommands.push_back({CommandType::MEMSET, EventsCount}); @@ -73,7 +74,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); @@ -126,7 +127,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 88c5fae49394c..c634f2dfce299 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -145,7 +145,8 @@ TEST_F(QueueApiFailures, QueueSingleTask) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -154,7 +155,7 @@ pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, } TEST_F(QueueApiFailures, QueueMemset) { - MockPlugin.redefine( + MockPlugin.redefine( redefinedUSMEnqueueMemset); MockPlugin.redefine( redefinedPluginGetLastError); @@ -241,18 +242,17 @@ TEST_F(QueueApiFailures, QueueCopy) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, - const void *Pattern, size_t PatternSize, - size_t Offset, size_t Size, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +pi_result redefinedUSMEnqueueFill(pi_queue Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Count, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } TEST_F(QueueApiFailures, QueueFill) { - MockPlugin.redefine( - redefinedEnqueueMemBufferFill); + MockPlugin.redefine( + redefinedUSMEnqueueFill); MockPlugin.redefine( redefinedPluginGetLastError); sycl::queue Q; From bd59ffced44c53848dea9ca4f746b53f2fb6bd29 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Wed, 28 Feb 2024 13:04:38 -0500 Subject: [PATCH 02/26] Added overload to fill usm and pointed to UR PR tag --- sycl/plugins/unified_runtime/CMakeLists.txt | 10 ++-------- sycl/source/detail/memory_manager.cpp | 16 +++++++++++++++- sycl/source/detail/memory_manager.hpp | 9 ++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0eebfa5a02014..6678d0f16829b 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -81,14 +81,8 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f - # Merge: ca5c3421 69c43b45 - # Author: Kenneth Benzie (Benie) - # Date: Tue Mar 19 21:00:20 2024 +0000 - # Merge pull request #1326 from hdelan/refactor-guess-local-worksize - # [CUDA][HIP] Fix bug in guess local worksize funcs and improve local worksize guessing in HIP adapter - set(UNIFIED_RUNTIME_TAG ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f) + set(UNIFIED_RUNTIME_REPO "https://github.com/konradkusiak97/unified-runtime.git") + set(UNIFIED_RUNTIME_TAG URChangesForFillCombined) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index e799533572a68..4cad9353c7b9b 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1008,9 +1008,23 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, // TODO: This function will remain until ABI-breaking change void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - const std::vector &Pattern, + int Pattern, + std::vector DepEvents, + sycl::detail::pi::PiEvent *OutEvent, + const detail::EventImplPtr &OutEventImpl) { + std::vector vecPattern(sizeof(Pattern)); + std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); + MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, + OutEventImpl); +} + +// TODO: This function will remain until ABI-breaking change +void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, + int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent) { + std::vector vecPattern(sizeof(Pattern)); + std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, nullptr); // OutEventImpl); } diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 858e7232380ac..0636fe6f3d1c0 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -179,7 +179,14 @@ class __SYCL_EXPORT MemoryManager { // TODO: This function will remain until ABI-breaking change static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - const std::vector &Pattern, + int Pattern, + std::vector DepEvents, + sycl::detail::pi::PiEvent *OutEvent, + const detail::EventImplPtr &OutEventImpl); + + // TODO: This function will remain until ABI-breaking change + static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, + int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent); From c0a5439f500accc603a1dea1a91e4bbfdc3b473a Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Thu, 29 Feb 2024 18:00:51 +0000 Subject: [PATCH 03/26] Fixed handling ABI breaking interfaces --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- sycl/source/detail/memory_manager.cpp | 4 +- sycl/source/handler.cpp | 3 +- sycl/test/abi/sycl_symbols_linux.dump | 3 +- sycl/unittests/Extensions/CommandGraph.cpp | 2547 +++++++++++++++++++ 5 files changed, 2554 insertions(+), 5 deletions(-) create mode 100644 sycl/unittests/Extensions/CommandGraph.cpp diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 6678d0f16829b..387458e7ee8ba 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,7 +82,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/konradkusiak97/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG URChangesForFillCombined) + set(UNIFIED_RUNTIME_TAG e400d8f37e6ccef65ff62491b417fc43b8c659ce) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 4cad9353c7b9b..9b8d94d557f4c 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1014,7 +1014,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, const detail::EventImplPtr &OutEventImpl) { std::vector vecPattern(sizeof(Pattern)); std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); - MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, + MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, OutEventImpl); } @@ -1025,7 +1025,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, sycl::detail::pi::PiEvent *OutEvent) { std::vector vecPattern(sizeof(Pattern)); std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); - MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, + MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, nullptr); // OutEventImpl); } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 83de1c17bb888..b66fae23a1249 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -952,7 +952,7 @@ void handler::memset(void *Dest, int Value, size_t Count) { MDstPtr = Dest; MPattern.push_back(static_cast(Value)); MLength = Count; - setUserFacingNodeType(ext::oneapi::experimental::node_type::memset); + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); setType(detail::CG::FillUSM); } @@ -978,6 +978,7 @@ void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); MLength = Count * ValueSize; + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); setType(detail::CG::FillUSM); } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 57cf0f03051f1..115a8d55711e6 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3954,8 +3954,9 @@ _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPN _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_ _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13make_platformEmNS0_7backendE _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp new file mode 100644 index 0000000000000..c583d64e77927 --- /dev/null +++ b/sycl/unittests/Extensions/CommandGraph.cpp @@ -0,0 +1,2547 @@ +//==--------------------- CommandGraph.cpp -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "sycl/ext/oneapi/experimental/graph.hpp" +#include + +#include "../thread_safety/ThreadUtils.h" +#include "detail/graph_impl.hpp" + +#include +#include +#include +#include + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi; + +// anonymous namespace used to avoid code redundancy by defining functions +// used by multiple times by unitests. +// Defining anonymous namespace prevents from function naming conflits +namespace { +/// Submits four kernels with diamond dependency to the queue Q +/// @param Q Queue to submit nodes to. +void runKernels(queue Q) { + auto NodeA = Q.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = Q.submit([&](sycl::handler &cgh) { + cgh.depends_on(NodeA); + cgh.single_task>([]() {}); + }); + auto NodeC = Q.submit([&](sycl::handler &cgh) { + cgh.depends_on(NodeA); + cgh.single_task>([]() {}); + }); + auto NodeD = Q.submit([&](sycl::handler &cgh) { + cgh.depends_on({NodeB, NodeC}); + cgh.single_task>([]() {}); + }); +} + +/// Submits four kernels without any additional dependencies the queue Q +/// @param Q Queue to submit nodes to. +void runKernelsInOrder(queue Q) { + auto NodeA = Q.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = Q.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeC = Q.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeD = Q.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); +} + +/// Adds four kernels with diamond dependency to the Graph G +/// @param G Modifiable graph to add commands to. +void addKernels( + experimental::command_graph G) { + auto NodeA = G.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = + G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeA)}); + auto NodeC = + G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeA)}); + auto NodeD = + G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeB, NodeC)}); +} + +bool checkExecGraphSchedule( + std::shared_ptr + GraphA, + std::shared_ptr + GraphB) { + auto ScheduleA = GraphA->getSchedule(); + auto ScheduleB = GraphB->getSchedule(); + if (ScheduleA.size() != ScheduleB.size()) + return false; + + std::vector< + std::shared_ptr> + VScheduleA{std::begin(ScheduleA), std::end(ScheduleA)}; + std::vector< + std::shared_ptr> + VScheduleB{std::begin(ScheduleB), std::end(ScheduleB)}; + + for (size_t i = 0; i < VScheduleA.size(); i++) { + if (!VScheduleA[i]->isSimilar(VScheduleB[i])) + return false; + } + return true; +} + +/// Define the three possible path to add node to a SYCL Graph. +/// Shortcut is a sub-type of Record&Replay using Queue shortcut +/// instead of standard kernel submitions. +enum OperationPath { Explicit, RecordReplay, Shortcut }; + +/// Tries to add a memcpy2D node to the graph G +/// It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_memcpy2d extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Dest Pointer to the memory destination +/// @param DestPitch pitch at the destination +/// @param Src Pointer to the memory source +/// @param SrcPitch pitch at the source +/// @param Witdh width of the data to copy +/// @param Height height of the data to copy +template +void addMemcpy2D(experimental::detail::modifiable_command_graph &G, queue &Q, + void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, + size_t Width, size_t Height) { + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +/// Tries to add nodes including images bindless copy instructions +/// to the graph G. It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_bindless_images extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Img Image memory +/// @param HostData Host Pointer to the memory +/// @param ImgUSM USM Pointer to Image memory +/// @param Pitch image pitch +/// @param Desc Image descriptor +template +void addImagesCopies(experimental::detail::modifiable_command_graph &G, + queue &Q, sycl::ext::oneapi::experimental::image_mem Img, + std::vector HostData, void *ImgUSM, + size_t Pitch, + sycl::ext::oneapi::experimental::image_descriptor Desc) { + // simple copy Host to Device + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // simple copy Device to Host + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // simple copy Host to Device USM + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Host to Device + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, + Img.get_handle(), {0, 0, 0}, Desc, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, Img.get_handle(), + {0, 0, 0}, Desc, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, + Img.get_handle(), {0, 0, 0}, Desc, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Device to Host + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Host to Device USM + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} +} // anonymous namespace + +class CommandGraphTest : public ::testing::Test { +public: + CommandGraphTest() + : Mock{}, Plat{Mock.getPlatform()}, Dev{Plat.get_devices()[0]}, + Queue{Dev}, + Graph{Queue.get_context(), + Dev, + {experimental::property::graph::assume_buffer_outlives_graph{}}} { + } + +protected: + void SetUp() override {} + +protected: + unittest::PiMock Mock; + sycl::platform Plat; + sycl::device Dev; + sycl::queue Queue; + experimental::command_graph Graph; +}; + +TEST_F(CommandGraphTest, QueueState) { + experimental::queue_state State = Queue.ext_oneapi_get_state(); + ASSERT_EQ(State, experimental::queue_state::executing); + + experimental::command_graph Graph{Queue.get_context(), Queue.get_device()}; + Graph.begin_recording(Queue); + State = Queue.ext_oneapi_get_state(); + ASSERT_EQ(State, experimental::queue_state::recording); + + Graph.end_recording(); + State = Queue.ext_oneapi_get_state(); + ASSERT_EQ(State, experimental::queue_state::executing); +} + +TEST_F(CommandGraphTest, AddNode) { + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + ASSERT_TRUE(GraphImpl->MRoots.empty()); + + auto Node1 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + ASSERT_NE(sycl::detail::getSyclObjImpl(Node1), nullptr); + ASSERT_FALSE(sycl::detail::getSyclObjImpl(Node1)->isEmpty()); + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ((*GraphImpl->MRoots.begin()).lock(), + sycl::detail::getSyclObjImpl(Node1)); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + + // Add a node which depends on the first + auto Node2Deps = experimental::property::node::depends_on(Node1); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2Deps.get_dependencies().front()), + sycl::detail::getSyclObjImpl(Node1)); + auto Node2 = Graph.add([&](sycl::handler &cgh) {}, {Node2Deps}); + ASSERT_NE(sycl::detail::getSyclObjImpl(Node2), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2)->isEmpty()); + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.front().lock(), + sycl::detail::getSyclObjImpl(Node2)); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MPredecessors.size(), 1lu); + + // Add a third node which depends on both + auto Node3 = + Graph.add([&](sycl::handler &cgh) {}, + {experimental::property::node::depends_on(Node1, Node2)}); + ASSERT_NE(sycl::detail::getSyclObjImpl(Node3), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node3)->isEmpty()); + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.size(), 2lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MSuccessors.size(), 1lu); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MPredecessors.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node3)->MPredecessors.size(), 2lu); + + // Add a fourth node without any dependencies on the others + auto Node4 = Graph.add([&](sycl::handler &cgh) {}); + ASSERT_NE(sycl::detail::getSyclObjImpl(Node4), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node4)->isEmpty()); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.size(), 2lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MSuccessors.size(), 1lu); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node3)->MSuccessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MPredecessors.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node3)->MPredecessors.size(), 2lu); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node4)->MPredecessors.empty()); +} + +TEST_F(CommandGraphTest, Finalize) { + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + sycl::buffer Buf(1); + auto Node1 = Graph.add([&](sycl::handler &cgh) { + sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); + cgh.single_task>([]() {}); + }); + + // Add independent node + auto Node2 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Add a node that depends on Node1 due to the accessor + auto Node3 = Graph.add([&](sycl::handler &cgh) { + sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); + cgh.single_task>([]() {}); + }); + + // Guarantee order of independent nodes 1 and 2 + Graph.make_edge(Node2, Node1); + + auto GraphExec = Graph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + + // The final schedule should contain three nodes in order: 2->1->3 + auto Schedule = GraphExecImpl->getSchedule(); + ASSERT_EQ(Schedule.size(), 3ul); + auto ScheduleIt = Schedule.begin(); + ASSERT_TRUE((*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node2))); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node1))); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node3))); + ASSERT_EQ(Queue.get_context(), GraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, MakeEdge) { + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Add two independent nodes + auto Node1 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2 = Graph.add([&](sycl::handler &cgh) {}); + ASSERT_EQ(GraphImpl->MRoots.size(), 2ul); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2)->MSuccessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2)->MPredecessors.empty()); + + // Connect nodes and verify order + Graph.make_edge(Node1, Node2); + ASSERT_EQ(GraphImpl->MRoots.size(), 1ul); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.front().lock(), + sycl::detail::getSyclObjImpl(Node2)); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MPredecessors.empty()); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2)->MSuccessors.empty()); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2)->MPredecessors.size(), 1lu); +} + +TEST_F(CommandGraphTest, BeginEndRecording) { + sycl::queue Queue2{Queue.get_context(), Dev}; + + // Test throwing behaviour + // Check we can repeatedly begin recording on the same queues + ASSERT_NO_THROW(Graph.begin_recording(Queue)); + ASSERT_NO_THROW(Graph.begin_recording(Queue)); + ASSERT_NO_THROW(Graph.begin_recording(Queue2)); + ASSERT_NO_THROW(Graph.begin_recording(Queue2)); + // Check we can repeatedly end recording on the same queues + ASSERT_NO_THROW(Graph.end_recording(Queue)); + ASSERT_NO_THROW(Graph.end_recording(Queue)); + ASSERT_NO_THROW(Graph.end_recording(Queue2)); + ASSERT_NO_THROW(Graph.end_recording(Queue2)); + // Vector versions + ASSERT_NO_THROW(Graph.begin_recording({Queue, Queue2})); + ASSERT_NO_THROW(Graph.begin_recording({Queue, Queue2})); + ASSERT_NO_THROW(Graph.end_recording({Queue, Queue2})); + ASSERT_NO_THROW(Graph.end_recording({Queue, Queue2})); + + experimental::command_graph Graph2(Queue.get_context(), Dev); + + Graph.begin_recording(Queue); + // Trying to record to a second Graph should throw + ASSERT_ANY_THROW(Graph2.begin_recording(Queue)); + // Trying to end when it is recording to a different graph should throw + ASSERT_ANY_THROW(Graph2.end_recording(Queue)); + Graph.end_recording(Queue); + + // Testing return values of begin and end recording + // Queue should change state so should return true here + ASSERT_TRUE(Graph.begin_recording(Queue)); + // But not changed state here + ASSERT_FALSE(Graph.begin_recording(Queue)); + + // Queue2 should change state so should return true here + ASSERT_TRUE(Graph.begin_recording(Queue2)); + // But not changed state here + ASSERT_FALSE(Graph.begin_recording(Queue2)); + + // Queue should have changed state so should return true + ASSERT_TRUE(Graph.end_recording(Queue)); + // But not changed state here + ASSERT_FALSE(Graph.end_recording(Queue)); + + // Should end recording on Queue2 + ASSERT_TRUE(Graph.end_recording()); + // State should not change on Queue2 now + ASSERT_FALSE(Graph.end_recording(Queue2)); + + // Testing vector begin and end + ASSERT_TRUE(Graph.begin_recording({Queue, Queue2})); + // Both shoudl now not have state changed + ASSERT_FALSE(Graph.begin_recording(Queue)); + ASSERT_FALSE(Graph.begin_recording(Queue2)); + + // End recording on both + ASSERT_TRUE(Graph.end_recording({Queue, Queue2})); + // Both shoudl now not have state changed + ASSERT_FALSE(Graph.end_recording(Queue)); + ASSERT_FALSE(Graph.end_recording(Queue2)); + + // First add one single queue + ASSERT_TRUE(Graph.begin_recording(Queue)); + // Vector begin should still return true as Queue2 has state changed + ASSERT_TRUE(Graph.begin_recording({Queue, Queue2})); + // End recording on Queue2 + ASSERT_TRUE(Graph.end_recording(Queue2)); + // Vector end should still return true as Queue will have state changed + ASSERT_TRUE(Graph.end_recording({Queue, Queue2})); +} + +TEST_F(CommandGraphTest, GetCGCopy) { + auto Node1 = Graph.add([&](sycl::handler &cgh) {}); + auto Node2 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1)}); + + // Get copy of CG of Node2 and check equality + auto Node2Imp = sycl::detail::getSyclObjImpl(Node2); + auto Node2CGCopy = Node2Imp->getCGCopy(); + ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCGType); + ASSERT_EQ(Node2CGCopy->getType(), sycl::detail::CG::Kernel); + ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCommandGroup->getType()); + ASSERT_EQ(Node2CGCopy->getAccStorage(), + Node2Imp->MCommandGroup->getAccStorage()); + ASSERT_EQ(Node2CGCopy->getArgsStorage(), + Node2Imp->MCommandGroup->getArgsStorage()); + ASSERT_EQ(Node2CGCopy->getEvents(), Node2Imp->MCommandGroup->getEvents()); + ASSERT_EQ(Node2CGCopy->getRequirements(), + Node2Imp->MCommandGroup->getRequirements()); + ASSERT_EQ(Node2CGCopy->getSharedPtrStorage(), + Node2Imp->MCommandGroup->getSharedPtrStorage()); +} +TEST_F(CommandGraphTest, SubGraph) { + // Add sub-graph with two nodes + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1Graph)}); + auto GraphExec = Graph.finalize(); + + // Add node to main graph followed by sub-graph and another node + experimental::command_graph MainGraph(Queue.get_context(), Dev); + auto Node1MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2MainGraph = + MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, + {experimental::property::node::depends_on(Node1MainGraph)}); + auto Node3MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node2MainGraph)}); + + // Assert order of the added sub-graph + ASSERT_NE(sycl::detail::getSyclObjImpl(Node2MainGraph), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2MainGraph)->MNodeType == + experimental::node_type::subgraph); + ASSERT_EQ(sycl::detail::getSyclObjImpl(MainGraph)->MRoots.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MSuccessors.size(), + 1lu); + // Subgraph nodes are duplicated when inserted to parent graph on + // finalization. we thus check the node content only. + const bool CompareContentOnly = true; + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1MainGraph) + ->MSuccessors.front() + .lock() + ->MNodeType == experimental::node_type::subgraph); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MSuccessors.size(), + 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MPredecessors.size(), + 0lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MPredecessors.size(), + 1lu); + + // Finalize main graph and check schedule + auto MainGraphExec = MainGraph.finalize(); + auto MainGraphExecImpl = sycl::detail::getSyclObjImpl(MainGraphExec); + auto Schedule = MainGraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // The schedule list must contain 4 nodes: the two nodes from the subgraph are + // merged into the main graph in place of the subgraph node. + ASSERT_EQ(Schedule.size(), 4ul); + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node1MainGraph))); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node1Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node2Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node3MainGraph))); + ASSERT_EQ(Queue.get_context(), MainGraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, SubGraphWithEmptyNode) { + // Add sub-graph with two nodes + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Empty1Graph = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on(Node1Graph)}); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Empty1Graph)}); + + auto GraphExec = Graph.finalize(); + + // Add node to main graph followed by sub-graph and another node + experimental::command_graph MainGraph(Queue.get_context(), Dev); + auto Node1MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2MainGraph = + MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, + {experimental::property::node::depends_on(Node1MainGraph)}); + auto Node3MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node2MainGraph)}); + + // Assert order of the added sub-graph + ASSERT_NE(sycl::detail::getSyclObjImpl(Node2MainGraph), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2MainGraph)->MNodeType == + experimental::node_type::subgraph); + // Check the structure of the main graph. + // 1 root connected to 1 successor (the single root of the subgraph) + ASSERT_EQ(sycl::detail::getSyclObjImpl(MainGraph)->MRoots.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MSuccessors.size(), + 1lu); + // Subgraph nodes are duplicated when inserted to parent graph. + // we thus check the node content only. + const bool CompareContentOnly = true; + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1MainGraph) + ->MSuccessors.front() + .lock() + ->MNodeType == experimental::node_type::subgraph); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MSuccessors.size(), + 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MSuccessors.size(), + 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MPredecessors.size(), + 0lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MPredecessors.size(), + 1lu); + + // Finalize main graph and check schedule + auto MainGraphExec = MainGraph.finalize(); + auto MainGraphExecImpl = sycl::detail::getSyclObjImpl(MainGraphExec); + auto Schedule = MainGraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // The schedule list must contain 5 nodes: 2 main graph nodes and 3 subgraph + // nodes which have been merged. + ASSERT_EQ(Schedule.size(), 5ul); + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node1MainGraph))); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node1Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isEmpty()); // empty node inside the subgraph + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node2Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node3MainGraph))); + ASSERT_EQ(Queue.get_context(), MainGraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, SubGraphWithEmptyNodeLast) { + // Add sub-graph with two nodes + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1Graph)}); + auto EmptyGraph = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on(Node2Graph)}); + + auto GraphExec = Graph.finalize(); + + // Add node to main graph followed by sub-graph and another node + experimental::command_graph MainGraph(Queue.get_context(), Dev); + auto Node1MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2MainGraph = + MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, + {experimental::property::node::depends_on(Node1MainGraph)}); + auto Node3MainGraph = MainGraph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node2MainGraph)}); + + // Assert order of the added sub-graph + ASSERT_NE(sycl::detail::getSyclObjImpl(Node2MainGraph), nullptr); + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node2MainGraph)->MNodeType == + experimental::node_type::subgraph); + // Check the structure of the main graph. + // 1 root connected to 1 successor (the single root of the subgraph) + ASSERT_EQ(sycl::detail::getSyclObjImpl(MainGraph)->MRoots.size(), 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MSuccessors.size(), + 1lu); + // Subgraph nodes are duplicated when inserted to parent graph. + // we thus check the node content only. + const bool CompareContentOnly = true; + ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1MainGraph) + ->MSuccessors.front() + .lock() + ->MNodeType == experimental::node_type::subgraph); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MSuccessors.size(), + 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MSuccessors.size(), + 1lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node1MainGraph)->MPredecessors.size(), + 0lu); + ASSERT_EQ(sycl::detail::getSyclObjImpl(Node2MainGraph)->MPredecessors.size(), + 1lu); + + // Finalize main graph and check schedule + auto MainGraphExec = MainGraph.finalize(); + auto MainGraphExecImpl = sycl::detail::getSyclObjImpl(MainGraphExec); + auto Schedule = MainGraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // The schedule list must contain 5 nodes: 2 main graph nodes and 3 subgraph + // nodes which have been merged. + ASSERT_EQ(Schedule.size(), 5ul); + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node1MainGraph))); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node1Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt) + ->isSimilar(sycl::detail::getSyclObjImpl(Node2Graph), + CompareContentOnly)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isEmpty()); // empty node inside the subgraph + ScheduleIt++; + ASSERT_TRUE( + (*ScheduleIt)->isSimilar(sycl::detail::getSyclObjImpl(Node3MainGraph))); + ASSERT_EQ(Queue.get_context(), MainGraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, RecordSubGraph) { + // Record sub-graph with two nodes + Graph.begin_recording(Queue); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node1Graph); + cgh.single_task>([]() {}); + }); + Graph.end_recording(Queue); + auto GraphExec = Graph.finalize(); + + // Add node to main graph followed by sub-graph and another node + experimental::command_graph MainGraph(Queue.get_context(), Dev); + MainGraph.begin_recording(Queue); + auto Node1MainGraph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2MainGraph = Queue.submit([&](handler &cgh) { + cgh.depends_on(Node1MainGraph); + cgh.ext_oneapi_graph(GraphExec); + }); + auto Node3MainGraph = Queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node2MainGraph); + cgh.single_task>([]() {}); + }); + MainGraph.end_recording(Queue); + + // Finalize main graph and check schedule + auto MainGraphExec = MainGraph.finalize(); + auto MainGraphExecImpl = sycl::detail::getSyclObjImpl(MainGraphExec); + auto Schedule = MainGraphExecImpl->getSchedule(); + + // The schedule list must contain 4 nodes: 2 main graph nodes and 2 subgraph + // nodes which have been merged in to the main graph. + ASSERT_EQ(Schedule.size(), 4ul); + + ASSERT_EQ(Queue.get_context(), MainGraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, InOrderQueue) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Record in-order queue with three nodes + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + // Finalize main graph and check schedule + auto GraphExec = InOrderGraph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + auto Schedule = GraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + ASSERT_EQ(Schedule.size(), 3ul); + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode1)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode2)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode3)); + ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, InOrderQueueWithEmpty) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Record in-order queue with a regular node then empty node then a regular + // node + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit([&](sycl::handler &cgh) {}); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + // Finalize main graph and check schedule + // Note that empty nodes are not scheduled + auto GraphExec = InOrderGraph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + auto Schedule = GraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // the schedule list contains all types of nodes (even empty nodes) + ASSERT_EQ(Schedule.size(), 3ul); + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode1)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isEmpty()); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode3)); + ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Record in-order queue with an empty node then two regular nodes + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit([&](sycl::handler &cgh) {}); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + // Finalize main graph and check schedule + // Note that empty nodes are not scheduled + auto GraphExec = InOrderGraph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + auto Schedule = GraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // the schedule list contains all types of nodes (even empty nodes) + ASSERT_EQ(Schedule.size(), 3ul); + ASSERT_TRUE((*ScheduleIt)->isEmpty()); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode2)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode3)); + ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Record in-order queue with two regular nodes then an empty node + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit([&](sycl::handler &cgh) {}); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + // Finalize main graph and check schedule + // Note that empty nodes are not scheduled + auto GraphExec = InOrderGraph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + auto Schedule = GraphExecImpl->getSchedule(); + auto ScheduleIt = Schedule.begin(); + // the schedule list contains all types of nodes (even empty nodes) + ASSERT_EQ(Schedule.size(), 3ul); + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode1)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isSimilar(PtrNode2)); + ScheduleIt++; + ASSERT_TRUE((*ScheduleIt)->isEmpty()); + ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext()); +} + +TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + auto EventInitial = + InOrderQueue.submit([&](handler &CGH) { CGH.host_task([=]() {}); }); + auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); + + // Record in-order queue with three nodes. + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + auto EventLast = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); + auto WaitList = EventLastImpl->getWaitList(); + // Previous task is a host task. Explicit dependency is needed to enforce the + // execution order. + ASSERT_EQ(WaitList.size(), 1lu); + ASSERT_EQ(WaitList[0], EventInitialImpl); +} + +TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + auto EventInitial = + InOrderQueue.submit([&](handler &CGH) { CGH.host_task([=]() {}); }); + auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); + + // Record in-order queue with three nodes. + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + auto InOrderGraphExec = InOrderGraph.finalize(); + auto EventGraph = InOrderQueue.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); + + auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); + auto EventGraphWaitList = EventGraphImpl->getWaitList(); + // Previous task is a host task. Explicit dependency is needed to enforce the + // execution order. + ASSERT_EQ(EventGraphWaitList.size(), 1lu); + ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); + + auto EventLast = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); + auto EventLastWaitList = EventLastImpl->getWaitList(); + // Previous task is not a host task. Explicit dependency is still needed + // to properly handle blocked tasks (the event will be filtered out before + // submission to the backend). + ASSERT_EQ(EventLastWaitList.size(), 1lu); + ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); +} + +TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Check if device has usm shared allocation. + if (!InOrderQueue.get_device().has(sycl::aspect::usm_shared_allocations)) + return; + size_t Size = 128; + std::vector TestDataHost(Size); + int *TestData = sycl::malloc_shared(Size, InOrderQueue); + + auto EventInitial = InOrderQueue.memset(TestData, 1, Size * sizeof(int)); + auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); + + // Record in-order queue with three nodes. + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + auto InOrderGraphExec = InOrderGraph.finalize(); + auto EventGraph = InOrderQueue.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); + + auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); + auto EventGraphWaitList = EventGraphImpl->getWaitList(); + // Previous task is a memset. Explicit dependency is needed to enforce the + // execution order. + ASSERT_EQ(EventGraphWaitList.size(), 1lu); + ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); + + auto EventLast = + InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); + auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); + auto EventLastWaitList = EventLastImpl->getWaitList(); + // Previous task is not a host task. In Order queue dependency is managed by + // the backend for non-host kernels. + ASSERT_EQ(EventLastWaitList.size(), 0lu); +} + +TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + // Check if device has usm shared allocation. + if (!InOrderQueue.get_device().has(sycl::aspect::usm_shared_allocations)) + return; + size_t Size = 128; + std::vector TestDataHost(Size); + int *TestData = sycl::malloc_shared(Size, InOrderQueue); + + auto EventInitial = + InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); + auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); + + // Record in-order queue with three nodes. + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + auto InOrderGraphExec = InOrderGraph.finalize(); + auto EventGraph = InOrderQueue.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); + + auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); + auto EventGraphWaitList = EventGraphImpl->getWaitList(); + // Previous task is a memcpy. Explicit dependency is needed to enforce the + // execution order + ASSERT_EQ(EventGraphWaitList.size(), 1lu); + ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); + + auto EventLast = + InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); + auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); + auto EventLastWaitList = EventLastImpl->getWaitList(); + // Previous task is not a host task. In Order queue dependency is managed by + // the backend for non-host kernels. + ASSERT_EQ(EventLastWaitList.size(), 0lu); +} + +TEST_F(CommandGraphTest, ExplicitBarrierException) { + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + auto Barrier = + Graph.add([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +TEST_F(CommandGraphTest, EnqueueBarrier) { + Graph.begin_recording(Queue); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Barrier = + Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + + auto Node4Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node5Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(Queue); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | / + // \ | / + // (B) + // / \ + // (4) (5) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + for (auto Root : GraphImpl->MRoots) { + auto Node = Root.lock(); + ASSERT_EQ(Node->MSuccessors.size(), 1lu); + auto BarrierNode = Node->MSuccessors.front().lock(); + ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); + ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), + sycl::detail::getSyclObjImpl(Barrier)); + ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); + ASSERT_EQ(BarrierNode->MSuccessors.size(), 2lu); + } +} + +TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { + sycl::queue Queue2{Queue.get_context(), Dev}; + Graph.begin_recording({Queue, Queue2}); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Barrier = + Queue2.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + + auto Node4Graph = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node5Graph = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | / + // \ | / + // (B) + // / \ + // (4) (5) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + for (auto Root : GraphImpl->MRoots) { + auto Node = Root.lock(); + ASSERT_EQ(Node->MSuccessors.size(), 1lu); + auto BarrierNode = Node->MSuccessors.front().lock(); + ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); + ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), + sycl::detail::getSyclObjImpl(Barrier)); + ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); + ASSERT_EQ(BarrierNode->MSuccessors.size(), 2lu); + } +} + +TEST_F(CommandGraphTest, EnqueueBarrierWaitList) { + Graph.begin_recording(Queue); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Barrier = Queue.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_barrier({Node1Graph, Node2Graph}); + }); + + auto Node4Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node3Graph); + cgh.single_task>([]() {}); + }); + + Graph.end_recording(Queue); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | | + // \ | | + // (B) | + // / \ / + // (4) (5) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + for (auto Root : GraphImpl->MRoots) { + auto Node = Root.lock(); + ASSERT_EQ(Node->MSuccessors.size(), 1lu); + auto SuccNode = Node->MSuccessors.front().lock(); + if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 2lu); + } else { + // Node 5 + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + } + } +} + +TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { + sycl::queue Queue2{Queue.get_context(), Dev}; + Graph.begin_recording({Queue, Queue2}); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Node1Graph comes from Queue, and Node2Graph comes from Queue2 + auto Barrier = Queue.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_barrier({Node1Graph, Node2Graph}); + }); + + auto Node4Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node3Graph); + cgh.single_task>([]() {}); + }); + + auto Barrier2 = Queue2.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_barrier({Barrier, Node4Graph, Node5Graph}); + }); + + Graph.end_recording(); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | | + // \ | | + // (B) | + // /|\ / + // (4)|(5) + // \ | / + // \|/ + // (B2) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + for (auto Root : GraphImpl->MRoots) { + auto Node = Root.lock(); + ASSERT_EQ(Node->MSuccessors.size(), 1lu); + auto SuccNode = Node->MSuccessors.front().lock(); + if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu); + } else { + // Node 5 + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + } + } +} + +TEST_F(CommandGraphTest, EnqueueMultipleBarrier) { + Graph.begin_recording(Queue); + auto Node1Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Barrier1 = Queue.submit([&](sycl::handler &cgh) { + cgh.ext_oneapi_barrier({Node1Graph, Node2Graph}); + }); + + auto Node4Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(Node3Graph); + cgh.single_task>([]() {}); + }); + + auto Barrier2 = + Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + + auto Node6Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node7Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node8Graph = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(Queue); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | | + // \ | | + // (B1) | + // /|\ / + // (4)|(5) + // \|/ + // (B2) + // /|\ + // / | \ + // (6) (7) (8) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + for (auto Root : GraphImpl->MRoots) { + auto Node = Root.lock(); + ASSERT_EQ(Node->MSuccessors.size(), 1lu); + auto SuccNode = Node->MSuccessors.front().lock(); + if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier1)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu); + for (auto Succ1 : SuccNode->MSuccessors) { + auto SuccBarrier1 = Succ1.lock(); + if (SuccBarrier1->MCGType == sycl::detail::CG::Barrier) { + ASSERT_EQ(GraphImpl->getEventForNode(SuccBarrier1), + sycl::detail::getSyclObjImpl(Barrier2)); + ASSERT_EQ(SuccBarrier1->MPredecessors.size(), 3lu); + ASSERT_EQ(SuccBarrier1->MSuccessors.size(), 3lu); + for (auto Succ2 : SuccBarrier1->MSuccessors) { + auto SuccBarrier2 = Succ2.lock(); + // Nodes 6, 7, 8 + ASSERT_EQ(SuccBarrier2->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccBarrier2->MSuccessors.size(), 0lu); + } + } else { + // Node 4 or Node 5 + if (GraphImpl->getEventForNode(SuccBarrier1) == + sycl::detail::getSyclObjImpl(Node4Graph)) { + // Node 4 + ASSERT_EQ(SuccBarrier1->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccBarrier1->MSuccessors.size(), 1lu); + } + } + } + } else { + // Node 5 + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + } + } +} + +TEST_F(CommandGraphTest, DependencyLeavesKeyword1) { + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto EmptyNode = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on_all_leaves()}); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | / + // \ | / + // (E) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + auto EmptyImpl = sycl::detail::getSyclObjImpl(EmptyNode); + ASSERT_EQ(EmptyImpl->MPredecessors.size(), 3lu); + ASSERT_EQ(EmptyImpl->MSuccessors.size(), 0lu); + + auto Node1Impl = sycl::detail::getSyclObjImpl(Node1Graph); + ASSERT_EQ(Node1Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node1Impl->MSuccessors[0].lock(), EmptyImpl); + auto Node2Impl = sycl::detail::getSyclObjImpl(Node2Graph); + ASSERT_EQ(Node2Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node2Impl->MSuccessors[0].lock(), EmptyImpl); + auto Node3Impl = sycl::detail::getSyclObjImpl(Node3Graph); + ASSERT_EQ(Node3Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node3Impl->MSuccessors[0].lock(), EmptyImpl); +} + +TEST_F(CommandGraphTest, DependencyLeavesKeyword2) { + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node3Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node4Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node3Graph)}); + + auto EmptyNode = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on_all_leaves()}); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1) (2) (3) + // \ | / + // \ | (4) + // \| / + // (E) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + auto EmptyImpl = sycl::detail::getSyclObjImpl(EmptyNode); + ASSERT_EQ(EmptyImpl->MPredecessors.size(), 3lu); + ASSERT_EQ(EmptyImpl->MSuccessors.size(), 0lu); + + auto Node1Impl = sycl::detail::getSyclObjImpl(Node1Graph); + ASSERT_EQ(Node1Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node1Impl->MSuccessors[0].lock(), EmptyImpl); + auto Node2Impl = sycl::detail::getSyclObjImpl(Node2Graph); + ASSERT_EQ(Node2Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node2Impl->MSuccessors[0].lock(), EmptyImpl); + auto Node3Impl = sycl::detail::getSyclObjImpl(Node3Graph); + ASSERT_EQ(Node3Impl->MSuccessors.size(), 1lu); + + auto Node4Impl = sycl::detail::getSyclObjImpl(Node4Graph); + ASSERT_EQ(Node4Impl->MPredecessors.size(), 1lu); + ASSERT_EQ(Node4Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node4Impl->MSuccessors[0].lock(), EmptyImpl); +} + +TEST_F(CommandGraphTest, DependencyLeavesKeyword3) { + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto EmptyNode = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on_all_leaves()}); + auto Node3Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1Graph)}); + auto Node4Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(EmptyNode)}); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1)(2) + // |\ | + // | (E) + // (3) | + // (4) + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + auto EmptyImpl = sycl::detail::getSyclObjImpl(EmptyNode); + ASSERT_EQ(EmptyImpl->MPredecessors.size(), 2lu); + ASSERT_EQ(EmptyImpl->MSuccessors.size(), 1lu); + + auto Node1Impl = sycl::detail::getSyclObjImpl(Node1Graph); + auto Node2Impl = sycl::detail::getSyclObjImpl(Node2Graph); + ASSERT_EQ(Node1Impl->MSuccessors.size(), 2lu); + ASSERT_EQ(Node2Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node2Impl->MSuccessors[0].lock(), EmptyImpl); + + auto Node3Impl = sycl::detail::getSyclObjImpl(Node3Graph); + ASSERT_EQ(Node3Impl->MPredecessors.size(), 1lu); + ASSERT_EQ(Node3Impl->MPredecessors[0].lock(), Node1Impl); + + auto Node4Impl = sycl::detail::getSyclObjImpl(Node4Graph); + ASSERT_EQ(Node4Impl->MPredecessors.size(), 1lu); + ASSERT_EQ(Node4Impl->MPredecessors[0].lock(), EmptyImpl); +} + +TEST_F(CommandGraphTest, DependencyLeavesKeyword4) { + auto Node1Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto EmptyNode = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on_all_leaves()}); + auto Node3Graph = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto EmptyNode2 = + Graph.add([&](sycl::handler &cgh) { /*empty node */ }, + {experimental::property::node::depends_on_all_leaves()}); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + + // Check the graph structure + // (1)(2) + // \/ + // (E1) (3) + // \ / + // (E2) + ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); + auto EmptyImpl = sycl::detail::getSyclObjImpl(EmptyNode); + ASSERT_EQ(EmptyImpl->MPredecessors.size(), 2lu); + ASSERT_EQ(EmptyImpl->MSuccessors.size(), 1lu); + + auto Node1Impl = sycl::detail::getSyclObjImpl(Node1Graph); + ASSERT_EQ(Node1Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node1Impl->MSuccessors[0].lock(), EmptyImpl); + auto Node2Impl = sycl::detail::getSyclObjImpl(Node2Graph); + ASSERT_EQ(Node2Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node2Impl->MSuccessors[0].lock(), EmptyImpl); + + auto EmptyImpl2 = sycl::detail::getSyclObjImpl(EmptyNode2); + auto Node3Impl = sycl::detail::getSyclObjImpl(Node3Graph); + ASSERT_EQ(Node3Impl->MPredecessors.size(), 0lu); + ASSERT_EQ(Node3Impl->MSuccessors.size(), 1lu); + ASSERT_EQ(Node3Impl->MSuccessors[0].lock(), EmptyImpl2); + + ASSERT_EQ(EmptyImpl2->MPredecessors.size(), 2lu); +} + +TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { + device D; + if (!D.get_info< + ext::codeplay::experimental::info::device::supports_fusion>()) { + // Skip this test if the device does not support fusion. Otherwise, the + // queue construction in the next step would fail. + GTEST_SKIP(); + } + + queue Q{D, ext::codeplay::experimental::property::queue::enable_fusion{}}; + + experimental::command_graph Graph{ + Q.get_context(), Q.get_device()}; + + ext::codeplay::experimental::fusion_wrapper fw{Q}; + + // Test: Start fusion on a queue that is in recording mode + Graph.begin_recording(Q); + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + fw.start_fusion(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + Graph.end_recording(Q); + + // Test: begin recording a queue in fusion mode + + fw.start_fusion(); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + Graph.begin_recording(Q); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) { + constexpr size_t RECT_WIDTH = 30; + constexpr size_t RECT_HEIGHT = 21; + constexpr size_t SRC_ELEMS = RECT_WIDTH * RECT_HEIGHT; + constexpr size_t DST_ELEMS = SRC_ELEMS; + + using T = int; + + Graph.begin_recording(Queue); + + T *USMMemSrc = malloc_device(SRC_ELEMS, Queue); + T *USMMemDst = malloc_device(DST_ELEMS, Queue); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + Graph.end_recording(); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + sycl::free(USMMemSrc, Queue); + sycl::free(USMMemDst, Queue); +} + +// Tests that using reductions in a graph will throw. +TEST_F(CommandGraphTest, Reductions) { + int ReduVar = 0; + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + CGH.parallel_for( + range<1>{1}, reduction(&ReduVar, int{0}, sycl::plus<>()), + [=](item<1> idx, auto &Sum) {}); + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +TEST_F(CommandGraphTest, BindlessExceptionCheck) { + auto Ctxt = Queue.get_context(); + + // declare image data + size_t Height = 13; + size_t Width = 7; + size_t Depth = 11; + size_t N = Height * Width * Depth; + std::vector DataIn(N); + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor Desc( + {Width, Height, Depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem ImgMem(Desc, Dev, Ctxt); + // Extension: returns the device pointer to USM allocated pitched memory + size_t Pitch = 0; + auto ImgMemUSM = sycl::ext::oneapi::experimental::pitched_alloc_device( + &Pitch, Desc, Queue); + + Graph.begin_recording(Queue); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + Graph.end_recording(); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + sycl::free(ImgMemUSM, Ctxt); +} + +TEST_F(CommandGraphTest, MakeEdgeErrors) { + // Set up some nodes in the graph + auto NodeA = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Test error on calling make_edge when a queue is recording to the graph + Graph.begin_recording(Queue); + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeB); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + Graph.end_recording(Queue); + + // Test error on Src and Dest being the same + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeA); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Test Src or Dest not being found in the graph + experimental::command_graph GraphOther{ + Queue.get_context(), Queue.get_device()}; + auto NodeOther = GraphOther.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeOther); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + ASSERT_THROW( + { + try { + Graph.make_edge(NodeOther, NodeB); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Test that adding a cycle with cycle checks leaves the graph in the correct + // state. + + auto CheckGraphStructure = [&]() { + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + auto NodeAImpl = sycl::detail::getSyclObjImpl(NodeA); + auto NodeBImpl = sycl::detail::getSyclObjImpl(NodeB); + + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ((*GraphImpl->MRoots.begin()).lock(), NodeAImpl); + + ASSERT_EQ(NodeAImpl->MSuccessors.size(), 1lu); + ASSERT_EQ(NodeAImpl->MPredecessors.size(), 0lu); + ASSERT_EQ(NodeAImpl->MSuccessors.front().lock(), NodeBImpl); + + ASSERT_EQ(NodeBImpl->MSuccessors.size(), 0lu); + ASSERT_EQ(NodeBImpl->MPredecessors.size(), 1lu); + ASSERT_EQ(NodeBImpl->MPredecessors.front().lock(), NodeAImpl); + }; + // Make a normal edge + ASSERT_NO_THROW(Graph.make_edge(NodeA, NodeB)); + + // Check the expected structure of the graph + CheckGraphStructure(); + + // Introduce a cycle, make sure it throws + ASSERT_THROW( + { + try { + Graph.make_edge(NodeB, NodeA); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Re-check graph structure to make sure the graph state has not been modified + CheckGraphStructure(); +} + +TEST_F(CommandGraphTest, InvalidBuffer) { + // Check that using a buffer with write_back enabled in a graph will throw. + int Data; + // Create a buffer which does not have write-back disabled. + buffer Buffer{&Data, range<1>{1}}; + + // Use this buffer in the graph, this should throw. + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +TEST_F(CommandGraphTest, InvalidHostAccessor) { + // Check that creating a host_accessor on a buffer which is in use by a graph + // will throw. + + // Create a buffer which does not have write-back disabled. + buffer Buffer{range<1>{1}}; + + { + // Create a graph in local scope so we can destroy it + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + // Add the buffer to the graph. + Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + }); + + // Attempt to create a host_accessor, which should throw. + ASSERT_THROW( + { + try { + host_accessor HostAcc{Buffer}; + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + } + // Graph is now out of scope so we should be able to create a host_accessor + ASSERT_NO_THROW({ host_accessor HostAcc{Buffer}; }); +} + +TEST_F(CommandGraphTest, GraphPartitionsMerging) { + // Tests that the parition merging algo works as expected in case of backward + // dependencies + auto NodeA = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeA)}); + auto NodeHT1 = Graph.add([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }, + {experimental::property::node::depends_on(NodeB)}); + auto NodeC = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeHT1)}); + auto NodeD = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeB)}); + auto NodeHT2 = Graph.add([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }, + {experimental::property::node::depends_on(NodeD)}); + auto NodeE = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeHT2)}); + auto NodeF = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(NodeHT2)}); + + // Backward dependency + Graph.make_edge(NodeE, NodeHT1); + + auto GraphExec = Graph.finalize(); + auto GraphExecImpl = sycl::detail::getSyclObjImpl(GraphExec); + auto PartitionsList = GraphExecImpl->getPartitions(); + ASSERT_EQ(PartitionsList.size(), 5ul); + ASSERT_FALSE(PartitionsList[0]->isHostTask()); + ASSERT_TRUE(PartitionsList[1]->isHostTask()); + ASSERT_FALSE(PartitionsList[2]->isHostTask()); + ASSERT_TRUE(PartitionsList[3]->isHostTask()); + ASSERT_FALSE(PartitionsList[4]->isHostTask()); +} + +TEST_F(CommandGraphTest, GetNodeQueries) { + // Tests graph and node queries for correctness + + // Add some nodes to the graph for testing and test after each addition. + auto RootA = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + { + auto GraphRoots = Graph.get_root_nodes(); + auto GraphNodes = Graph.get_nodes(); + ASSERT_EQ(GraphRoots.size(), 1lu); + ASSERT_EQ(GraphNodes.size(), 1lu); + } + auto RootB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + { + auto GraphRoots = Graph.get_root_nodes(); + auto GraphNodes = Graph.get_nodes(); + ASSERT_EQ(GraphRoots.size(), 2lu); + ASSERT_EQ(GraphNodes.size(), 2lu); + } + auto NodeA = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(RootA, RootB)}); + { + auto GraphRoots = Graph.get_root_nodes(); + auto GraphNodes = Graph.get_nodes(); + ASSERT_EQ(GraphRoots.size(), 2lu); + ASSERT_EQ(GraphNodes.size(), 3lu); + } + auto NodeB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(RootB)}); + { + auto GraphRoots = Graph.get_root_nodes(); + auto GraphNodes = Graph.get_nodes(); + ASSERT_EQ(GraphRoots.size(), 2lu); + ASSERT_EQ(GraphNodes.size(), 4lu); + } + auto RootC = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + { + auto GraphRoots = Graph.get_root_nodes(); + auto GraphNodes = Graph.get_nodes(); + ASSERT_EQ(GraphRoots.size(), 3lu); + ASSERT_EQ(GraphNodes.size(), 5lu); + } + + ASSERT_EQ(RootA.get_predecessors().size(), 0lu); + ASSERT_EQ(RootA.get_successors().size(), 1lu); + ASSERT_EQ(RootB.get_predecessors().size(), 0lu); + ASSERT_EQ(RootB.get_successors().size(), 2lu); + ASSERT_EQ(RootC.get_predecessors().size(), 0lu); + ASSERT_EQ(RootC.get_successors().size(), 0lu); + ASSERT_EQ(NodeA.get_predecessors().size(), 2lu); + ASSERT_EQ(NodeA.get_successors().size(), 0lu); + ASSERT_EQ(NodeB.get_predecessors().size(), 1lu); + ASSERT_EQ(NodeB.get_successors().size(), 0lu); + + // List of nodes that we've added in the order they were added. + std::vector NodeList{RootA, RootB, NodeA, NodeB, RootC}; + auto GraphNodes = Graph.get_nodes(); + + // Check ordering of all nodes is correct + for (size_t i = 0; i < GraphNodes.size(); i++) { + ASSERT_EQ(sycl::detail::getSyclObjImpl(GraphNodes[i]), + sycl::detail::getSyclObjImpl(NodeList[i])); + } +} + +TEST_F(CommandGraphTest, NodeTypeQueries) { + + // Allocate some pointers for testing memory nodes + int *PtrA = malloc_device(16, Queue); + int *PtrB = malloc_device(16, Queue); + + auto NodeKernel = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + ASSERT_EQ(NodeKernel.get_type(), experimental::node_type::kernel); + + auto NodeMemcpy = Graph.add( + [&](sycl::handler &cgh) { cgh.memcpy(PtrA, PtrB, 16 * sizeof(int)); }); + ASSERT_EQ(NodeMemcpy.get_type(), experimental::node_type::memcpy); + + auto NodeMemset = Graph.add( + [&](sycl::handler &cgh) { cgh.memset(PtrB, 7, 16 * sizeof(int)); }); + ASSERT_EQ(NodeMemset.get_type(), experimental::node_type::memfill); + + auto NodeMemfill = + Graph.add([&](sycl::handler &cgh) { cgh.fill(PtrB, 7, 16); }); + ASSERT_EQ(NodeMemfill.get_type(), experimental::node_type::memfill); + + auto NodePrefetch = Graph.add( + [&](sycl::handler &cgh) { cgh.prefetch(PtrA, 16 * sizeof(int)); }); + ASSERT_EQ(NodePrefetch.get_type(), experimental::node_type::prefetch); + + auto NodeMemadvise = Graph.add( + [&](sycl::handler &cgh) { cgh.mem_advise(PtrA, 16 * sizeof(int), 1); }); + ASSERT_EQ(NodeMemadvise.get_type(), experimental::node_type::memadvise); + + // Use queue recording for barrier since it is not supported in explicit API + Graph.begin_recording(Queue); + auto EventBarrier = + Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + Graph.end_recording(); + + auto NodeBarrier = experimental::node::get_node_from_event(EventBarrier); + ASSERT_EQ(NodeBarrier.get_type(), + experimental::node_type::ext_oneapi_barrier); + + auto NodeHostTask = + Graph.add([&](sycl::handler &cgh) { cgh.host_task([]() {}); }); + ASSERT_EQ(NodeHostTask.get_type(), experimental::node_type::host_task); + + auto NodeEmpty = Graph.add(); + ASSERT_EQ(NodeEmpty.get_type(), experimental::node_type::empty); + + experimental::command_graph Subgraph(Queue.get_context(), Dev); + // Add an empty node to the subgraph + Subgraph.add(); + + auto SubgraphExec = Subgraph.finalize(); + auto NodeSubgraph = Graph.add( + [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(SubgraphExec); }); + ASSERT_EQ(NodeSubgraph.get_type(), experimental::node_type::subgraph); +} + +TEST_F(CommandGraphTest, GetNodeFromEvent) { + // Test getting a node from a recorded event and using that as a dependency + // for an explicit node + Graph.begin_recording(Queue); + auto EventKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + experimental::node NodeKernelA = + experimental::node::get_node_from_event(EventKernel); + + // Add node as a dependency with the property + auto NodeKernelB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + experimental::property::node::depends_on(NodeKernelA)); + + // Test adding a dependency through make_edge + auto NodeKernelC = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + ASSERT_NO_THROW(Graph.make_edge(NodeKernelA, NodeKernelC)); + + auto GraphExec = Graph.finalize(); +} + +TEST_F(CommandGraphTest, ProfilingException) { + Graph.begin_recording(Queue); + auto Event1 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Event2 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(Queue); + + try { + Event1.get_profiling_info(); + } catch (exception &Exception) { + ASSERT_FALSE( + std::string(Exception.what()) + .find("Profiling information is unavailable for events returned " + "from a submission to a queue in the recording state.") == + std::string::npos); + } +} + +class MultiThreadGraphTest : public CommandGraphTest { +public: + MultiThreadGraphTest() + : CommandGraphTest(), NumThreads(std::thread::hardware_concurrency()), + SyncPoint(NumThreads) { + Threads.reserve(NumThreads); + } + +protected: + const unsigned NumThreads; + Barrier SyncPoint; + std::vector Threads; +}; + +TEST_F(MultiThreadGraphTest, BeginEndRecording) { + auto RecordGraph = [&]() { + queue MyQueue{Queue.get_context(), Queue.get_device()}; + + SyncPoint.wait(); + + Graph.begin_recording(MyQueue); + runKernels(MyQueue); + Graph.end_recording(MyQueue); + }; + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads.emplace_back(RecordGraph); + } + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads[i].join(); + } + + // Reference computation + queue QueueRef{Queue.get_context(), Queue.get_device()}; + experimental::command_graph GraphRef{ + Queue.get_context(), Queue.get_device()}; + + for (unsigned i = 0; i < NumThreads; ++i) { + queue MyQueue{Queue.get_context(), Queue.get_device()}; + GraphRef.begin_recording(MyQueue); + runKernels(MyQueue); + GraphRef.end_recording(MyQueue); + } + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + auto GraphRefImpl = sycl::detail::getSyclObjImpl(GraphRef); + ASSERT_EQ(GraphImpl->hasSimilarStructure(GraphRefImpl), true); +} + +TEST_F(MultiThreadGraphTest, ExplicitAddNodes) { + auto RecordGraph = [&]() { + queue MyQueue{Queue.get_context(), Queue.get_device()}; + + SyncPoint.wait(); + addKernels(Graph); + }; + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads.emplace_back(RecordGraph); + } + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads[i].join(); + } + + // Reference computation + queue QueueRef; + experimental::command_graph GraphRef{ + Queue.get_context(), Queue.get_device()}; + + for (unsigned i = 0; i < NumThreads; ++i) { + addKernels(GraphRef); + } + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + auto GraphRefImpl = sycl::detail::getSyclObjImpl(GraphRef); + ASSERT_EQ(GraphImpl->hasSimilarStructure(GraphRefImpl), true); +} + +TEST_F(MultiThreadGraphTest, RecordAddNodes) { + Graph.begin_recording(Queue); + auto RecordGraph = [&]() { + SyncPoint.wait(); + runKernels(Queue); + }; + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads.emplace_back(RecordGraph); + } + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads[i].join(); + } + + // We stop recording the Queue when all threads have finished their processing + Graph.end_recording(Queue); + + // Reference computation + queue QueueRef{Queue.get_context(), Queue.get_device()}; + experimental::command_graph GraphRef{ + Queue.get_context(), Queue.get_device()}; + + GraphRef.begin_recording(QueueRef); + for (unsigned i = 0; i < NumThreads; ++i) { + runKernels(QueueRef); + } + GraphRef.end_recording(QueueRef); + + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + auto GraphRefImpl = sycl::detail::getSyclObjImpl(GraphRef); + ASSERT_EQ(GraphImpl->hasSimilarStructure(GraphRefImpl), true); +} + +TEST_F(MultiThreadGraphTest, RecordAddNodesInOrderQueue) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + queue InOrderQueue{Dev, Properties}; + + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + + InOrderGraph.begin_recording(InOrderQueue); + auto RecordGraph = [&]() { + SyncPoint.wait(); + runKernelsInOrder(InOrderQueue); + }; + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads.emplace_back(RecordGraph); + } + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads[i].join(); + } + + // We stop recording the Queue when all threads have finished their processing + InOrderGraph.end_recording(InOrderQueue); + + // Reference computation + queue InOrderQueueRef{Dev, Properties}; + experimental::command_graph + InOrderGraphRef{InOrderQueueRef.get_context(), + InOrderQueueRef.get_device()}; + + InOrderGraphRef.begin_recording(InOrderQueueRef); + for (unsigned i = 0; i < NumThreads; ++i) { + runKernelsInOrder(InOrderQueueRef); + } + InOrderGraphRef.end_recording(InOrderQueueRef); + + auto GraphImpl = sycl::detail::getSyclObjImpl(InOrderGraph); + auto GraphRefImpl = sycl::detail::getSyclObjImpl(InOrderGraphRef); + ASSERT_EQ(GraphImpl->getNumberOfNodes(), GraphRefImpl->getNumberOfNodes()); + + // In-order graph must have only a single root + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + + // Check structure graph + auto CurrentNode = (*GraphImpl->MRoots.begin()).lock(); + for (size_t i = 1; i <= GraphImpl->getNumberOfNodes(); i++) { + EXPECT_LE(CurrentNode->MSuccessors.size(), 1lu); + + // Checking the last node has no successors + if (i == GraphImpl->getNumberOfNodes()) { + EXPECT_EQ(CurrentNode->MSuccessors.size(), 0lu); + } else { + // Check other nodes have 1 successor + EXPECT_EQ(CurrentNode->MSuccessors.size(), 1lu); + CurrentNode = CurrentNode->MSuccessors[0].lock(); + } + } +} + +TEST_F(MultiThreadGraphTest, Finalize) { + addKernels(Graph); + + std::mutex MutexMap; + + std::map> + GraphsExecMap; + auto FinalizeGraph = [&](int ThreadNum) { + SyncPoint.wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + + std::lock_guard Guard(MutexMap); + GraphsExecMap.insert( + std::map>:: + value_type(ThreadNum, GraphExec)); + }; + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads.emplace_back(FinalizeGraph, i); + } + + for (unsigned i = 0; i < NumThreads; ++i) { + Threads[i].join(); + } + + // Reference computation + queue QueueRef; + experimental::command_graph GraphRef{ + Queue.get_context(), Queue.get_device()}; + + addKernels(GraphRef); + + for (unsigned i = 0; i < NumThreads; ++i) { + auto GraphExecRef = GraphRef.finalize(); + QueueRef.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExecRef); }); + auto GraphExecImpl = + sycl::detail::getSyclObjImpl(GraphsExecMap.find(i)->second); + auto GraphExecRefImpl = sycl::detail::getSyclObjImpl(GraphExecRef); + ASSERT_EQ(checkExecGraphSchedule(GraphExecImpl, GraphExecRefImpl), true); + } +} + +// Test adding fill and memset nodes to a graph +TEST_F(CommandGraphTest, FillMemsetNodes) { + const int Value = 7; + // Buffer fill + buffer Buffer{range<1>{1}}; + Buffer.set_write_back(false); + + { + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.fill(Acc, Value); + }); + auto NodeB = Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.fill(Acc, Value); + }); + + auto NodeAImpl = sycl::detail::getSyclObjImpl(NodeA); + auto NodeBImpl = sycl::detail::getSyclObjImpl(NodeB); + + // Check Operator== + EXPECT_EQ(NodeAImpl, NodeAImpl); + EXPECT_NE(NodeAImpl, NodeBImpl); + } + + // USM + { + int *USMPtr = malloc_device(1, Queue); + + // We need to create some differences between nodes because unlike buffer + // fills they are not differentiated on accessor ptr value. + auto FillNodeA = + Graph.add([&](handler &CGH) { CGH.fill(USMPtr, Value, 1); }); + auto FillNodeB = + Graph.add([&](handler &CGH) { CGH.fill(USMPtr, Value + 1, 1); }); + auto MemsetNodeA = + Graph.add([&](handler &CGH) { CGH.memset(USMPtr, Value, 1); }); + auto MemsetNodeB = + Graph.add([&](handler &CGH) { CGH.memset(USMPtr, Value, 2); }); + + auto FillNodeAImpl = sycl::detail::getSyclObjImpl(FillNodeA); + auto FillNodeBImpl = sycl::detail::getSyclObjImpl(FillNodeB); + auto MemsetNodeAImpl = sycl::detail::getSyclObjImpl(MemsetNodeA); + auto MemsetNodeBImpl = sycl::detail::getSyclObjImpl(MemsetNodeB); + + // Check Operator== + EXPECT_EQ(FillNodeAImpl, FillNodeAImpl); + EXPECT_EQ(FillNodeBImpl, FillNodeBImpl); + EXPECT_NE(FillNodeAImpl, FillNodeBImpl); + + EXPECT_EQ(MemsetNodeAImpl, MemsetNodeAImpl); + EXPECT_EQ(MemsetNodeBImpl, MemsetNodeBImpl); + EXPECT_NE(MemsetNodeAImpl, MemsetNodeBImpl); + sycl::free(USMPtr, Queue); + } +} From 221e852d3f5f6a5bfffa62b3f08613f941b4d853 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 1 Mar 2024 13:38:27 +0000 Subject: [PATCH 04/26] Adjusted graph test to treat memset as memfill --- sycl/unittests/Extensions/CommandGraph/Queries.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/Extensions/CommandGraph/Queries.cpp b/sycl/unittests/Extensions/CommandGraph/Queries.cpp index 35057a19b86b7..0b05f57482d64 100644 --- a/sycl/unittests/Extensions/CommandGraph/Queries.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Queries.cpp @@ -110,7 +110,7 @@ TEST_F(CommandGraphTest, NodeTypeQueries) { auto NodeMemset = Graph.add( [&](sycl::handler &cgh) { cgh.memset(PtrB, 7, 16 * sizeof(int)); }); - ASSERT_EQ(NodeMemset.get_type(), experimental::node_type::memset); + ASSERT_EQ(NodeMemset.get_type(), experimental::node_type::memfill); auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { cgh.fill(PtrB, 7, 16); }); From c696e69d26e90edc66b06215aa5b0b13363e386f Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 1 Mar 2024 13:51:26 +0000 Subject: [PATCH 05/26] Updated windows symbols --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- sycl/test/abi/sycl_symbols_windows.dump | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 387458e7ee8ba..6678d0f16829b 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,7 +82,7 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/konradkusiak97/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG e400d8f37e6ccef65ff62491b417fc43b8c659ce) + set(UNIFIED_RUNTIME_TAG URChangesForFillCombined) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index a3035337d2fcf..9d64d4e5fa3cd 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1110,8 +1110,9 @@ ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ From 0fc3f06e1e9f2f70402a1630f3da80748c1408ed Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 29 Mar 2024 18:43:11 +0000 Subject: [PATCH 06/26] Changed to correctly setting memset node and modified native_cpu symbols --- sycl/source/handler.cpp | 2 +- sycl/test/abi/pi_nativecpu_symbol_check.dump | 2 +- sycl/unittests/Extensions/CommandGraph/Queries.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4e2351d7cbea1..53c5738e6b66b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -978,7 +978,7 @@ void handler::memset(void *Dest, int Value, size_t Count) { MDstPtr = Dest; MPattern.push_back(static_cast(Value)); MLength = Count; - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); + setUserFacingNodeType(ext::oneapi::experimental::node_type::memset); setType(detail::CG::FillUSM); } diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 6198c8aeb5832..c5cb7c2932119 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc +piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D -piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/unittests/Extensions/CommandGraph/Queries.cpp b/sycl/unittests/Extensions/CommandGraph/Queries.cpp index 0b05f57482d64..35057a19b86b7 100644 --- a/sycl/unittests/Extensions/CommandGraph/Queries.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Queries.cpp @@ -110,7 +110,7 @@ TEST_F(CommandGraphTest, NodeTypeQueries) { auto NodeMemset = Graph.add( [&](sycl::handler &cgh) { cgh.memset(PtrB, 7, 16 * sizeof(int)); }); - ASSERT_EQ(NodeMemset.get_type(), experimental::node_type::memfill); + ASSERT_EQ(NodeMemset.get_type(), experimental::node_type::memset); auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { cgh.fill(PtrB, 7, 16); }); From d98d82eb009aff84f3e072c7dbd78b3c4f1f7cd3 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Wed, 3 Apr 2024 10:26:06 +0100 Subject: [PATCH 07/26] Updated graph usm fill tests and Command Graph docs --- sycl/doc/design/CommandGraph.md | 2 ++ sycl/test-e2e/Graph/Explicit/usm_fill.cpp | 2 ++ sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp | 3 +++ sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp | 3 +++ sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp | 2 ++ sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp | 3 +++ sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp | 3 +++ sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp | 2 ++ 8 files changed, 20 insertions(+) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index c1e570d13ab21..acd159cb435ca 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -486,6 +486,8 @@ The types of commands which are unsupported, and lead to this exception are: This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and `dest` are USM pointers. This corresponds to a USM copy command. +* `handler::fill(ptr, pattern, count)` - This corresponds to a USM memory + fill command. * `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory fill command. * `handler::prefetch()`. diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp index d2aff0dee2c20..26d3b88e64ff9 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp index 7acd5143f6f54..e005170a5feff 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp index 8b8c623f48107..bb40a5683843c 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp index 16bf82ac572ae..d2925683837c2 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp @@ -5,6 +5,8 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp index 0e9b3dbdb39e8..c08b70c695e4f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_host_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp index 950afd3da8b97..f40f8d3aa4930 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp @@ -7,6 +7,9 @@ // REQUIRES: aspect-usm_shared_allocations +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp index 9506e99cd73ad..84a5d42f0da04 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -2,6 +2,8 @@ // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{ %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// USM fill command not supported for OpenCL +// UNSUPPORTED: opencl // // Tests adding a USM memset queue shortcut operation as a graph node. From 00e451f85d4617025d262881cc49972b3c811eec Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Wed, 8 May 2024 22:23:22 +0100 Subject: [PATCH 08/26] Fixed a comment --- sycl/source/detail/queue_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d3adabe185802..14d2ad1b94834 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -199,7 +199,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "queue_id", MQueueID); }); xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); - // Notify XPTI about the memset submission + // Notify XPTI about the memcpy submission PrepareNotify.notify(); // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); From 38a32f872005260ecac97e7979c6d8cacd76e5ec Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Tue, 4 Jun 2024 18:29:09 +0100 Subject: [PATCH 09/26] dummy change to trigger post-commit ci --- .github/workflows/sycl-linux-run-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 2f8e62eabd701..360329b45ddcd 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -263,9 +263,9 @@ jobs: - run: sycl-ls --verbose - run: SYCL_PI_TRACE=-1 sycl-ls - run: | - if [ -f /usr/local/lib/igc/IGCTAG.txt ]; then - cat /usr/local/lib/igc/IGCTAG.txt - fi + if [ -f /usr/local/lib/igc/IGCTAG.txt ]; then + cat /usr/local/lib/igc/IGCTAG.txt + fi - name: Deduce E2E CMake options if: inputs.tests_selector == 'e2e' From c23474d5de6e88fa2414ed8c68bbb6a0dbaa6b23 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 14 Jun 2024 17:03:32 +0100 Subject: [PATCH 10/26] Left the L0 fill as it was for the time being --- sycl/include/sycl/handler.hpp | 8 ++++++++ sycl/source/handler.cpp | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 240f37f74cfb1..f322cf285a399 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2899,7 +2899,15 @@ class __SYCL_EXPORT handler { template void fill(void *Ptr, const T &Pattern, size_t Count) { static_assert(is_device_copyable::value, "Pattern must be device copyable"); + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); +#if defined(__SPIR__) || defined(__SPIRV__) + parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { + T *CastedPtr = static_cast(Ptr); + CastedPtr[Index] = Pattern; + }); +#else this->fill_impl(Ptr, &Pattern, sizeof(T), Count); +#endif } /// Prevents any commands submitted afterward to this queue from executing diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 924236456fb9c..63e03dcddadea 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -994,7 +994,6 @@ void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); MLength = Count * ValueSize; - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); setType(detail::CG::FillUSM); } From dd0cb78f0abf7799c3e3f449eb28015b1dd095b5 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 17 Jun 2024 12:49:29 +0100 Subject: [PATCH 11/26] Added helper function to get backend info --- sycl/include/sycl/handler.hpp | 20 ++++++++++++-------- sycl/source/handler.cpp | 4 ++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index f322cf285a399..84acf5373bb0e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2900,14 +2900,14 @@ class __SYCL_EXPORT handler { static_assert(is_device_copyable::value, "Pattern must be device copyable"); setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); -#if defined(__SPIR__) || defined(__SPIRV__) - parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { - T *CastedPtr = static_cast(Ptr); - CastedPtr[Index] = Pattern; - }); -#else - this->fill_impl(Ptr, &Pattern, sizeof(T), Count); -#endif + if (getDeviceBackend() == backend::ext_oneapi_level_zero) { + parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { + T *CastedPtr = static_cast(Ptr); + CastedPtr[Index] = Pattern; + }); + } else { + this->fill_impl(Ptr, &Pattern, sizeof(T), Count); + } } /// Prevents any commands submitted afterward to this queue from executing @@ -3496,6 +3496,10 @@ class __SYCL_EXPORT handler { // Helper function for getting a loose bound on work-items. id<2> computeFallbackKernelBounds(size_t Width, size_t Height); + // Function to get information about the backend for which the code is + // compiled for + backend getDeviceBackend(); + // Common function for launching a 2D USM memcpy kernel to avoid redefinitions // of the kernel from copy and memcpy. template diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 63e03dcddadea..551b8e5b04c05 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1591,6 +1591,10 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)}; } +backend handler::getDeviceBackend() { + return MQueue->getDeviceImplPtr()->getBackend(); +} + void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { MImpl->HostPipeName = Name.data(); From 6bca4f40be3957733bee489f76cbc2748d49c7a6 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 17 Jun 2024 13:18:00 +0100 Subject: [PATCH 12/26] Updated sycl symbols linux --- sycl/test/abi/sycl_symbols_linux.dump | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 7100fcad51826..cea63e0279953 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3119,6 +3119,7 @@ _ZN4sycl3_V15queue10mem_adviseEPKvmiRKSt6vectorINS0_5eventESaIS5_EERKNS0_6detail _ZN4sycl3_V15queue10wait_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZN4sycl3_V15queue17discard_or_returnERKNS0_5eventE _ZN4sycl3_V15queue18throw_asynchronousEv _ZN4sycl3_V15queue20memcpyToDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE @@ -3587,6 +3588,7 @@ _ZN4sycl3_V17handler15ext_oneapi_copyEPvNS0_5rangeILi3EEES2_S4_RKNS0_3ext6oneapi _ZN4sycl3_V17handler15ext_oneapi_copyEPvNS0_5rangeILi3EEES4_NS0_3ext6oneapi12experimental16image_mem_handleES4_RKNS7_16image_descriptorES4_ _ZN4sycl3_V17handler15ext_oneapi_copyEPvS2_RKNS0_3ext6oneapi12experimental16image_descriptorEm _ZN4sycl3_V17handler16ext_oneapi_graphENS0_3ext6oneapi12experimental13command_graphILNS4_11graph_stateE1EEE +_ZN4sycl3_V17handler16getDeviceBackendEv _ZN4sycl3_V17handler16getMaxWorkGroupsEv _ZN4sycl3_V17handler17supportsUSMFill2DEv _ZN4sycl3_V17handler17use_kernel_bundleERKNS0_13kernel_bundleILNS0_12bundle_stateE2EEE @@ -3622,6 +3624,7 @@ _ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm _ZN4sycl3_V17handler28setStateExplicitKernelBundleEv _ZN4sycl3_V17handler30memcpyFromHostOnlyDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler30verifyUsedKernelBundleInternalENS0_6detail11string_viewE +_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler6memcpyEPvPKvm @@ -3635,7 +3638,6 @@ _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb -_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V17samplerC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE @@ -3750,7 +3752,6 @@ _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv _ZNK4sycl3_V15queue16ext_oneapi_emptyEv -_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv @@ -3975,6 +3976,12 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22m _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22max_image_linear_widthEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device23max_image_linear_heightEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device26max_image_linear_row_pitchEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device15supports_fusionEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device28max_registers_per_work_groupEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device10extensionsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv @@ -4086,12 +4093,6 @@ _ZNK4sycl3_V16device13get_info_implINS0_4info6device7versionEEENS0_6detail11ABIN _ZNK4sycl3_V16device13get_info_implINS0_4info6device8atomic64EEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device8platformEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device9vendor_idEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13has_extensionERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK4sycl3_V16device14is_acceleratorEv _ZNK4sycl3_V16device16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv From d2e891217a3e497cbac8fbb6f6db5190ffa5ac50 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 17 Jun 2024 14:07:57 +0100 Subject: [PATCH 13/26] Moved some code from fill_impl to general fill --- sycl/include/sycl/handler.hpp | 3 ++- sycl/source/handler.cpp | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 84acf5373bb0e..dc0c6be2ecd86 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2897,9 +2897,10 @@ class __SYCL_EXPORT handler { /// device copyable. /// \param Count is the number of times to fill Pattern into Ptr. template void fill(void *Ptr, const T &Pattern, size_t Count) { + throwIfActionIsCreated(); + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); static_assert(is_device_copyable::value, "Pattern must be device copyable"); - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); if (getDeviceBackend() == backend::ext_oneapi_level_zero) { parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { T *CastedPtr = static_cast(Ptr); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 551b8e5b04c05..b814024a84d6f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -989,7 +989,6 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, size_t Count) { - throwIfActionIsCreated(); MDstPtr = Dest; MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); From 7685a565cfb86153ea5208e41931c21c6d9dedde Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 17 Jun 2024 17:38:27 +0100 Subject: [PATCH 14/26] Fixed getDeviceBackend function for graphs --- sycl/include/sycl/handler.hpp | 2 +- sycl/source/handler.cpp | 7 +++++-- sycl/test/abi/sycl_symbols_linux.dump | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index dc0c6be2ecd86..e6a9f7025b86e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3499,7 +3499,7 @@ class __SYCL_EXPORT handler { // Function to get information about the backend for which the code is // compiled for - backend getDeviceBackend(); + backend getDeviceBackend() const; // Common function for launching a 2D USM memcpy kernel to avoid redefinitions // of the kernel from copy and memcpy. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b814024a84d6f..5031a1fa6f32a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1590,8 +1590,11 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)}; } -backend handler::getDeviceBackend() { - return MQueue->getDeviceImplPtr()->getBackend(); +backend handler::getDeviceBackend() const { + if (MGraph) + return MGraph->getDevice().get_backend(); + else + return MQueue->getDeviceImplPtr()->getBackend(); } void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index cea63e0279953..66ee8e8f4ad34 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3588,7 +3588,6 @@ _ZN4sycl3_V17handler15ext_oneapi_copyEPvNS0_5rangeILi3EEES2_S4_RKNS0_3ext6oneapi _ZN4sycl3_V17handler15ext_oneapi_copyEPvNS0_5rangeILi3EEES4_NS0_3ext6oneapi12experimental16image_mem_handleES4_RKNS7_16image_descriptorES4_ _ZN4sycl3_V17handler15ext_oneapi_copyEPvS2_RKNS0_3ext6oneapi12experimental16image_descriptorEm _ZN4sycl3_V17handler16ext_oneapi_graphENS0_3ext6oneapi12experimental13command_graphILNS4_11graph_stateE1EEE -_ZN4sycl3_V17handler16getDeviceBackendEv _ZN4sycl3_V17handler16getMaxWorkGroupsEv _ZN4sycl3_V17handler17supportsUSMFill2DEv _ZN4sycl3_V17handler17use_kernel_bundleERKNS0_13kernel_bundleILNS0_12bundle_stateE2EEE @@ -4229,6 +4228,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_conte _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context9getNativeEv _ZNK4sycl3_V17handler15getCommandGraphEv +_ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb From 33b132cc875b5220de42112175ac4ce31496718a Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 17 Jun 2024 18:32:27 +0100 Subject: [PATCH 15/26] Updated symbols linux --- sycl/test/abi/sycl_symbols_windows.dump | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 071547d8015c0..1d177d3c9ca06 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -41,18 +41,12 @@ ??$get_info@U?$max_work_groups@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$00@23@XZ ??$get_info@U?$max_work_groups@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$01@23@XZ ??$get_info@U?$max_work_groups@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$02@23@XZ +??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AW4architecture@experimental@oneapi@ext@23@XZ ??$get_info@Uatomic_fence_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_fence_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ @@ -108,6 +102,12 @@ ??$get_info_impl@U?$max_work_item_sizes@$00@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$00@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$01@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$01@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$02@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$02@12@XZ +??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AW4architecture@experimental@oneapi@ext@12@XZ ??$get_info_impl@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ @@ -4076,7 +4076,6 @@ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z -?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z @@ -4092,7 +4091,6 @@ ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@0AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX111@Z -?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z @@ -4154,6 +4152,7 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z ?ext_oneapi_prefetch_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z ?ext_oneapi_signal_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@AEBUcode_location@detail@23@@Z @@ -4218,6 +4217,7 @@ ?getCommandGraph@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@XZ ?getContextImplPtr@handler@_V1@sycl@@AEBAAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ +?getDeviceBackend@handler@_V1@sycl@@AEBA?AW4backend@23@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z ?getDevices@image_impl@detail@_V1@sycl@@AEAA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@6@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z @@ -4645,6 +4645,7 @@ ?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?usesPinnedHostMemory@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ +?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyKernelInvoc@handler@_V1@sycl@@AEAAXAEBVkernel@23@@Z ?verifyUsedKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?verifyUsedKernelBundleInternal@handler@_V1@sycl@@AEAAXVstring_view@detail@23@@Z From 679bcd43c9772affe3b56b0ca44d440d413e4b3b Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Tue, 18 Jun 2024 10:51:59 +0100 Subject: [PATCH 16/26] Added condition to FileCheck RUN in DiscardEvents tests --- .../DiscardEvents/discard_events_usm.cpp | 16 ++++++++++++---- .../discard_events_usm_ooo_queue.cpp | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 269706b10321d..9352d4d4b87cb 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -1,6 +1,10 @@ // RUN: %{build} -o %t.out - -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// +// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// as on other targets due to https://github.com/intel/llvm/issues/13787 +// +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// // REQUIRES: aspect-usm_shared_allocations // The test checks that the last parameter is `nullptr` for all PI calls that // should discard events. @@ -20,7 +24,9 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// CHECK: ---> piextUSMEnqueueFill( +// Level-zero backend doesn't yet use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -64,7 +70,9 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// CHECK: ---> piextUSMEnqueueFill( +// Level-zero backend doesn't yet use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index fb7555bab9d9f..d731850be4cff 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -1,6 +1,10 @@ // RUN: %{build} -o %t.out - -// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt +// +// On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill +// as on other targets due to https://github.com/intel/llvm/issues/13787 +// +// RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} +// // REQUIRES: aspect-usm_shared_allocations // The test checks that the last parameter is not `nullptr` for all PI calls // that should discard events. @@ -22,7 +26,9 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// CHECK: ---> piextUSMEnqueueFill( +// Level-zero backend doesn't yet use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -78,7 +84,9 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// CHECK: ---> piextUSMEnqueueFill( +// Level-zero backend doesn't yet use piextUSMEnqueueFill +// CHECK-L0: ---> piEnqueueKernelLaunch( +// CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS From 3e17f6d527c9ac8fe384887c65252b872bfa975b Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Tue, 18 Jun 2024 14:24:13 +0100 Subject: [PATCH 17/26] Removed triggering post-commit --- .github/workflows/sycl-linux-run-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 360329b45ddcd..2f8e62eabd701 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -263,9 +263,9 @@ jobs: - run: sycl-ls --verbose - run: SYCL_PI_TRACE=-1 sycl-ls - run: | - if [ -f /usr/local/lib/igc/IGCTAG.txt ]; then - cat /usr/local/lib/igc/IGCTAG.txt - fi + if [ -f /usr/local/lib/igc/IGCTAG.txt ]; then + cat /usr/local/lib/igc/IGCTAG.txt + fi - name: Deduce E2E CMake options if: inputs.tests_selector == 'e2e' From c3227526f06420fbd958bc88a394923c969b720d Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 21 Jun 2024 11:50:09 +0100 Subject: [PATCH 18/26] Cleanup and modified memory manager symbols not to break the ABI --- sycl/source/detail/memory_manager.cpp | 17 +++++++++++++---- sycl/source/detail/memory_manager.hpp | 8 ++++++++ .../DiscardEvents/discard_events_usm.cpp | 6 +++--- .../discard_events_usm_ooo_queue.cpp | 6 +++--- sycl/test/abi/sycl_symbols_linux.dump | 1 + 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index ef09a809e025e..3f304f8a78ed2 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1017,8 +1017,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - std::vector vecPattern(sizeof(Pattern)); - std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); + std::vector vecPattern{static_cast(Pattern)}; MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, OutEventImpl); } @@ -1028,8 +1027,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - std::vector vecPattern(sizeof(Pattern)); - std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); + std::vector vecPattern{static_cast(Pattern)}; MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, nullptr); // OutEventImpl); } @@ -1711,6 +1709,17 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( Deps.data(), OutSyncPoint); } +// TODO: This function will remain until ABI-breaking change +void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( + sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, + size_t Len, int Pattern, std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { + std::vector vecPattern{static_cast(Pattern)}; + ext_oneapi_fill_usm_cmd_buffer(Context, CommandBuffer, DstMem, Len, + vecPattern, Deps, OutSyncPoint); +} + void MemoryManager::ext_oneapi_fill_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 0636fe6f3d1c0..a5cda0e3a4d4e 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -330,6 +330,14 @@ class __SYCL_EXPORT MemoryManager { std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + // TODO: This function will remain until ABI-breaking change + static void ext_oneapi_fill_usm_cmd_buffer( + sycl::detail::ContextImplPtr Context, + sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, + size_t Len, int Pattern, + std::vector Deps, + sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); + static void ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 9352d4d4b87cb..111e72d121cfc 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // // On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill -// as on other targets due to https://github.com/intel/llvm/issues/13787 +// due to https://github.com/intel/llvm/issues/13787 // // RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} // @@ -24,7 +24,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Level-zero backend doesn't yet use piextUSMEnqueueFill +// Level-zero backend doesn't use piextUSMEnqueueFill // CHECK-L0: ---> piEnqueueKernelLaunch( // CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : @@ -70,7 +70,7 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// Level-zero backend doesn't yet use piextUSMEnqueueFill +// Level-zero backend doesn't use piextUSMEnqueueFill // CHECK-L0: ---> piEnqueueKernelLaunch( // CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index d731850be4cff..d27be6ab48abc 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // // On level_zero Q.fill uses piEnqueueKernelLaunch and not piextUSMEnqueueFill -// as on other targets due to https://github.com/intel/llvm/issues/13787 +// due to https://github.com/intel/llvm/issues/13787 // // RUN: env SYCL_PI_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} // @@ -26,7 +26,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Level-zero backend doesn't yet use piextUSMEnqueueFill +// Level-zero backend doesn't use piextUSMEnqueueFill // CHECK-L0: ---> piEnqueueKernelLaunch( // CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : @@ -84,7 +84,7 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// Level-zero backend doesn't yet use piextUSMEnqueueFill +// Level-zero backend doesn't use piextUSMEnqueueFill // CHECK-L0: ---> piEnqueueKernelLaunch( // CHECK-OTHER: ---> piextUSMEnqueueFill( // CHECK: pi_event * : diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a7f4505a4645c..14c9a2574f8a0 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3298,6 +3298,7 @@ _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptr _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj +_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ From 3d579c10084b15bf43a4ae4886fc64257f5bb48f Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 21 Jun 2024 12:08:49 +0100 Subject: [PATCH 19/26] Updated symbols windows --- sycl/test/abi/sycl_symbols_windows.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 1c734e86a7408..337d8316c329c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4126,6 +4126,7 @@ ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ From 38722b481f638dfd858cb4accc7caf3e970fb655 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Thu, 27 Jun 2024 18:36:20 +0100 Subject: [PATCH 20/26] Removed overloads of fill_usm for abi-break --- sycl/source/detail/memory_manager.cpp | 22 ---------------------- sycl/source/detail/memory_manager.hpp | 15 --------------- sycl/source/detail/reduction.cpp | 2 +- sycl/test/abi/sycl_symbols_linux.dump | 3 +-- 4 files changed, 2 insertions(+), 40 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 40bd3da1fd541..2e0cb4300d766 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -973,17 +973,6 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { - std::vector vecPattern{static_cast(Pattern)}; - MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, - OutEventImpl); -} - void MemoryManager::prefetch_usm( void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, @@ -1595,17 +1584,6 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( Deps.data(), OutSyncPoint); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( - sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { - std::vector vecPattern{static_cast(Pattern)}; - ext_oneapi_fill_usm_cmd_buffer(Context, CommandBuffer, DstMem, Len, - vecPattern, Deps, OutSyncPoint); -} - void MemoryManager::ext_oneapi_fill_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 0c6368322a1e7..6af9e51b20aca 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -152,13 +152,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl); - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, @@ -253,14 +246,6 @@ class __SYCL_EXPORT MemoryManager { std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); - // TODO: This function will remain until ABI-breaking change - static void ext_oneapi_fill_usm_cmd_buffer( - sycl::detail::ContextImplPtr Context, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, int Pattern, - std::vector Deps, - sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); - static void ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 6fd170746d28d..a8a839b65b8f4 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -172,7 +172,7 @@ addCounterInit(handler &CGH, std::shared_ptr &Queue, auto EventImpl = std::make_shared(Queue); EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context())); EventImpl->setStateIncomplete(); - MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), 0, {}, + MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &EventImpl->getHandleRef(), EventImpl); CGH.depends_on(createSyclObjFromImpl(EventImpl)); } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 5542d38dcaf47..e644ff4b0012b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3288,7 +3288,6 @@ _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptr _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3298,7 +3297,7 @@ _ZN4sycl3_V16detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1 _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE +_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail13lgamma_r_implEdPi From ca2f561eac6ea47fadb562986338835d766ee10d Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Thu, 27 Jun 2024 19:24:49 +0100 Subject: [PATCH 21/26] Updated symbols windows --- sycl/test/abi/sycl_symbols_windows.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 40d2e25f8333e..b31436ac69a2d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4111,7 +4111,6 @@ ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -4169,7 +4168,8 @@ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z +?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z +?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ From 51a9136c90337c7e616bd4706876089cdb2d59f8 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Mon, 1 Jul 2024 12:55:28 +0100 Subject: [PATCH 22/26] merged sycl tip and resolved conflicts --- sycl/test/abi/sycl_symbols_linux.dump | 88 ++++++++++++++++----------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 3f6f6c6615145..7ddfd1dbd4ac1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3154,10 +3154,37 @@ _ZN4sycl3_V16detail10build_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERK _ZN4sycl3_V16detail10frexp_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail10frexp_implEdPi _ZN4sycl3_V16detail10frexp_implEfPi +_ZN4sycl3_V16detail10image_impl10getDevicesESt10shared_ptrINS1_12context_implEE +_ZN4sycl3_V16detail10image_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event +_ZN4sycl3_V16detail10image_impl14checkImageDescERK14_pi_image_descSt10shared_ptrINS1_12context_implEEPv +_ZN4sycl3_V16detail10image_impl16checkImageFormatERK16_pi_image_formatSt10shared_ptrINS1_12context_implEE +_ZN4sycl3_V16detail10image_impl34sampledImageDestructorNotificationEPv +_ZN4sycl3_V16detail10image_impl35sampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatERKNS0_13image_samplerE +_ZN4sycl3_V16detail10image_impl36unsampledImageDestructorNotificationEPv +_ZN4sycl3_V16detail10image_impl37unsampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatE +_ZN4sycl3_V16detail10image_implC1EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh +_ZN4sycl3_V16detail10image_implC1EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE +_ZN4sycl3_V16detail10image_implC2EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh +_ZN4sycl3_V16detail10image_implC2EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextENS0_7backendE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextEbNS0_7backendE _ZN4sycl3_V16detail10make_queueEmiRKNS0_7contextEPKNS0_6deviceEbRKNS0_13property_listERKSt8functionIFvNS0_14exception_listEEENS0_7backendE _ZN4sycl3_V16detail10waitEventsESt6vectorINS0_5eventESaIS3_EE +_ZN4sycl3_V16detail11SYCLMemObjT10releaseMemESt10shared_ptrINS1_12context_implEEPv +_ZN4sycl3_V16detail11SYCLMemObjT16determineHostPtrERKSt10shared_ptrINS1_12context_implEEbRPvRb +_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEPv +_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEv +_ZN4sycl3_V16detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12context_implEEm +_ZN4sycl3_V16detail11SYCLMemObjT27handleWriteAccessorCreationEv +_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE +_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm +_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE +_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE +_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm +_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE +_ZN4sycl3_V16detail11buffer_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event +_ZN4sycl3_V16detail11buffer_impl22destructorNotificationEPv +_ZN4sycl3_V16detail11buffer_impl23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm _ZN4sycl3_V16detail11image_plain14set_write_backEb _ZN4sycl3_V16detail11image_plain23set_final_data_internalERKSt8functionIFvRKS3_IFvPvEEEE _ZN4sycl3_V16detail11image_plain23set_final_data_internalEv @@ -3200,6 +3227,13 @@ _ZN4sycl3_V16detail11remquo_implEffPi _ZN4sycl3_V16detail11sincos_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail11sincos_implEdPd _ZN4sycl3_V16detail11sincos_implEfPf +_ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_implEE +_ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE +_ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE +_ZN4sycl3_V16detail11stream_implC2EmmRKNS0_13property_listE _ZN4sycl3_V16detail12buffer_plain14deleteAccPropsERKNS1_16PropWithDataKindE _ZN4sycl3_V16detail12buffer_plain14set_write_backEb _ZN4sycl3_V16detail12buffer_plain23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm @@ -3220,7 +3254,7 @@ _ZN4sycl3_V16detail12buffer_plainC2EmNS0_7contextESt10unique_ptrINS1_19SYCLMemOb _ZN4sycl3_V16detail12buffer_plainC2EmmRKNS0_13property_listESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS7_EE _ZN4sycl3_V16detail12compile_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERKSt6vectorINS0_6deviceESaIS8_EERKNS0_13property_listE _ZN4sycl3_V16detail12isOutOfRangeENS0_3vecIiLi4EEENS0_15addressing_modeENS0_5rangeILi3EEE -_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendE +_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE _ZN4sycl3_V16detail12sampler_impl18getOrCreateSamplerERKNS0_7contextE _ZN4sycl3_V16detail12sampler_implC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V16detail12sampler_implC1EP11_cl_samplerRKNS0_7contextE @@ -3228,39 +3262,6 @@ _ZN4sycl3_V16detail12sampler_implC2ENS0_29coordinate_normalization_modeENS0_15ad _ZN4sycl3_V16detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V16detail12sampler_implD1Ev _ZN4sycl3_V16detail12sampler_implD2Ev -_ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ -_ZN4sycl3_V16detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19copy_image_bindlessEPvSt10shared_ptrINS1_10queue_implEES3_RK14_pi_image_descRK16_pi_image_format20_pi_image_copy_flags22pi_image_offset_structSE_22pi_image_region_structSF_RKSt6vectorIP9_pi_eventSaISI_EEPSI_ -_ZN4sycl3_V16detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager26ext_oneapi_fill_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvmPKcjNS0_5rangeILi3EEESE_NS0_2idILi3EEEjSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSA_jSC_SC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ -_ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEES5_St6vectorIP9_pi_eventSaISB_EERSB_ -_ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail13lgamma_r_implEdPi @@ -3287,15 +3288,23 @@ _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6a _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE +_ZN4sycl3_V16detail16AccessorImplHost6resizeEm +_ZN4sycl3_V16detail16AccessorImplHostD1Ev +_ZN4sycl3_V16detail16AccessorImplHostD2Ev _ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail17HostProfilingInfo3endEv _ZN4sycl3_V16detail17HostProfilingInfo5startEv _ZN4sycl3_V16detail17device_global_map3addEPKvPKc _ZN4sycl3_V16detail17reduComputeWGSizeEmmRm +_ZN4sycl3_V16detail18convertChannelTypeE22_pi_image_channel_type +_ZN4sycl3_V16detail18convertChannelTypeENS0_18image_channel_typeE _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi +_ZN4sycl3_V16detail19convertChannelOrderE23_pi_image_channel_order +_ZN4sycl3_V16detail19convertChannelOrderENS0_19image_channel_orderE +_ZN4sycl3_V16detail19getImageElementSizeEhNS0_18image_channel_typeE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm @@ -3311,6 +3320,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE +_ZN4sycl3_V16detail22getImageNumberChannelsENS0_19image_channel_orderE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE @@ -3701,6 +3711,11 @@ _ZNK4sycl3_V15queue9getNativeERi _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE +_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_E +_ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv +_ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv +_ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE +_ZNK4sycl3_V16detail11buffer_impl16addInteropObjectERSt6vectorImSaImEE _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slicesEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv @@ -3781,6 +3796,8 @@ _ZNK4sycl3_V16detail11image_plain8get_sizeEv _ZNK4sycl3_V16detail11image_plain9get_countEv _ZNK4sycl3_V16detail11image_plain9get_pitchEv _ZNK4sycl3_V16detail11image_plain9get_rangeEv +_ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv +_ZNK4sycl3_V16detail11stream_impl4sizeEv _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -3819,6 +3836,9 @@ _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv +_ZNK4sycl3_V16detail12sampler_impl18get_filtering_modeEv +_ZNK4sycl3_V16detail12sampler_impl19get_addressing_modeEv +_ZNK4sycl3_V16detail12sampler_impl33get_coordinate_normalization_modeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv From 6c6d81af3ae44e8d4ac06f0a4d9dbfb4fdcf022c Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 5 Jul 2024 08:58:08 +0100 Subject: [PATCH 23/26] Updated symbols linux --- sycl/test/abi/sycl_symbols_linux.dump | 60 --------------------------- 1 file changed, 60 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a4950ad546b62..c4339fd0bc325 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3167,37 +3167,10 @@ _ZN4sycl3_V16detail10build_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERK _ZN4sycl3_V16detail10frexp_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail10frexp_implEdPi _ZN4sycl3_V16detail10frexp_implEfPi -_ZN4sycl3_V16detail10image_impl10getDevicesESt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail10image_impl14checkImageDescERK14_pi_image_descSt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail10image_impl16checkImageFormatERK16_pi_image_formatSt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl34sampledImageDestructorNotificationEPv -_ZN4sycl3_V16detail10image_impl35sampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatERKNS0_13image_samplerE -_ZN4sycl3_V16detail10image_impl36unsampledImageDestructorNotificationEPv -_ZN4sycl3_V16detail10image_impl37unsampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatE -_ZN4sycl3_V16detail10image_implC1EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh -_ZN4sycl3_V16detail10image_implC1EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE -_ZN4sycl3_V16detail10image_implC2EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh -_ZN4sycl3_V16detail10image_implC2EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextENS0_7backendE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextEbNS0_7backendE _ZN4sycl3_V16detail10make_queueEmiRKNS0_7contextEPKNS0_6deviceEbRKNS0_13property_listERKSt8functionIFvNS0_14exception_listEEENS0_7backendE _ZN4sycl3_V16detail10waitEventsESt6vectorINS0_5eventESaIS3_EE -_ZN4sycl3_V16detail11SYCLMemObjT10releaseMemESt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail11SYCLMemObjT16determineHostPtrERKSt10shared_ptrINS1_12context_implEEbRPvRb -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEPv -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEv -_ZN4sycl3_V16detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12context_implEEm -_ZN4sycl3_V16detail11SYCLMemObjT27handleWriteAccessorCreationEv -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11buffer_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail11buffer_impl22destructorNotificationEPv -_ZN4sycl3_V16detail11buffer_impl23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm _ZN4sycl3_V16detail11image_plain14set_write_backEb _ZN4sycl3_V16detail11image_plain23set_final_data_internalERKSt8functionIFvRKS3_IFvPvEEEE _ZN4sycl3_V16detail11image_plain23set_final_data_internalEv @@ -3240,13 +3213,6 @@ _ZN4sycl3_V16detail11remquo_implEffPi _ZN4sycl3_V16detail11sincos_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail11sincos_implEdPd _ZN4sycl3_V16detail11sincos_implEfPf -_ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_implEE -_ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE -_ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC2EmmRKNS0_13property_listE _ZN4sycl3_V16detail12buffer_plain14deleteAccPropsERKNS1_16PropWithDataKindE _ZN4sycl3_V16detail12buffer_plain14set_write_backEb _ZN4sycl3_V16detail12buffer_plain23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm @@ -3269,13 +3235,6 @@ _ZN4sycl3_V16detail12compile_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEE _ZN4sycl3_V16detail12get_pi_errorERKNS0_9exceptionE _ZN4sycl3_V16detail12isOutOfRangeENS0_3vecIiLi4EEENS0_15addressing_modeENS0_5rangeILi3EEE _ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE -_ZN4sycl3_V16detail12sampler_impl18getOrCreateSamplerERKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC1EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implD1Ev -_ZN4sycl3_V16detail12sampler_implD2Ev _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail13lgamma_r_implEdPi @@ -3302,23 +3261,15 @@ _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6a _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorImplHost6resizeEm -_ZN4sycl3_V16detail16AccessorImplHostD1Ev -_ZN4sycl3_V16detail16AccessorImplHostD2Ev _ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail17HostProfilingInfo3endEv _ZN4sycl3_V16detail17HostProfilingInfo5startEv _ZN4sycl3_V16detail17device_global_map3addEPKvPKc _ZN4sycl3_V16detail17reduComputeWGSizeEmmRm -_ZN4sycl3_V16detail18convertChannelTypeE22_pi_image_channel_type -_ZN4sycl3_V16detail18convertChannelTypeENS0_18image_channel_typeE _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi -_ZN4sycl3_V16detail19convertChannelOrderE23_pi_image_channel_order -_ZN4sycl3_V16detail19convertChannelOrderENS0_19image_channel_orderE -_ZN4sycl3_V16detail19getImageElementSizeEhNS0_18image_channel_typeE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm @@ -3334,7 +3285,6 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE -_ZN4sycl3_V16detail22getImageNumberChannelsENS0_19image_channel_orderE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE @@ -3726,11 +3676,6 @@ _ZNK4sycl3_V15queue9getNativeERi _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_E -_ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv -_ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv -_ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE -_ZNK4sycl3_V16detail11buffer_impl16addInteropObjectERSt6vectorImSaImEE _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slicesEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv @@ -3811,8 +3756,6 @@ _ZNK4sycl3_V16detail11image_plain8get_sizeEv _ZNK4sycl3_V16detail11image_plain9get_countEv _ZNK4sycl3_V16detail11image_plain9get_pitchEv _ZNK4sycl3_V16detail11image_plain9get_rangeEv -_ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv -_ZNK4sycl3_V16detail11stream_impl4sizeEv _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -3851,9 +3794,6 @@ _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv -_ZNK4sycl3_V16detail12sampler_impl18get_filtering_modeEv -_ZNK4sycl3_V16detail12sampler_impl19get_addressing_modeEv -_ZNK4sycl3_V16detail12sampler_impl33get_coordinate_normalization_modeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv From f4c59ca694fe444005c5ef6b7f86ff4a0c929968 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 5 Jul 2024 09:20:11 +0100 Subject: [PATCH 24/26] Fixed EnqueueFunctionsEvents unittest --- .../Extensions/EnqueueFunctionsEvents.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 842e3cf271216..10c86342b3a4c 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -61,12 +61,12 @@ inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, return PI_SUCCESS; } -thread_local size_t counter_piextUSMEnqueueMemset = 0; -inline pi_result redefined_piextUSMEnqueueMemset(pi_queue, void *, pi_int32, - size_t, pi_uint32, - const pi_event *, - pi_event *event) { - ++counter_piextUSMEnqueueMemset; +thread_local size_t counter_piextUSMEnqueueFill = 0; +inline pi_result redefined_piextUSMEnqueueFill(pi_queue, void *, const void *, + size_t, size_t, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueFill; EXPECT_EQ(event, nullptr); return PI_SUCCESS; } @@ -112,7 +112,7 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { void SetUp() override { counter_piEnqueueKernelLaunch = 0; counter_piextUSMEnqueueMemcpy = 0; - counter_piextUSMEnqueueMemset = 0; + counter_piextUSMEnqueueFill = 0; counter_piextUSMEnqueuePrefetch = 0; counter_piextUSMEnqueueMemAdvise = 0; counter_piEnqueueEventsWaitWithBarrier = 0; @@ -362,8 +362,8 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { } TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemset); + Mock.redefine( + redefined_piextUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); @@ -372,21 +372,21 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); }); - ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); free(Dst, Q); } TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { - Mock.redefine( - redefined_piextUSMEnqueueMemset); + Mock.redefine( + redefined_piextUSMEnqueueFill); constexpr size_t N = 1024; int *Dst = malloc_shared(N, Q); oneapiext::memset(Q, Dst, 1, sizeof(int) * N); - ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + ASSERT_EQ(counter_piextUSMEnqueueFill, size_t{1}); free(Dst, Q); } From 648d3b95f965bf2475373466f205b7e494fa4178 Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 5 Jul 2024 10:04:18 +0100 Subject: [PATCH 25/26] Changed char to unsigned char --- sycl/include/sycl/detail/cg.hpp | 20 ++++++++++---------- sycl/include/sycl/handler.hpp | 2 +- sycl/source/detail/memory_manager.cpp | 14 +++++++------- sycl/source/detail/memory_manager.hpp | 15 ++++++++------- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/handler.cpp | 2 +- sycl/test/abi/layout_handler.cpp | 12 +++++------- 7 files changed, 33 insertions(+), 34 deletions(-) diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/include/sycl/detail/cg.hpp index 5bb7ab5a78dfe..c1fdc6857b4a0 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/include/sycl/detail/cg.hpp @@ -147,7 +147,7 @@ class CG { getAuxiliaryResources() const { return {}; } - virtual void clearAuxiliaryResources(){}; + virtual void clearAuxiliaryResources() {}; virtual ~CG() = default; @@ -247,11 +247,11 @@ class CGCopy : public CG { /// "Fill memory" command group class. class CGFill : public CG { public: - std::vector MPattern; + std::vector MPattern; AccessorImplHost *MPtr; - CGFill(std::vector Pattern, void *Ptr, CG::StorageInitHelper CGData, - detail::code_location loc = {}) + CGFill(std::vector Pattern, void *Ptr, + CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(Fill, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToFill() { return MPtr; } @@ -289,18 +289,18 @@ class CGCopyUSM : public CG { /// "Fill USM" command group class. class CGFillUSM : public CG { - std::vector MPattern; + std::vector MPattern; void *MDst; size_t MLength; public: - CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, + CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(FillUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - const std::vector &getPattern() { return MPattern; } + const std::vector &getPattern() { return MPattern; } }; /// "Prefetch USM" command group class. @@ -378,14 +378,14 @@ class CGCopy2DUSM : public CG { /// "Fill 2D USM" command group class. class CGFill2DUSM : public CG { - std::vector MPattern; + std::vector MPattern; void *MDst; size_t MPitch; size_t MWidth; size_t MHeight; public: - CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, + CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(Fill2DUSM, std::move(CGData), std::move(loc)), @@ -395,7 +395,7 @@ class CGFill2DUSM : public CG { size_t getPitch() const { return MPitch; } size_t getWidth() const { return MWidth; } size_t getHeight() const { return MHeight; } - const std::vector &getPattern() const { return MPattern; } + const std::vector &getPattern() const { return MPattern; } }; /// "Memset 2D USM" command group class. diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 08a61fbd08e06..a26da04193e3b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3301,7 +3301,7 @@ class __SYCL_EXPORT handler { /// Length to copy or fill (for USM operations). size_t MLength = 0; /// Pattern that is used to fill memory object in case command type is fill. - std::vector MPattern; + std::vector MPattern; /// Storage for a lambda or function object. std::unique_ptr MHostKernel; /// Storage for lambda/function when using HostTask diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 70ccbd06b2e08..8a6f027dd8e27 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -807,7 +807,7 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, } void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, + size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> MemRange, sycl::range<3> AccRange, sycl::id<3> Offset, unsigned int ElementSize, @@ -951,7 +951,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1082,7 +1082,7 @@ void MemoryManager::copy_2d_usm( void MemoryManager::fill_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1570,7 +1570,7 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, + size_t Len, const std::vector &Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { @@ -1588,9 +1588,9 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, - void *Mem, size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, - unsigned int ElementSize, + void *Mem, size_t PatternSize, const unsigned char *Pattern, + unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, + sycl::id<3> AccessOffset, unsigned int ElementSize, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 0631a8f6f90fd..793d3b2c48252 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -120,9 +120,10 @@ class MemoryManager { const detail::EventImplPtr &OutEventImpl); static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, - sycl::id<3> AccessOffset, unsigned int ElementSize, + size_t PatternSize, const unsigned char *Pattern, + unsigned int Dim, sycl::range<3> Size, + sycl::range<3> AccessRange, sycl::id<3> AccessOffset, + unsigned int ElementSize, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -146,7 +147,7 @@ class MemoryManager { const detail::EventImplPtr &OutEventImpl); static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -171,7 +172,7 @@ class MemoryManager { static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, + const std::vector &Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); @@ -241,7 +242,7 @@ class MemoryManager { static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, + size_t Len, const std::vector &Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); @@ -249,7 +250,7 @@ class MemoryManager { ext_oneapi_fill_cmd_buffer(sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, SYCLMemObjI *SYCLMemObj, void *Mem, - size_t PatternSize, const char *Pattern, + size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 9e8bd1f90f954..4adb0f2dccac1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -172,7 +172,7 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - const std::vector Pattern{static_cast(Value)}; + const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c9713a8d32fe6..7dae7fdbf5726 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -995,7 +995,7 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height) { // Checks done in callers. MDstPtr = Dest; - MPattern.push_back(static_cast(Value)); + MPattern.push_back(static_cast(Value)); MImpl->MDstPitch = DestPitch; MImpl->MWidth = Width; MImpl->MHeight = Height; diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index 710f6c4991782..cc073064c724c 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -7,9 +7,7 @@ void foo() { sycl::queue Q; - Q.submit([](sycl::handler &CGH) { - CGH.single_task([]() {}); - }); + Q.submit([](sycl::handler &CGH) { CGH.single_task([]() {}); }); } // clang-format off @@ -118,10 +116,10 @@ void foo() { // CHECK-NEXT: 384 | void * MSrcPtr // CHECK-NEXT: 392 | void * MDstPtr // CHECK-NEXT: 400 | size_t MLength -// CHECK-NEXT: 408 | class std::vector MPattern -// CHECK-NEXT: 408 | struct std::_Vector_base > (base) -// CHECK-NEXT: 408 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 408 | class std::allocator (base) (empty) +// CHECK-NEXT: 408 | class std::vector MPattern +// CHECK-NEXT: 408 | struct std::_Vector_base > (base) +// CHECK-NEXT: 408 | struct std::_Vector_base >::_Vector_impl _M_impl +// CHECK-NEXT: 408 | class std::allocator (base) (empty) // CHECK: 408 | pointer _M_start // CHECK-NEXT: 416 | pointer _M_finish // CHECK-NEXT: 424 | pointer _M_end_of_storage From e2a9aa0b7cf1a96cea632da2194bf696ae06f6cf Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Fri, 5 Jul 2024 10:28:41 +0100 Subject: [PATCH 26/26] Updated symbols windows --- sycl/test/abi/sycl_symbols_windows.dump | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 3628d543d2b8a..e36316424b2e0 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4068,10 +4068,7 @@ ?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z ?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z -?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ