From 123fc506e468875505073f0c9ab960dbd975189c Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 6 May 2024 15:59:25 -0700 Subject: [PATCH] Revert "[SYCL] Make queue fill use native functions (#12702)" This reverts commit 46e49ecb70f84ee9f6880d6e89d40657787b3189. --- sycl/doc/design/CommandGraph.md | 2 -- sycl/include/sycl/detail/cg.hpp | 2 +- sycl/include/sycl/detail/pi.def | 2 +- sycl/include/sycl/detail/pi.h | 27 +++++++------- sycl/include/sycl/handler.hpp | 10 +++--- sycl/plugins/cuda/pi_cuda.cpp | 12 +++---- sycl/plugins/hip/pi_hip.cpp | 12 +++---- sycl/plugins/level_zero/pi_level_zero.cpp | 23 ++++++------ sycl/plugins/native_cpu/pi_native_cpu.cpp | 12 +++---- sycl/plugins/opencl/pi_opencl.cpp | 12 +++---- sycl/plugins/unified_runtime/pi2ur.hpp | 14 ++++---- .../unified_runtime/pi_unified_runtime.cpp | 36 +++++++++---------- sycl/source/detail/graph_impl.hpp | 6 ++-- sycl/source/detail/memory_manager.cpp | 34 ++++++------------ sycl/source/detail/memory_manager.hpp | 9 +---- sycl/source/detail/queue_impl.cpp | 4 +-- sycl/source/detail/scheduler/commands.cpp | 4 +-- sycl/source/handler.cpp | 11 ------ .../DiscardEvents/discard_events_usm.cpp | 14 ++++---- .../discard_events_usm_ooo_queue.cpp | 14 ++++---- sycl/test-e2e/Graph/Explicit/usm_fill.cpp | 2 -- .../test-e2e/Graph/Explicit/usm_fill_host.cpp | 3 -- .../Graph/Explicit/usm_fill_shared.cpp | 3 -- sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp | 2 -- .../Graph/RecordReplay/usm_fill_host.cpp | 3 -- .../Graph/RecordReplay/usm_fill_shared.cpp | 3 -- .../RecordReplay/usm_memset_shortcut.cpp | 2 -- sycl/test/abi/pi_cuda_symbol_check.dump | 2 +- sycl/test/abi/pi_hip_symbol_check.dump | 2 +- sycl/test/abi/pi_level_zero_symbol_check.dump | 2 +- sycl/test/abi/pi_nativecpu_symbol_check.dump | 2 +- sycl/test/abi/pi_opencl_symbol_check.dump | 2 +- sycl/test/abi/sycl_symbols_linux.dump | 4 +-- sycl/test/abi/sycl_symbols_windows.dump | 4 +-- sycl/tools/xpti_helpers/usm_analyzer.hpp | 14 ++++---- sycl/unittests/SYCL2020/GetNativeOpenCL.cpp | 8 ++--- sycl/unittests/helpers/PiMockPlugin.hpp | 11 +++--- sycl/unittests/queue/USM.cpp | 6 ++-- sycl/unittests/queue/Wait.cpp | 5 ++- .../scheduler/InOrderQueueHostTaskDeps.cpp | 7 ++-- .../unittests/xpti_trace/QueueApiFailures.cpp | 20 +++++------ 41 files changed, 156 insertions(+), 211 deletions(-) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index b0fe95f4e0c0c..9519067a00484 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -519,8 +519,6 @@ The types of commands which are unsupported, and lead to this exception are: This corresponds to a memory buffer write command. * `handler::copy(src, dest)` or `handler::memcpy(dest, src)` - Where both `src` and `dest` are USM pointers. This corresponds to a USM copy command. -* `handler::fill(ptr, pattern, count)` - This corresponds to a USM memory - fill command. * `handler::memset(ptr, value, numBytes)` - This corresponds to a USM memory fill command. * `handler::prefetch()`. diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/include/sycl/detail/cg.hpp index 08edae99c506f..5c2ffdc712c00 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/include/sycl/detail/cg.hpp @@ -299,7 +299,7 @@ class CGFillUSM : public CG { MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } - const std::vector &getPattern() { return MPattern; } + int getFill() { return MPattern[0]; } }; /// "Prefetch USM" command group class. diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index b49f43d3d6ce6..195beb0a24861 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -128,7 +128,7 @@ _PI_API(piextUSMHostAlloc) _PI_API(piextUSMDeviceAlloc) _PI_API(piextUSMSharedAlloc) _PI_API(piextUSMFree) -_PI_API(piextUSMEnqueueFill) +_PI_API(piextUSMEnqueueMemset) _PI_API(piextUSMEnqueueMemcpy) _PI_API(piextUSMEnqueuePrefetch) _PI_API(piextUSMEnqueueMemAdvise) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 2a3b832d2df18..204fb56b67c15 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -172,10 +172,9 @@ // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM // - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D -// 16.51 Replaced piextUSMEnqueueMemset with piextUSMEnqueueFill -#define _PI_H_VERSION_MAJOR 16 -#define _PI_H_VERSION_MINOR 51 +#define _PI_H_VERSION_MAJOR 15 +#define _PI_H_VERSION_MINOR 50 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -2061,22 +2060,22 @@ __SYCL_EXPORT pi_result piextUSMPitchedAlloc( /// \param ptr is the memory to be freed __SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); -/// USM Fill API +/// USM Memset API /// /// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is the ptr with the bytes of the pattern to set -/// \param patternSize is the size in bytes of the pattern to set -/// \param count is the size in bytes to fill +/// \param ptr is the ptr to memset +/// \param value is value to set. It is interpreted as an 8-bit value and the +/// upper +/// 24 bits are ignored +/// \param count is the size in bytes to memset /// \param num_events_in_waitlist is the number of events to wait on /// \param events_waitlist is an array of events to wait on /// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue queue, void *ptr, - const void *pattern, - size_t patternSize, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event); +__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, + pi_int32 value, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memcpy API /// diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 833b26c336b72..1734416c2a331 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2870,9 +2870,14 @@ class __SYCL_EXPORT handler { /// device copyable. /// \param Count is the number of times to fill Pattern into Ptr. template void fill(void *Ptr, const T &Pattern, size_t Count) { + throwIfActionIsCreated(); + setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); static_assert(is_device_copyable::value, "Pattern must be device copyable"); - this->fill_impl(Ptr, &Pattern, sizeof(T), Count); + parallel_for<__usmfill>(range<1>(Count), [=](id<1> Index) { + T *CastedPtr = static_cast(Ptr); + CastedPtr[Index] = Pattern; + }); } /// Prevents any commands submitted afterward to this queue from executing @@ -3572,9 +3577,6 @@ class __SYCL_EXPORT handler { }); } - // Implementation of USM fill using command for native fill. - void fill_impl(void *Dest, const void *Value, size_t ValueSize, size_t Count); - // Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy. void ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index af2a70cd30158..8bf4eea26620c 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -902,12 +902,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d52069eaa5ff7..609750a4892b7 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index a65fa3beab0fe..8e6224ba5794a 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -929,22 +929,23 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -/// USM Fill API +/// USM Memset API /// /// @param Queue is the queue to submit to -/// @param Ptr is the ptr to fill -/// \param Pattern is the ptr with the bytes of the pattern to set -/// \param PatternSize is the size in bytes of the pattern to set -/// @param Count is the size in bytes to fill +/// @param Ptr is the ptr to memset +/// @param Value is value to set. It is interpreted as an 8-bit value and the +/// upper +/// 24 bits are ignored +/// @param Count is the size in bytes to memset /// @param NumEventsInWaitlist is the number of events to wait on /// @param EventsWaitlist is an array of events to wait on /// @param Event is the event that represents this operation -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index f71833a30134a..35f17a5316bac 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -905,12 +905,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 479f888035b22..9441e29804021 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -864,12 +864,12 @@ pi_result piextKernelSetArgPointer(pi_kernel Kernel, pi_uint32 ArgIndex, return pi2ur::piextKernelSetArgPointer(Kernel, ArgIndex, ArgSize, ArgValue); } -pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, + size_t Count, pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index afd574455cffd..889518aa76dba 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3889,12 +3889,11 @@ inline pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, return PI_SUCCESS; } -inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, - pi_uint32 NumEventsInWaitList, - const pi_event *EventsWaitList, - pi_event *OutEvent) { +inline pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, + pi_int32 Value, size_t Count, + pi_uint32 NumEventsInWaitList, + const pi_event *EventsWaitList, + pi_event *OutEvent) { PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); if (!Ptr) { return PI_ERROR_INVALID_VALUE; @@ -3906,7 +3905,8 @@ inline pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, ur_event_handle_t *UREvent = reinterpret_cast(OutEvent); - HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, Pattern, Count, + size_t PatternSize = 1; + HANDLE_ERRORS(urEnqueueUSMFill(UrQueue, Ptr, PatternSize, &Value, Count, NumEventsInWaitList, UrEventsWaitList, UREvent)); diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index c7a75fa3845a4..0f42f21d39093 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -437,24 +437,24 @@ __SYCL_EXPORT pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, ParamValueSizeRet); } -/// USM Fill API +/// USM Memset API /// -/// \param queue is the queue to submit to -/// \param ptr is the ptr to fill -/// \param pattern is the ptr with the bytes of the pattern to set -/// \param patternSize is the size in bytes of the pattern to set -/// \param count is the size in bytes to fill -/// \param num_events_in_waitlist is the number of events to wait on -/// \param events_waitlist is an array of events to wait on -/// \param event is the event that represents this operation -__SYCL_EXPORT pi_result piextUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, - size_t PatternSize, size_t Count, - pi_uint32 NumEventsInWaitlist, - const pi_event *EventsWaitlist, - pi_event *Event) { - return pi2ur::piextUSMEnqueueFill(Queue, Ptr, Pattern, PatternSize, Count, - NumEventsInWaitlist, EventsWaitlist, Event); +/// @param Queue is the queue to submit to +/// @param Ptr is the ptr to memset +/// @param Value is value to set. It is interpreted as an 8-bit value and the +/// upper +/// 24 bits are ignored +/// @param Count is the size in bytes to memset +/// @param NumEventsInWaitlist is the number of events to wait on +/// @param EventsWaitlist is an array of events to wait on +/// @param Event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, + pi_int32 Value, size_t Count, + pi_uint32 NumEventsInWaitlist, + const pi_event *EventsWaitlist, + pi_event *Event) { + return pi2ur::piextUSMEnqueueMemset( + Queue, Ptr, Value, Count, NumEventsInWaitlist, EventsWaitlist, Event); } __SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( @@ -1490,7 +1490,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { _PI_API(piEnqueueMemBufferMap) _PI_API(piEnqueueMemUnmap) _PI_API(piEnqueueMemBufferFill) - _PI_API(piextUSMEnqueueFill) + _PI_API(piextUSMEnqueueMemset) _PI_API(piEnqueueMemBufferCopyRect) _PI_API(piEnqueueMemBufferCopy) _PI_API(piextUSMEnqueueMemcpy) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index a1057517c9121..fdd3dcbea2a85 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -684,10 +684,8 @@ class node_impl { sycl::detail::CGFillUSM *FillUSM = static_cast(MCommandGroup.get()); Stream << "Dst: " << FillUSM->getDst() - << " Length: " << FillUSM->getLength() << " Pattern: "; - for (auto byte : FillUSM->getPattern()) - Stream << byte; - Stream << "\\n"; + << " Length: " << FillUSM->getLength() + << " Pattern: " << FillUSM->getFill() << "\\n"; } break; case sycl::detail::CG::CGTYPE::PrefetchUSM: diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index ef09a809e025e..840f95ea7a643 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -947,7 +947,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(!SrcQueue->getContextImplPtr()->is_host() && - "Host queue not supported in copy_usm."); + "Host queue not supported in fill_usm."); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -983,7 +983,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - const std::vector &Pattern, + int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { @@ -1006,21 +1006,9 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); const PluginPtr &Plugin = Queue->getPlugin(); - Plugin->call( - Queue->getHandleRef(), Mem, Pattern.data(), Pattern.size(), Length, - DepEvents.size(), DepEvents.data(), OutEvent); -} - -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl) { - std::vector vecPattern(sizeof(Pattern)); - std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); - MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, - OutEventImpl); + Plugin->call( + Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), + DepEvents.data(), OutEvent); } // TODO: This function will remain until ABI-breaking change @@ -1028,9 +1016,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - std::vector vecPattern(sizeof(Pattern)); - std::memcpy(vecPattern.data(), &Pattern, sizeof(Pattern)); - MemoryManager::fill_usm(Mem, Queue, Length, vecPattern, DepEvents, OutEvent, + MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, nullptr); // OutEventImpl); } @@ -1696,8 +1682,7 @@ void MemoryManager::ext_oneapi_copy_usm_cmd_buffer( void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, - std::vector Deps, + size_t Len, int Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { if (!DstMem) @@ -1705,9 +1690,10 @@ void MemoryManager::ext_oneapi_fill_usm_cmd_buffer( PI_ERROR_INVALID_VALUE); const PluginPtr &Plugin = Context->getPlugin(); - + // Pattern is interpreted as an unsigned char so pattern size is always 1. + size_t PatternSize = 1; Plugin->call( - CommandBuffer, DstMem, Pattern.data(), Pattern.size(), Len, Deps.size(), + CommandBuffer, DstMem, &Pattern, PatternSize, Len, Deps.size(), Deps.data(), OutSyncPoint); } diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 0636fe6f3d1c0..1d2800bf9dadc 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -171,13 +171,6 @@ class __SYCL_EXPORT MemoryManager { std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent); - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent, - const detail::EventImplPtr &OutEventImpl); - - // TODO: This function will remain until ABI-breaking change static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector DepEvents, @@ -326,7 +319,7 @@ class __SYCL_EXPORT MemoryManager { static void ext_oneapi_fill_usm_cmd_buffer( sycl::detail::ContextImplPtr Context, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *DstMem, - size_t Len, const std::vector &Pattern, + size_t Len, int Pattern, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d3adabe185802..5592619b2316b 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -157,11 +157,11 @@ event queue_impl::memset(const std::shared_ptr &Self, // Emit a begin/end scope for this call PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); #endif - const std::vector Pattern{static_cast(Value)}; + return submitMemOpHelper( Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, - Count, Pattern); + Count, Value); } void report(const code_location &CodeLoc) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a335a38e810ae..3a48fd50f8259 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2851,7 +2851,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), - Fill->getLength(), Fill->getPattern(), std::move(MSyncPointDeps), + Fill->getLength(), Fill->getFill(), std::move(MSyncPointDeps), &OutSyncPoint); MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; @@ -3045,7 +3045,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { case CG::CGTYPE::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), - Fill->getPattern(), std::move(RawEvents), Event, + Fill->getFill(), std::move(RawEvents), Event, MEvent); return PI_SUCCESS; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 663ab844cc517..3d81881aa6ac6 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -976,17 +976,6 @@ void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { setType(detail::CG::AdviseUSM); } -void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, - size_t Count) { - throwIfActionIsCreated(); - MDstPtr = Dest; - MPattern.resize(ValueSize); - std::memcpy(MPattern.data(), Value, ValueSize); - MLength = Count * ValueSize; - setUserFacingNodeType(ext::oneapi::experimental::node_type::memfill); - setType(detail::CG::FillUSM); -} - void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp index 269706b10321d..48ab65c68896c 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -20,7 +20,8 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// CHECK: ---> piextUSMEnqueueFill( +// Q.fill don't use piEnqueueMemBufferFill +// CHECK: ---> piEnqueueKernelLaunch( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -46,7 +47,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -56,7 +57,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -64,7 +65,8 @@ // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // -// CHECK: ---> piextUSMEnqueueFill( +// Q.fill don't use piEnqueueMemBufferFill +// CHECK: ---> piEnqueueKernelLaunch( // CHECK: pi_event * : // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // @@ -90,7 +92,7 @@ // CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ] // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp index fb7555bab9d9f..96d53a632beb6 100644 --- a/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -12,7 +12,7 @@ // Since it is a warning it is safe to ignore for this test. // // Everything that follows TestQueueOperations() -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -22,7 +22,8 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// CHECK: ---> piextUSMEnqueueFill( +// Q.fill don't use piEnqueueMemBufferFill +// CHECK: ---> piEnqueueKernelLaunch( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -57,7 +58,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -68,7 +69,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // Everything that follows TestQueueOperationsViaSubmit() -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -78,7 +79,8 @@ // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS // -// CHECK: ---> piextUSMEnqueueFill( +// Q.fill don't use piEnqueueMemBufferFill +// CHECK: ---> piEnqueueKernelLaunch( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS @@ -113,7 +115,7 @@ // CHECK: ---> pi_result : PI_SUCCESS // // RegularQueue -// CHECK: ---> piextUSMEnqueueFill( +// CHECK: ---> piextUSMEnqueueMemset( // CHECK: pi_event * : // CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ] // CHECK: ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp index 26d3b88e64ff9..d2aff0dee2c20 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill.cpp @@ -5,8 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp index e005170a5feff..7acd5143f6f54 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_host.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_host_allocations -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp index bb40a5683843c..8b8c623f48107 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_fill_shared.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_shared_allocations -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp index d2925683837c2..16bf82ac572ae 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill.cpp @@ -5,8 +5,6 @@ // Extra run to check for immediate-command-list in Level Zero // RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp index c08b70c695e4f..0e9b3dbdb39e8 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_host.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_host_allocations -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_host.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp index f40f8d3aa4930..950afd3da8b97 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_fill_shared.cpp @@ -7,9 +7,6 @@ // REQUIRES: aspect-usm_shared_allocations -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_fill_shared.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp index 84a5d42f0da04..9506e99cd73ad 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp @@ -2,8 +2,6 @@ // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{ %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} -// USM fill command not supported for OpenCL -// UNSUPPORTED: opencl // // Tests adding a USM memset queue shortcut operation as a graph node. diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index 462c9a6d5738b..ed3834ddfd9fe 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc -piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D +piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 336d792a52f10..327c514ebdddd 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc -piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D +piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index b912cba8d14c3..2ebc6b56078a4 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -153,11 +153,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc -piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D +piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 4be1842ff26ce..0bb2568eb3ff3 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -154,11 +154,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc -piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D +piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 71d1b60b17b79..86860b50e57b6 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -153,11 +153,11 @@ piextQueueCreateWithNativeHandle piextQueueGetNativeHandle piextSignalExternalSemaphore piextUSMDeviceAlloc -piextUSMEnqueueFill piextUSMEnqueueFill2D piextUSMEnqueueMemAdvise piextUSMEnqueueMemcpy piextUSMEnqueueMemcpy2D +piextUSMEnqueueMemset piextUSMEnqueueMemset2D piextUSMEnqueuePrefetch piextUSMFree diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2bb19a0022008..a1985cf5d841e 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3292,7 +3292,7 @@ _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptr _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmRKSt6vectorIcSaIcEES9_IjSaIjEEPj +_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3305,7 +3305,6 @@ _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPN _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc @@ -3625,7 +3624,6 @@ _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm -_ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f41cbd3ac062c..69473362c1985 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4100,7 +4100,7 @@ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -4151,8 +4151,6 @@ ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_impl@handler@_V1@sycl@@AEAAXPEAXPEBX_K2@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KAEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 1eee3474c51ed..9cdf27dd98ddc 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -214,8 +214,8 @@ class USMAnalyzer { USMAnalyzer::handleUSMSharedAlloc); ArgHandlerPreCall.set_piextUSMFree(USMAnalyzer::handleUSMFree); ArgHandlerPreCall.set_piMemBufferCreate(USMAnalyzer::handleMemBufferCreate); - ArgHandlerPreCall.set_piextUSMEnqueueFill( - USMAnalyzer::handleUSMEnqueueFill); + ArgHandlerPreCall.set_piextUSMEnqueueMemset( + USMAnalyzer::handleUSMEnqueueMemset); ArgHandlerPreCall.set_piextUSMEnqueueMemcpy( USMAnalyzer::handleUSMEnqueueMemcpy); ArgHandlerPreCall.set_piextUSMEnqueuePrefetch( @@ -350,11 +350,11 @@ class USMAnalyzer { } } - static void handleUSMEnqueueFill(const pi_plugin &, std::optional, - pi_queue, void *ptr, const void *, size_t, - size_t numBytes, pi_uint32, const pi_event *, - pi_event *) { - CheckPointerValidness("input parameter", ptr, numBytes, "fill"); + static void handleUSMEnqueueMemset(const pi_plugin &, + std::optional, pi_queue, + void *ptr, pi_int32, size_t numBytes, + pi_uint32, const pi_event *, pi_event *) { + CheckPointerValidness("input parameter", ptr, numBytes, "memset"); } static void handleUSMEnqueueMemcpy(const pi_plugin &, diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index 3860833256048..27967973c1363 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -75,9 +75,9 @@ pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, return PI_SUCCESS; } -static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, const void *, - size_t, size_t, pi_uint32, - const pi_event *, pi_event *event) { +static pi_result redefinedUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, + pi_uint32, const pi_event *, + pi_event *event) { *event = reinterpret_cast(new int{}); return PI_SUCCESS; } @@ -97,7 +97,7 @@ TEST(GetNative, GetNativeHandle) { Mock.redefineBefore(redefinedMemRetain); Mock.redefineBefore( redefinedMemBufferCreate); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); context Context(Plt); diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 4bb199eb748c9..a0f267bd97d50 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1201,12 +1201,11 @@ inline pi_result mock_piextUSMFree(pi_context context, void *ptr) { return PI_SUCCESS; } -inline pi_result mock_piextUSMEnqueueFill(pi_queue queue, void *ptr, - const void *pattern, - size_t patternSize, size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, + pi_int32 value, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/queue/USM.cpp b/sycl/unittests/queue/USM.cpp index 03ff8c538d7ed..30cdd2b37393e 100644 --- a/sycl/unittests/queue/USM.cpp +++ b/sycl/unittests/queue/USM.cpp @@ -48,8 +48,8 @@ pi_result redefinedUSMEnqueueMemcpyAfter(pi_queue, pi_bool, void *, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, const void *, size_t, - size_t, pi_uint32, const pi_event *, +pi_result redefinedUSMEnqueueMemsetAfter(pi_queue, void *, pi_int32, size_t, + pi_uint32, const pi_event *, pi_event *Event) { // Set MEMSET to the event produced by the original USMEnqueueMemcpy MEMSET = *Event; @@ -64,7 +64,7 @@ TEST(USM, NoOpPreservesDependencyChain) { redefinedEnqueueEventsWaitAfter); Mock.redefineAfter( redefinedUSMEnqueueMemcpyAfter); - Mock.redefineAfter( + Mock.redefineAfter( redefinedUSMEnqueueMemsetAfter); context Ctx{Plt.get_devices()[0]}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index f9ec3fb6083ac..8b2d72055d847 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -37,8 +37,7 @@ pi_result redefinedQueueCreateEx(pi_context context, pi_device device, return PI_SUCCESS; } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -89,7 +88,7 @@ TEST(QueueWait, QueueWaitTest) { Mock.redefineBefore( redefinedQueueCreateEx); Mock.redefineBefore(redefinedQueueFinish); - Mock.redefineBefore( + Mock.redefineBefore( redefinedUSMEnqueueMemset); Mock.redefineBefore(redefinedEventsWait); Mock.redefineBefore( diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 07d36d479d3c0..8693ff5e4c52b 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -61,8 +61,7 @@ inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, ExecutedCommands.push_back({CommandType::KERNEL, EventsCount}); return PI_SUCCESS; } -inline pi_result customextUSMEnqueueMemset(pi_queue, void *, const void *, - size_t, size_t, +inline pi_result customextUSMEnqueueMemset(pi_queue, void *, pi_int32, size_t, pi_uint32 EventsCount, const pi_event *, pi_event *) { ExecutedCommands.push_back({CommandType::MEMSET, EventsCount}); @@ -74,7 +73,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); @@ -127,7 +126,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { sycl::unittest::PiMock Mock; Mock.redefineBefore( customEnqueueKernelLaunch); - Mock.redefineBefore( + Mock.redefineBefore( customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index c634f2dfce299..88c5fae49394c 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -145,8 +145,7 @@ TEST_F(QueueApiFailures, QueueSingleTask) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, +pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, size_t Count, pi_uint32 Num_events_in_waitlist, const pi_event *Events_waitlist, @@ -155,7 +154,7 @@ pi_result redefinedUSMEnqueueMemset(pi_queue Queue, void *Ptr, } TEST_F(QueueApiFailures, QueueMemset) { - MockPlugin.redefine( + MockPlugin.redefine( redefinedUSMEnqueueMemset); MockPlugin.redefine( redefinedPluginGetLastError); @@ -242,17 +241,18 @@ TEST_F(QueueApiFailures, QueueCopy) { EXPECT_FALSE(queryReceivedNotifications(TraceType, Message)); } -pi_result redefinedUSMEnqueueFill(pi_queue Queue, void *Ptr, - const void *Pattern, size_t PatternSize, - size_t Count, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +pi_result redefinedEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, + const void *Pattern, size_t PatternSize, + size_t Offset, size_t Size, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, + pi_event *Event) { return PI_ERROR_PLUGIN_SPECIFIC_ERROR; } TEST_F(QueueApiFailures, QueueFill) { - MockPlugin.redefine( - redefinedUSMEnqueueFill); + MockPlugin.redefine( + redefinedEnqueueMemBufferFill); MockPlugin.redefine( redefinedPluginGetLastError); sycl::queue Q;