diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 01a54347bfdb..5220a145676b 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -282,18 +282,22 @@ event queue_impl::memcpyFromDeviceGlobal( DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest); } -sycl::detail::optional queue_impl::getLastEvent() { +sycl::detail::optional +queue_impl::getLastEvent(const std::shared_ptr &Self) { // The external event is required to finish last if set, so it is considered // the last event if present. if (std::optional ExternalEvent = MInOrderExternalEvent.read()) return ExternalEvent; std::lock_guard Lock{MMutex}; - if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr) + if (MEmpty) return std::nullopt; - if (!MGraph.expired() && MExtGraphDeps.LastEventPtr) - return detail::createSyclObjFromImpl(MExtGraphDeps.LastEventPtr); - return detail::createSyclObjFromImpl(MDefaultGraphDeps.LastEventPtr); + auto &LastEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + if (LastEvent) + return detail::createSyclObjFromImpl(LastEvent); + // We insert a marker to represent an event at end. + return detail::createSyclObjFromImpl(insertMarkerEvent(Self)); } void queue_impl::addEvent(const event &Event) { @@ -344,9 +348,49 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, HandlerImpl->MEventMode = SubmitInfo.EventMode(); - auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc()); + auto isHostTask = Type == CGType::CodeplayHostTask; + + // TODO: this shouldn't be needed but without this + // the legacy adapter doesn't synchronize the operations properly + // when non-immediate command lists are used. + auto isGraphSubmission = Type == CGType::ExecCommandBuffer; + + auto requiresPostProcess = SubmitInfo.PostProcessorFunc() || Streams.size(); + auto noLastEventPath = !isHostTask && !isGraphSubmission && + MNoEventMode.load(std::memory_order_relaxed) && + !requiresPostProcess; + + if (noLastEventPath) { + std::unique_lock Lock(MMutex); + + // Check if we are still in no event mode. There could + // have been a concurrent submit. + if (MNoEventMode.load(std::memory_order_relaxed)) { + return finalizeHandlerInOrderNoEventsUnlocked(Handler); + } + } + + event Event; + if (!isInOrder()) { + Event = finalizeHandlerOutOfOrder(Handler); + addEvent(Event); + } else { + if (isHostTask) { + std::unique_lock Lock(MMutex); + Event = finalizeHandlerInOrderHostTaskUnlocked(Handler); + } else { + std::unique_lock Lock(MMutex); + + if (!isGraphSubmission && trySwitchingToNoEventsMode()) { + Event = finalizeHandlerInOrderNoEventsUnlocked(Handler); + } else { + Event = finalizeHandlerInOrderWithDepsUnlocked(Handler); + } + } + } - addEvent(Event); + if (SubmitInfo.PostProcessorFunc()) + handlerPostProcess(Handler, SubmitInfo.PostProcessorFunc(), Event); const auto &EventImpl = detail::getSyclObjImpl(Event); for (auto &Stream : Streams) { @@ -370,63 +414,14 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, #ifndef __INTEL_PREVIEW_BREAKING_CHANGES event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, const std::shared_ptr &Self, - const std::shared_ptr &PrimaryQueue, + const std::shared_ptr &, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) { -#ifdef __INTEL_PREVIEW_BREAKING_CHANGES - detail::handler_impl HandlerImplVal(PrimaryQueue.get(), CallerNeedsEvent); - detail::handler_impl *HandlerImpl = &HandlerImplVal; - handler Handler(HandlerImpl, Self); -#else - handler Handler(Self, CallerNeedsEvent); - auto &HandlerImpl = detail::getSyclObjImpl(Handler); -#endif - -#if XPTI_ENABLE_INSTRUMENTATION - if (xptiTraceEnabled()) { - Handler.saveCodeLoc(Loc, IsTopCodeLoc); - } -#endif - - { - NestedCallsTracker tracker; - CGF(Handler); - } - - // Scheduler will later omit events, that are not required to execute tasks. - // Host and interop tasks, however, are not submitted to low-level runtimes - // and require separate dependency management. - const CGType Type = HandlerImpl->MCGType; - std::vector Streams; - if (Type == CGType::Kernel) - Streams = std::move(Handler.MStreamStorage); - - HandlerImpl->MEventMode = SubmitInfo.EventMode(); - - auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc()); - - addEvent(Event); - - const auto &EventImpl = detail::getSyclObjImpl(Event); - for (auto &Stream : Streams) { - // We don't want stream flushing to be blocking operation that is why submit - // a host task to print stream buffer. It will fire up as soon as the kernel - // finishes execution. - auto L = [&](handler &ServiceCGH) { - Stream->generateFlushCommand(ServiceCGH); - }; - detail::type_erased_cgfo_ty CGF{L}; - event FlushEvent = - submit_impl(CGF, Self, PrimaryQueue, SecondaryQueue, - /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {}); - EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent)); - registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); - } - - return Event; + return submit_impl(CGF, Self, SecondaryQueue.get(), CallerNeedsEvent, Loc, + IsTopCodeLoc, SubmitInfo); } #endif @@ -467,24 +462,19 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &ExpandedDepEvents = getExtendDependencyList(DepEvents, MutableDepEvents, Lock); + MEmpty = false; + // If we have a command graph set we need to capture the op through the // handler rather than by-passing the scheduler. if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( ExpandedDepEvents, MContext)) { - if (!CallerNeedsEvent && supportsDiscardingPiEvents()) { + auto isNoEventsMode = trySwitchingToNoEventsMode(); + if (!CallerNeedsEvent && isNoEventsMode) { NestedCallsTracker tracker; MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), /*PiEvent*/ nullptr); - event DiscardedEvent = createDiscardedEvent(); - if (isInOrder()) { - // Store the discarded event for proper in-order dependency tracking. - auto &EventToStoreIn = MGraph.expired() - ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent); - } - return DiscardedEvent; + return createDiscardedEvent(); } event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); @@ -509,7 +499,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, } } - if (isInOrder()) { + if (isInOrder() && !isNoEventsMode) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; EventToStoreIn = EventImpl; @@ -637,9 +627,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } std::vector> WeakEvents; + EventImplPtr LastEvent; { std::lock_guard Lock(MMutex); WeakEvents.swap(MEventsWeak); + LastEvent = MDefaultGraphDeps.LastEventPtr; MMissedCleanupRequests.unset( [&](MissedCleanupRequestsType &MissedCleanupRequests) { @@ -664,6 +656,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } } } + + if (LastEvent) { + LastEvent->wait(LastEvent); + } + const AdapterPtr &Adapter = getAdapter(); Adapter->call(getHandleRef()); @@ -754,21 +751,21 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { } bool queue_impl::queue_empty() const { - // If we have in-order queue where events are not discarded then just check - // the status of the last event. + // If we have in-order queue with non-empty last event, just check its status. if (isInOrder()) { std::lock_guard Lock(MMutex); - // If there is no last event we know that no work has been submitted, so it - // must be trivially empty. - if (!MDefaultGraphDeps.LastEventPtr) - return true; - // Otherwise, check if the last event is finished. - // Note that we fall back to the backend query if the event was discarded, - // which may happend despite the queue not being a discard event queue. - if (!MDefaultGraphDeps.LastEventPtr->isDiscarded()) + if (MDefaultGraphDeps.LastEventPtr && + !MDefaultGraphDeps.LastEventPtr->isDiscarded()) return MDefaultGraphDeps.LastEventPtr ->get_info() == info::event_command_status::complete; + else if (getContextImplPtr()->getBackend() == backend::opencl) { + assert(!MNoEventMode); + // openCL does not support urQueueGetInfo and will never have + // MNoEventMode set, so if there is no last event, the queue + // must be empty. + return true; + } } // Check the status of the backend queue if this is not a host queue. @@ -778,6 +775,11 @@ bool queue_impl::queue_empty() const { if (!IsReady) return false; + // If got here, it means that LastEventPtr is nullptr (so no possible Host + // Tasks) and there is nothing executing on the device. + if (isInOrder()) + return true; + // We may have events like host tasks which are not submitted to the backend // queue so we need to get their status separately. std::lock_guard Lock(MMutex); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 8a0a1476c2ee..1e7666fbc49d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -164,9 +164,12 @@ class queue_impl { // different instance ID until this gets added. constructorNotification(); #endif + + trySwitchingToNoEventsMode(); } - sycl::detail::optional getLastEvent(); + sycl::detail::optional + getLastEvent(const std::shared_ptr &Self); public: /// Constructs a SYCL queue from adapter interoperability handle. @@ -227,6 +230,8 @@ class queue_impl { // different instance ID until this gets added. constructorNotification(); #endif + + trySwitchingToNoEventsMode(); } ~queue_impl() { @@ -593,6 +598,12 @@ class queue_impl { std::lock_guard Lock(MMutex); MGraph = Graph; MExtGraphDeps.reset(); + + if (Graph) { + MNoEventMode = false; + } else { + trySwitchingToNoEventsMode(); + } } std::shared_ptr @@ -681,49 +692,123 @@ class queue_impl { } template - event finalizeHandlerInOrder(HandlerType &Handler) { - // Accessing and changing of an event isn't atomic operation. - // Hence, here is the lock for thread-safety. - std::lock_guard Lock{MMutex}; + void synchronizeWithExternalEvent(HandlerType &Handler) { + // If there is an external event set, add it as a dependency and clear it. + // We do not need to hold the lock as MLastEventMtx will ensure the last + // event reflects the corresponding external event dependence as well. + std::optional ExternalEvent = popExternalEvent(); + if (ExternalEvent) + Handler.depends_on(*ExternalEvent); + } + + bool trySwitchingToNoEventsMode() { + if (MNoEventMode.load(std::memory_order_relaxed)) + return true; + + // opencl does not support UR_QUEUE_INFO_EMPTY query + if (MContext->getBackend() == backend::opencl) + return false; + + if (!MGraph.expired() || !isInOrder()) + return false; + + if (MDefaultGraphDeps.LastEventPtr != nullptr && + !Scheduler::CheckEventReadiness(MContext, + MDefaultGraphDeps.LastEventPtr)) + return false; + + MNoEventMode.store(true, std::memory_order_relaxed); + MDefaultGraphDeps.LastEventPtr = nullptr; + return true; + } + + template + event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) { + assert(isInOrder()); + assert(MGraph.expired()); + assert(MDefaultGraphDeps.LastEventPtr == nullptr); + assert(MNoEventMode); + + MEmpty = false; + + synchronizeWithExternalEvent(Handler); + + return Handler.finalize(); + } + + template + event finalizeHandlerInOrderHostTaskUnlocked(HandlerType &Handler) { + assert(isInOrder()); + assert(Handler.getType() == CGType::CodeplayHostTask); auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; - // This dependency is needed for the following purposes: - // - host tasks are handled by the runtime and cannot be implicitly - // synchronized by the backend. - // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked - // by a host task. This dependency allows to build the enqueue order in - // the RT but will not be passed to the backend. See getPIEvents in - // Command. - if (EventToBuildDeps) { - // In the case where the last event was discarded and we are to run a - // host_task, we insert a barrier into the queue and use the resulting - // event as the dependency for the host_task. - // Note that host_task events can never be discarded, so this will not - // insert barriers between host_task enqueues. - if (EventToBuildDeps->isDiscarded() && - Handler.getType() == CGType::CodeplayHostTask) - EventToBuildDeps = insertHelperBarrier(Handler); - + if (MNoEventMode) { + // There might be some operations submitted to the queue + // but the LastEventPtr is not set. If we are to run a host_task, + // we need to insert a barrier to ensure proper synchronization. + Handler.depends_on(insertHelperBarrier(Handler)); + } + if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) { + // We are not in no-event mode, so we can use the last event. // depends_on after an async alloc is explicitly disallowed. Async alloc // handles in order queue dependencies preemptively, so we skip them. // Note: This could be improved by moving the handling of dependencies // to before calling the CGF. - if (!EventToBuildDeps->isDiscarded() && - !(Handler.getType() == CGType::AsyncAlloc)) - Handler.depends_on(EventToBuildDeps); + Handler.depends_on(EventToBuildDeps); } - // If there is an external event set, add it as a dependency and clear it. - // We do not need to hold the lock as MLastEventMtx will ensure the last - // event reflects the corresponding external event dependence as well. - std::optional ExternalEvent = popExternalEvent(); - if (ExternalEvent) - Handler.depends_on(*ExternalEvent); + MEmpty = false; + MNoEventMode = false; + + synchronizeWithExternalEvent(Handler); + + auto Event = Handler.finalize(); + EventToBuildDeps = getSyclObjImpl(Event); + return Event; + } + + template + event finalizeHandlerInOrderWithDepsUnlocked(HandlerType &Handler) { + // this is handled by finalizeHandlerInOrderHostTask + assert(Handler.getType() != CGType::CodeplayHostTask); + + if (Handler.getType() == CGType::ExecCommandBuffer && MNoEventMode) { + // TODO: this shouldn't be needed but without this + // the legacy adapter doesn't synchronize the operations properly + // when non-immediate command lists are used. + Handler.depends_on(insertHelperBarrier(Handler)); + } + + auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + + // depends_on after an async alloc is explicitly disallowed. Async alloc + // handles in order queue dependencies preemptively, so we skip them. + // Note: This could be improved by moving the handling of dependencies + // to before calling the CGF. + if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) { + // If we have last event, this means we are no longer in no-event mode. + assert(!MNoEventMode); + Handler.depends_on(EventToBuildDeps); + } + + MEmpty = false; + + synchronizeWithExternalEvent(Handler); auto EventRet = Handler.finalize(); - EventToBuildDeps = getSyclObjImpl(EventRet); + + if (!getSyclObjImpl(EventRet)->isDiscarded()) { + MNoEventMode = false; + EventToBuildDeps = getSyclObjImpl(EventRet); + + // TODO: if the event is NOP we should be able to discard it as well. + // However, NOP events are used to describe ordering for graph operations + // Once https://github.com/intel/llvm/issues/18330 is fixed, we can + // start relying on command buffer in-order property instead. + } return EventRet; } @@ -732,6 +817,9 @@ class queue_impl { event finalizeHandlerOutOfOrder(HandlerType &Handler) { const CGType Type = getSyclObjImpl(Handler)->MCGType; std::lock_guard Lock{MMutex}; + + MEmpty = false; + // The following code supports barrier synchronization if host task is // involved in the scenario. Native barriers cannot handle host task // dependency so in the case where some commands were not enqueued @@ -766,9 +854,9 @@ class queue_impl { } template - event finalizeHandlerPostProcess( - HandlerType &Handler, - const optional &PostProcessorFunc) { + void handlerPostProcess(HandlerType &Handler, + const optional &PostProcessorFunc, + event &Event) { bool IsKernel = Handler.getType() == CGType::Kernel; bool KernelUsesAssert = false; @@ -779,26 +867,8 @@ class queue_impl { ProgramManager::getInstance().kernelUsesAssert( Handler.MKernelName.data()); - auto Event = MIsInorder ? finalizeHandlerInOrder(Handler) - : finalizeHandlerOutOfOrder(Handler); - auto &PostProcess = *PostProcessorFunc; - PostProcess(IsKernel, KernelUsesAssert, Event); - - return Event; - } - - // template is needed for proper unit testing - template - event finalizeHandler(HandlerType &Handler, - const optional &PostProcessorFunc) { - if (PostProcessorFunc) { - return finalizeHandlerPostProcess(Handler, PostProcessorFunc); - } else { - return MIsInorder ? finalizeHandlerInOrder(Handler) - : finalizeHandlerOutOfOrder(Handler); - } } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES @@ -967,6 +1037,15 @@ class queue_impl { const bool MIsInorder; + // Specifies whether this queue records last event. This can only + // be true if the queue is in-order, the command graph is not + // associated with the queue and there has never been any host + // tasks submitted to the queue. + std::atomic MNoEventMode = false; + + // Used exclusively in getLastEvent implementation + bool MEmpty = true; + std::vector MStreamsServiceEvents; std::mutex MStreamsServiceEventsMutex; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 2d3e7d17668a..aefbf112bb76 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -671,8 +671,8 @@ EventImplPtr Scheduler::addCommandGraphUpdate( return NewCmdEvent; } -bool CheckEventReadiness(const ContextImplPtr &Context, - const EventImplPtr &SyclEventImplPtr) { +bool Scheduler::CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr) { // Events that don't have an initialized context are throwaway events that // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5b657c1f13b9..a8f7235ac0d0 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -477,6 +477,9 @@ class Scheduler { const QueueImplPtr &Queue, std::vector Requirements, std::vector &Events); + static bool CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr); + static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, const ContextImplPtr &Context); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 983966937db6..bd3e02df6c84 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -322,7 +322,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { assert(!QueueImpl->hasCommandGraph() && "Should not be called in on graph recording."); - sycl::detail::optional LastEvent = QueueImpl->getLastEvent(); + sycl::detail::optional LastEvent = QueueImpl->getLastEvent(QueueImpl); if (LastEvent) return *LastEvent; @@ -340,10 +340,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { if (is_in_order() && !impl->hasCommandGraph() && !impl->MIsProfilingEnabled) { - event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); - // If the last event was discarded, fall back to enqueuing a barrier. - if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) - return InOrderLastEvent; + return getBarrierEventForInorderQueueHelper(impl); } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); @@ -368,10 +365,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, }); if (is_in_order() && !impl->hasCommandGraph() && !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) { - event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); - // If the last event was discarded, fall back to enqueuing a barrier. - if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) - return InOrderLastEvent; + return getBarrierEventForInorderQueueHelper(impl); } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, @@ -453,20 +447,7 @@ sycl::detail::optional queue::ext_oneapi_get_last_event_impl() const { make_error_code(errc::invalid), "ext_oneapi_get_last_event() can only be called on in-order queues."); - sycl::detail::optional LastEvent = impl->getLastEvent(); - - // If there was no last event, the queue is yet to have any work submitted and - // we return a std::nullopt. - if (!LastEvent) - return std::nullopt; - - // If the last event was discarded or a NOP, we insert a marker to represent - // an event at end. - auto LastEventImpl = detail::getSyclObjImpl(*LastEvent); - if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP()) - LastEvent = - detail::createSyclObjFromImpl(impl->insertMarkerEvent(impl)); - return LastEvent; + return impl->getLastEvent(impl); } void queue::ext_oneapi_set_external_event(const event &external_event) { diff --git a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp index e4d33dc0a765..17758071aed6 100644 --- a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp +++ b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp @@ -36,8 +36,13 @@ int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) { << std::endl; return 1; } - if (*E != *LastEvent) { - std::cout << "Failed " << CheckName << std::endl; + if (LastEvent->get_info() == + sycl::info::event_command_status::complete && + E->get_info() != + sycl::info::event_command_status::complete) { + std::cout << "ext_oneapi_get_last_event() returned an event that is " + "complete, but the event returned by CheckFunc() is not." + << std::endl; return 1; } return 0; diff --git a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp index 4d15e3b0ce6a..97b6ea79c017 100644 --- a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp @@ -1,10 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -// Test to check that we don't insert unnecessary urEnqueueEventsWaitWithBarrier -// calls if queue is in-order and wait list is empty. - -// CHECK-NOT: <--- urEnqueueEventsWaitWithBarrier +// RUN: %{run} %t.out #include @@ -26,13 +21,11 @@ int main() { cgh.single_task([=]() { *Res += 9; }); }); auto BarrierEvent1 = Q.ext_oneapi_submit_barrier(); - assert(BarrierEvent1 == Event1); auto Event2 = Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() { *Res *= 2; }); }); auto BarrierEvent2 = Q.ext_oneapi_submit_barrier(); - assert(BarrierEvent2 == Event2); BarrierEvent2.wait(); // Check that kernel events are completed after waiting for barrier event. @@ -50,7 +43,6 @@ int main() { auto Event1 = Q.submit( [&](sycl::handler &CGH) { CGH.host_task([&] { *Res += 1; }); }); auto BarrierEvent1 = Q.ext_oneapi_submit_barrier(); - assert(Event1 == BarrierEvent1); auto Event2 = Q.submit([&](sycl::handler &CGH) { CGH.fill(Res, 10, 1); }); Q.wait(); diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index b415ae27dbfa..af908723a0b2 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -11,8 +11,6 @@ add_sycl_unittest(ExtensionsTests OBJECT USMP2P.cpp CompositeDevice.cpp OneAPIProd.cpp - EnqueueFunctionsEvents.cpp - DiscardEvent.cpp ProfilingTag.cpp KernelProperties.cpp NoDeviceIPVersion.cpp diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index f65018d63fb3..c50b8c1e9937 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -387,12 +387,6 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a host task. Explicit dependency is needed to enforce - // the execution order. - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - auto EventLast = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); @@ -402,10 +396,6 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { // submission to the backend). if (BlockHostTask) Lock.unlock(); - ASSERT_EQ(EventLastWaitList.size(), size_t(BlockHostTask)); - if (EventLastWaitList.size()) { - ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); - } EventLast.wait(); }; @@ -474,21 +464,6 @@ TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { auto InOrderGraphExec = InOrderGraph.finalize(); auto EventGraph = InOrderQueue.submit( [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); - - auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a memset. Explicit dependency is needed to enforce the - // execution order. - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - - auto EventLast = - InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); - auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); - auto EventLastWaitList = EventLastImpl->getWaitList(); - // Previous task is not a host task. In Order queue dependency is managed by - // the backend for non-host kernels. - ASSERT_EQ(EventLastWaitList.size(), 0lu); } TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { @@ -553,19 +528,4 @@ TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { auto InOrderGraphExec = InOrderGraph.finalize(); auto EventGraph = InOrderQueue.submit( [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); - - auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a memcpy. Explicit dependency is needed to enforce the - // execution order - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - - auto EventLast = - InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); - auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); - auto EventLastWaitList = EventLastImpl->getWaitList(); - // Previous task is not a host task. In Order queue dependency is managed by - // the backend for non-host kernels. - ASSERT_EQ(EventLastWaitList.size(), 0lu); } diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp deleted file mode 100644 index 73a2436cc0d4..000000000000 --- a/sycl/unittests/Extensions/DiscardEvent.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//==------------------------- DiscardEvent.cpp -----------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "sycl/platform.hpp" -#include -#include - -#include - -#include -#include -#include - -using namespace sycl; - -namespace oneapiext = ext::oneapi::experimental; - -namespace { - -thread_local size_t counter_urEnqueueKernelLaunch = 0; -inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { - ++counter_urEnqueueKernelLaunch; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; -thread_local std::chrono::time_point - timestamp_urEnqueueEventsWaitWithBarrier; -inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *) { - ++counter_urEnqueueEventsWaitWithBarrier; - timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); - return UR_RESULT_SUCCESS; -} - -class DiscardEventTests : public ::testing::Test { -public: - DiscardEventTests() - : Mock{}, Q{context(sycl::platform()), default_selector_v, - property::queue::in_order{}} {} - -protected: - void SetUp() override { - counter_urEnqueueKernelLaunch = 0; - counter_urEnqueueEventsWaitWithBarrier = 0; - } - - unittest::UrMock<> Mock; - queue Q; -}; - -TEST_F(DiscardEventTests, BarrierBeforeHostTask) { - // Special test for case where host_task need an event after, so a barrier is - // enqueued to create a usable event. - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); - - oneapiext::single_task>(Q, []() {}); - - std::chrono::time_point HostTaskTimestamp; - Q.submit([&](handler &CGH) { - CGH.host_task( - [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); - }).wait(); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); -} - -} // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp deleted file mode 100644 index 1b0eb368b350..000000000000 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ /dev/null @@ -1,448 +0,0 @@ -//==-------------------- EnqueueFunctionsEvents.cpp ------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Tests the behavior of enqueue free functions when events can be discarded. - -#include "FreeFunctionCommands/FreeFunctionEventsHelpers.hpp" - -#include -#include - -using namespace sycl; - -namespace oneapiext = ext::oneapi::experimental; - -namespace { - -class EnqueueFunctionsEventsTests : public ::testing::Test { -public: - EnqueueFunctionsEventsTests() - : Mock{}, Q{context(sycl::platform()), default_selector_v, - property::queue::in_order{}} {} - -protected: - void SetUp() override { - counter_urEnqueueKernelLaunch = 0; - counter_urUSMEnqueueMemcpy = 0; - counter_urUSMEnqueueFill = 0; - counter_urUSMEnqueuePrefetch = 0; - counter_urUSMEnqueueMemAdvise = 0; - counter_urEnqueueEventsWaitWithBarrier = 0; - } - - unittest::UrMock<> Mock; - queue Q; -}; - -inline void CheckLastEventDiscarded(sycl::queue &Q) { - auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - sycl::detail::optional LastEvent = QueueImplPtr->getLastEvent(); - ASSERT_TRUE(LastEvent.has_value()); - auto LastEventImplPtr = sycl::detail::getSyclObjImpl(*LastEvent); - ASSERT_TRUE(LastEventImplPtr->isDiscarded()); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::single_task>(CGH, []() {}); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::single_task>(Q, []() {}); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - oneapiext::submit(Q, - [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - oneapiext::single_task(Q, Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::parallel_for(CGH, range<1>{32}, Kernel); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - oneapiext::parallel_for(Q, range<1>{32}, Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::nd_launch>( - CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, - [](nd_item<1>) {}); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = get_kernel_id>(); - auto KB = get_kernel_bundle( - Q.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = malloc_shared(N, Q); - int *Dst = malloc_shared(N, Q); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); - }); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Src, Q); - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = malloc_shared(N, Q); - int *Dst = malloc_shared(N, Q); - - oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Src, Q); - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = malloc_shared(N, Q); - int *Dst = malloc_shared(N, Q); - - oneapiext::submit(Q, - [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Src, Q); - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = malloc_shared(N, Q); - int *Dst = malloc_shared(N, Q); - - oneapiext::memcpy(Q, Dst, Src, N); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Src, Q); - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", - &redefined_urUSMEnqueueFill); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); - }); - - ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", - &redefined_urUSMEnqueueFill); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::memset(Q, Dst, 1, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", - redefined_urUSMEnqueuePrefetch); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::submit( - Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); - - ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", - redefined_urUSMEnqueuePrefetch); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::prefetch(Q, Dst, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", - redefined_urUSMEnqueueMemAdvise); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); - }); - - ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", - &redefined_urUSMEnqueueMemAdvise); - - constexpr size_t N = 1024; - int *Dst = malloc_shared(N, Q); - - oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); - - ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - - CheckLastEventDiscarded(Q); - - free(Dst, Q); -} - -TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { - // Special test for case where host_task need an event after, so a barrier is - // enqueued to create a usable event. - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); - - oneapiext::single_task>(Q, []() {}); - - std::chrono::time_point HostTaskTimestamp; - Q.submit([&](handler &CGH) { - CGH.host_task( - [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); - }).wait(); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); -} - -} // namespace diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/CMakeLists.txt b/sycl/unittests/Extensions/FreeFunctionCommands/CMakeLists.txt index 69867a0a82aa..67a96a576908 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/CMakeLists.txt +++ b/sycl/unittests/Extensions/FreeFunctionCommands/CMakeLists.txt @@ -1,4 +1,3 @@ add_sycl_unittest(FreeFunctionCommandsTests OBJECT Barrier.cpp - FreeFunctionCommandsEvents.cpp ) diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp deleted file mode 100644 index e3b94d7cbb53..000000000000 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp +++ /dev/null @@ -1,469 +0,0 @@ -//==------------------ FreeFunctionCommandsEvents.cpp ---------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Tests the behavior of khr free function commands when events can be -// discarded. - -#include "FreeFunctionEventsHelpers.hpp" -#include "helpers/MockDeviceImage.hpp" -#include "helpers/MockKernelInfo.hpp" - -#define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS -#include - -class TestFunctor { -public: - void operator()() const {} - void operator()(sycl::item<1>) const {} - void operator()(sycl::nd_item<1> Item) const {} -}; -namespace sycl { -inline namespace _V1 { -namespace detail { -template <> -struct KernelInfo : public unittest::MockKernelInfoBase { - static constexpr const char *getName() { return "TestFunctor"; } - static constexpr int64_t getKernelSize() { return sizeof(TestFunctor); } - static constexpr const char *getFileName() { return "TestFunctor.hpp"; } - static constexpr const char *getFunctionName() { - return "TestFunctorFunctionName"; - } - static constexpr unsigned getLineNumber() { return 13; } - static constexpr unsigned getColumnNumber() { return 8; } -}; -} // namespace detail -} // namespace _V1 -} // namespace sycl - -static sycl::unittest::MockDeviceImage Img = - sycl::unittest::generateDefaultImage({"TestFunctor"}); -static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; - -namespace { - -class FreeFunctionCommandsEventsTests : public ::testing::Test { -public: - FreeFunctionCommandsEventsTests() - : Mock{}, Queue{sycl::context(sycl::platform()), sycl::default_selector_v, - sycl::property::queue::in_order{}} {} - -protected: - void SetUp() override { - counter_urEnqueueKernelLaunch = 0; - counter_urUSMEnqueueMemcpy = 0; - counter_urUSMEnqueueFill = 0; - counter_urUSMEnqueuePrefetch = 0; - counter_urUSMEnqueueMemAdvise = 0; - counter_urEnqueueEventsWaitWithBarrier = 0; - } - - sycl::unittest::UrMock<> Mock; - sycl::queue Queue; -}; - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch_task(Handler, TestFunctor()); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - sycl::khr::launch_task(Queue, TestFunctor()); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch_task(Handler, Kernel); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - sycl::khr::launch_task(Queue, Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForNoEvent) { - - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch(Handler, sycl::range<1>{32}, TestFunctor()); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - sycl::khr::launch(Queue, sycl::range<1>{32}, TestFunctor()); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch(Handler, sycl::range<1>{32}, Kernel); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - sycl::khr::launch(Queue, sycl::range<1>{32}, Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch_grouped(Handler, sycl::range<1>{32}, sycl::range<1>{32}, - TestFunctor()); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - - sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32}, - TestFunctor()); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::launch_grouped(Handler, sycl::range<1>{32}, sycl::range<1>{32}, - Kernel); - }); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback("urKernelGetInfo", - &after_urKernelGetInfo); - - auto KID = sycl::get_kernel_id(); - auto KB = sycl::get_kernel_bundle( - Queue.get_context(), std::vector{KID}); - - ASSERT_TRUE(KB.has_kernel(KID)); - - auto Kernel = KB.get_kernel(KID); - - sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32}, - Kernel); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitMemcpyNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = sycl::malloc_shared(N, Queue); - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::memcpy(Handler, Dst, Src, sizeof(int) * N); - }); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Src, Queue); - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, MemcpyShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = sycl::malloc_shared(N, Queue); - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::memcpy(Queue, Dst, Src, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Src, Queue); - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitCopyNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = sycl::malloc_shared(N, Queue); - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::copy(Handler, Src, Dst, N); - }); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Src, Queue); - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, CopyShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy", - &redefined_urUSMEnqueueMemcpy); - - constexpr size_t N = 1024; - int *Src = sycl::malloc_shared(N, Queue); - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::memcpy(Queue, Dst, Src, N); - - ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Src, Queue); - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitMemsetNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", - &redefined_urUSMEnqueueFill); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::memset(Handler, Dst, int{1}, sizeof(int) * N); - }); - - ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, MemsetShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMFill", - &redefined_urUSMEnqueueFill); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::memset(Queue, Dst, 1, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitPrefetchNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", - redefined_urUSMEnqueuePrefetch); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::prefetch(Handler, Dst, sizeof(int) * N); - }); - - ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, PrefetchShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMPrefetch", - redefined_urUSMEnqueuePrefetch); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::prefetch(Queue, Dst, sizeof(int) * N); - - ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, SubmitMemAdviseNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", - redefined_urUSMEnqueueMemAdvise); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::submit(Queue, [&](sycl::handler &Handler) { - sycl::khr::mem_advise(Handler, Dst, sizeof(int) * N, 1); - }); - - ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} -TEST_F(FreeFunctionCommandsEventsTests, MemAdviseShortcutNoEvent) { - mock::getCallbacks().set_replace_callback("urEnqueueUSMAdvise", - &redefined_urUSMEnqueueMemAdvise); - - constexpr size_t N = 1024; - int *Dst = sycl::malloc_shared(N, Queue); - - sycl::khr::mem_advise(Queue, Dst, sizeof(int) * N, 1); - - ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - - CheckLastEventDiscarded(Queue); - - free(Dst, Queue); -} - -TEST_F(FreeFunctionCommandsEventsTests, BarrierBeforeHostTask) { - // Special test for case where host_task need an event after, so a barrier is - // enqueued to create a usable event. - mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); - - sycl::khr::launch_task(Queue, TestFunctor()); - - std::chrono::time_point HostTaskTimestamp; - Queue - .submit([&](sycl::handler &Handler) { - Handler.host_task( - [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); - }) - .wait(); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); -} - -} // namespace diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp deleted file mode 100644 index 294c425072ad..000000000000 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp +++ /dev/null @@ -1,81 +0,0 @@ -#include "detail/event_impl.hpp" -#include "detail/queue_impl.hpp" -#include "sycl/platform.hpp" -#include - -#include - -#include -#include -#include - -inline void CheckLastEventDiscarded(sycl::queue &Q) { - auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - sycl::detail::optional LastEvent = QueueImplPtr->getLastEvent(); - ASSERT_TRUE(LastEvent.has_value()); - auto LastEventImplPtr = sycl::detail::getSyclObjImpl(*LastEvent); - ASSERT_TRUE(LastEventImplPtr->isDiscarded()); -} - -inline ur_result_t after_urKernelGetInfo(void *pParams) { - auto params = *static_cast(pParams); - constexpr char MockKernel[] = "TestKernel"; - if (*params.ppropName == UR_KERNEL_INFO_FUNCTION_NAME) { - if (*params.ppPropValue) { - assert(*params.ppropSize == sizeof(MockKernel)); - std::memcpy(*params.ppPropValue, MockKernel, sizeof(MockKernel)); - } - if (*params.ppPropSizeRet) - **params.ppPropSizeRet = sizeof(MockKernel); - } - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urEnqueueKernelLaunch = 0; -inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { - ++counter_urEnqueueKernelLaunch; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urUSMEnqueueMemcpy = 0; -inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) { - ++counter_urUSMEnqueueMemcpy; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urUSMEnqueueFill = 0; -inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) { - ++counter_urUSMEnqueueFill; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urUSMEnqueuePrefetch = 0; -inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) { - ++counter_urUSMEnqueuePrefetch; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urUSMEnqueueMemAdvise = 0; -inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) { - ++counter_urUSMEnqueueMemAdvise; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -static thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; -static thread_local std::chrono::time_point - timestamp_urEnqueueEventsWaitWithBarrier; -inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *pParams) { - ++counter_urEnqueueEventsWaitWithBarrier; - timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); - return UR_RESULT_SUCCESS; -} diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 2acd593d14bf..5d7b75feb1bf 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -191,35 +191,4 @@ TEST_F(SchedulerTest, InOrderQueueNoSchedulerPath) { EXPECT_EQ(KernelEventListSize[1] /*EventsCount*/, 0u); } -// Test that barrier is not filtered out when waitlist contains an event -// produced by command which is bypassing the scheduler. -TEST_F(SchedulerTest, BypassSchedulerWithBarrier) { - sycl::unittest::UrMock<> Mock; - sycl::platform Plt = sycl::platform(); - - mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrierExt", - &redefinedEnqueueEventsWaitWithBarrierExt); - BarrierCalled = false; - - context Ctx{Plt}; - queue Q1{Ctx, default_selector_v, property::queue::in_order()}; - queue Q2{Ctx, default_selector_v, property::queue::in_order()}; - static constexpr size_t Size = 10; - - int *X = malloc_host(Size, Ctx); - - // Submit a command which bypasses the scheduler. - auto FillEvent = Q2.memset(X, 0, sizeof(int) * Size); - // Submit a barrier which depends on that event. - ExpectedEvent = detail::getSyclObjImpl(FillEvent)->getHandle(); - auto BarrierQ1 = Q1.ext_oneapi_submit_barrier({FillEvent}); - Q1.wait(); - Q2.wait(); - // Verify that barrier is not filtered out. - EXPECT_EQ(BarrierCalled, true); - - free(X, Ctx); -} - } // anonymous namespace diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index cf0c91e1478c..25dc08dc3ffd 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -23,11 +23,8 @@ using namespace sycl; size_t GEventsWaitCounter = 0; -inline ur_result_t redefinedEventsWait(void *pParams) { - auto params = *static_cast(pParams); - if (*params.pnumEvents > 0) { - GEventsWaitCounter++; - } +inline ur_result_t redefinedEventsWaitWithBarrier(void *pParams) { + GEventsWaitCounter++; return UR_RESULT_SUCCESS; } @@ -35,7 +32,8 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { GEventsWaitCounter = 0; sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); - mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); + mock::getCallbacks().set_before_callback("urEnqueueEventsWaitWithBarrier", + &redefinedEventsWaitWithBarrier); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -46,7 +44,13 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { InOrderQueue.submit([&](sycl::handler &CGH) { CGH.host_task([=] {}); }) .wait(); - EXPECT_EQ(GEventsWaitCounter, 1u); + size_t expectedCount = 1u; + + // OpenCL needs to store all events so does not need a barrier + if (Ctx.get_platform().get_backend() == backend::opencl) + expectedCount = 0u; + + EXPECT_EQ(GEventsWaitCounter, expectedCount); } enum class CommandType { KERNEL = 1, MEMSET = 2 }; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 2cb53e082d6d..06c2b3eb70ae 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -26,7 +26,7 @@ class MockQueueImpl : public sycl::detail::queue_impl { const sycl::async_handler &AsyncHandler, const sycl::property_list &PropList) : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} - using sycl::detail::queue_impl::finalizeHandler; + using sycl::detail::queue_impl::finalizeHandlerInOrderHostTaskUnlocked; }; // Define type with the only methods called by finalizeHandler @@ -86,12 +86,14 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); - Queue->finalizeHandler(MockCGH, std::nullopt); + Queue->finalizeHandlerInOrderHostTaskUnlocked( + MockCGH); } { LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); - Queue->finalizeHandler(MockCGH, std::nullopt); + Queue->finalizeHandlerInOrderHostTaskUnlocked( + MockCGH); } }