diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 01a54347bfdba..a13485d7b927e 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -282,18 +282,22 @@ event queue_impl::memcpyFromDeviceGlobal( DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest); } -sycl::detail::optional queue_impl::getLastEvent() { +sycl::detail::optional +queue_impl::getLastEvent(const std::shared_ptr &Self) { // The external event is required to finish last if set, so it is considered // the last event if present. if (std::optional ExternalEvent = MInOrderExternalEvent.read()) return ExternalEvent; std::lock_guard Lock{MMutex}; - if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr) + if (MEmpty) return std::nullopt; - if (!MGraph.expired() && MExtGraphDeps.LastEventPtr) - return detail::createSyclObjFromImpl(MExtGraphDeps.LastEventPtr); - return detail::createSyclObjFromImpl(MDefaultGraphDeps.LastEventPtr); + auto &LastEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + if (LastEvent) + return detail::createSyclObjFromImpl(LastEvent); + // We insert a marker to represent an event at end. + return detail::createSyclObjFromImpl(insertMarkerEvent(Self)); } void queue_impl::addEvent(const event &Event) { @@ -344,9 +348,49 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, HandlerImpl->MEventMode = SubmitInfo.EventMode(); - auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc()); + auto isHostTask = Type == CGType::CodeplayHostTask; + + // TODO: this shouldn't be needed but without this + // the legacy adapter doesn't synchronize the operations properly + // when non-immediate command lists are used. + auto isGraphSubmission = Type == CGType::ExecCommandBuffer; + + auto requiresPostProcess = SubmitInfo.PostProcessorFunc() || Streams.size(); + auto noLastEventPath = !isHostTask && !isGraphSubmission && + MNoEventMode.load(std::memory_order_relaxed) && + !requiresPostProcess; - addEvent(Event); + if (noLastEventPath) { + std::unique_lock Lock(MMutex); + + // Check if we are still in no event mode. There could + // have been a concurrent submit. 
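+    // If the mode was switched off concurrently (for example by a host task
+    // or an event-recording submission from another thread), fall through to
+    // the regular in-order paths below, which record the last event for
+    // dependency tracking.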
+ if (MNoEventMode.load(std::memory_order_relaxed)) { + return finalizeHandlerInOrderNoEventsUnlocked(Handler); + } + } + + event Event; + if (!isInOrder()) { + Event = finalizeHandlerOutOfOrder(Handler); + addEvent(Event); + } else { + if (isHostTask) { + std::unique_lock Lock(MMutex); + Event = finalizeHandlerInOrderHostTaskUnlocked(Handler); + } else { + std::unique_lock Lock(MMutex); + + if (!isGraphSubmission && trySwitchingToNoEventsMode()) { + Event = finalizeHandlerInOrderNoEventsUnlocked(Handler); + } else { + Event = finalizeHandlerInOrderWithDepsUnlocked(Handler); + } + } + } + + if (SubmitInfo.PostProcessorFunc()) + handlerPostProcess(Handler, SubmitInfo.PostProcessorFunc(), Event); const auto &EventImpl = detail::getSyclObjImpl(Event); for (auto &Stream : Streams) { @@ -370,63 +414,14 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, #ifndef __INTEL_PREVIEW_BREAKING_CHANGES event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, const std::shared_ptr &Self, - const std::shared_ptr &PrimaryQueue, + const std::shared_ptr &, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) { -#ifdef __INTEL_PREVIEW_BREAKING_CHANGES - detail::handler_impl HandlerImplVal(PrimaryQueue.get(), CallerNeedsEvent); - detail::handler_impl *HandlerImpl = &HandlerImplVal; - handler Handler(HandlerImpl, Self); -#else - handler Handler(Self, CallerNeedsEvent); - auto &HandlerImpl = detail::getSyclObjImpl(Handler); -#endif - -#if XPTI_ENABLE_INSTRUMENTATION - if (xptiTraceEnabled()) { - Handler.saveCodeLoc(Loc, IsTopCodeLoc); - } -#endif - - { - NestedCallsTracker tracker; - CGF(Handler); - } - - // Scheduler will later omit events, that are not required to execute tasks. - // Host and interop tasks, however, are not submitted to low-level runtimes - // and require separate dependency management. - const CGType Type = HandlerImpl->MCGType; - std::vector Streams; - if (Type == CGType::Kernel) - Streams = std::move(Handler.MStreamStorage); - - HandlerImpl->MEventMode = SubmitInfo.EventMode(); - - auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc()); - - addEvent(Event); - - const auto &EventImpl = detail::getSyclObjImpl(Event); - for (auto &Stream : Streams) { - // We don't want stream flushing to be blocking operation that is why submit - // a host task to print stream buffer. It will fire up as soon as the kernel - // finishes execution. - auto L = [&](handler &ServiceCGH) { - Stream->generateFlushCommand(ServiceCGH); - }; - detail::type_erased_cgfo_ty CGF{L}; - event FlushEvent = - submit_impl(CGF, Self, PrimaryQueue, SecondaryQueue, - /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {}); - EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent)); - registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); - } - - return Event; + return submit_impl(CGF, Self, SecondaryQueue.get(), CallerNeedsEvent, Loc, + IsTopCodeLoc, SubmitInfo); } #endif @@ -467,24 +462,19 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &ExpandedDepEvents = getExtendDependencyList(DepEvents, MutableDepEvents, Lock); + MEmpty = false; + // If we have a command graph set we need to capture the op through the // handler rather than by-passing the scheduler. 
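+  // Otherwise, when there is no graph and every dependency is safe for
+  // scheduler bypass, the operation is enqueued directly through the backend
+  // in the branch below.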
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( ExpandedDepEvents, MContext)) { - if (!CallerNeedsEvent && supportsDiscardingPiEvents()) { + auto isNoEventsMode = trySwitchingToNoEventsMode(); + if (!CallerNeedsEvent && isNoEventsMode) { NestedCallsTracker tracker; MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), /*PiEvent*/ nullptr); - event DiscardedEvent = createDiscardedEvent(); - if (isInOrder()) { - // Store the discarded event for proper in-order dependency tracking. - auto &EventToStoreIn = MGraph.expired() - ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent); - } - return DiscardedEvent; + return createDiscardedEvent(); } event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); @@ -509,7 +499,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, } } - if (isInOrder()) { + if (isInOrder() && !isNoEventsMode) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; EventToStoreIn = EventImpl; @@ -637,9 +627,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } std::vector> WeakEvents; + EventImplPtr LastEvent; { std::lock_guard Lock(MMutex); WeakEvents.swap(MEventsWeak); + LastEvent = MDefaultGraphDeps.LastEventPtr; MMissedCleanupRequests.unset( [&](MissedCleanupRequestsType &MissedCleanupRequests) { @@ -664,6 +656,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } } } + + if (LastEvent) { + LastEvent->wait(LastEvent); + } + const AdapterPtr &Adapter = getAdapter(); Adapter->call(getHandleRef()); @@ -754,18 +751,14 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { } bool queue_impl::queue_empty() const { - // If we have in-order queue where events are not discarded then just check - // the status of the last event. + // If we have in-order queue with non-empty last event, just check its status. if (isInOrder()) { std::lock_guard Lock(MMutex); - // If there is no last event we know that no work has been submitted, so it - // must be trivially empty. - if (!MDefaultGraphDeps.LastEventPtr) + if (MEmpty) return true; - // Otherwise, check if the last event is finished. - // Note that we fall back to the backend query if the event was discarded, - // which may happend despite the queue not being a discard event queue. - if (!MDefaultGraphDeps.LastEventPtr->isDiscarded()) + + if (MDefaultGraphDeps.LastEventPtr && + !MDefaultGraphDeps.LastEventPtr->isDiscarded()) return MDefaultGraphDeps.LastEventPtr ->get_info() == info::event_command_status::complete; @@ -778,6 +771,11 @@ bool queue_impl::queue_empty() const { if (!IsReady) return false; + // If got here, it means that LastEventPtr is nullptr (so no possible Host + // Tasks) and there is nothing executing on the device. + if (isInOrder()) + return true; + // We may have events like host tasks which are not submitted to the backend // queue so we need to get their status separately. std::lock_guard Lock(MMutex); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 8a0a1476c2ee1..50e418c5911cf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -164,9 +164,12 @@ class queue_impl { // different instance ID until this gets added. 
constructorNotification(); #endif + + trySwitchingToNoEventsMode(); } - sycl::detail::optional getLastEvent(); + sycl::detail::optional + getLastEvent(const std::shared_ptr &Self); public: /// Constructs a SYCL queue from adapter interoperability handle. @@ -227,6 +230,8 @@ class queue_impl { // different instance ID until this gets added. constructorNotification(); #endif + + trySwitchingToNoEventsMode(); } ~queue_impl() { @@ -593,6 +598,12 @@ class queue_impl { std::lock_guard Lock(MMutex); MGraph = Graph; MExtGraphDeps.reset(); + + if (Graph) { + MNoEventMode = false; + } else { + trySwitchingToNoEventsMode(); + } } std::shared_ptr @@ -681,49 +692,131 @@ class queue_impl { } template - event finalizeHandlerInOrder(HandlerType &Handler) { - // Accessing and changing of an event isn't atomic operation. - // Hence, here is the lock for thread-safety. - std::lock_guard Lock{MMutex}; + void synchronizeWithExternalEvent(HandlerType &Handler) { + // If there is an external event set, add it as a dependency and clear it. + // We do not need to hold the lock as MLastEventMtx will ensure the last + // event reflects the corresponding external event dependence as well. + std::optional ExternalEvent = popExternalEvent(); + if (ExternalEvent) + Handler.depends_on(*ExternalEvent); + } + + bool trySwitchingToNoEventsMode() { + if (MNoEventMode.load(std::memory_order_relaxed)) + return true; + + if (!MGraph.expired() || !isInOrder()) + return false; + + if (MDefaultGraphDeps.LastEventPtr != nullptr && + !Scheduler::CheckEventReadiness(MContext, + MDefaultGraphDeps.LastEventPtr)) + return false; + + MNoEventMode.store(true, std::memory_order_relaxed); + MDefaultGraphDeps.LastEventPtr = nullptr; + return true; + } + + template + event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) { + assert(isInOrder()); + assert(MGraph.expired()); + assert(MDefaultGraphDeps.LastEventPtr == nullptr || + MContext->getBackend() == backend::opencl); + assert(MNoEventMode); + + MEmpty = false; + + synchronizeWithExternalEvent(Handler); + + if (MContext->getBackend() == backend::opencl && MGraph.expired()) { + // This is needed to support queue_empty() call + auto event = Handler.finalize(); + if (!getSyclObjImpl(event)->isDiscarded()) { + MDefaultGraphDeps.LastEventPtr = getSyclObjImpl(event); + } + return event; + } else { + return Handler.finalize(); + } + } + + template + event finalizeHandlerInOrderHostTaskUnlocked(HandlerType &Handler) { + assert(isInOrder()); + assert(Handler.getType() == CGType::CodeplayHostTask); auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; - // This dependency is needed for the following purposes: - // - host tasks are handled by the runtime and cannot be implicitly - // synchronized by the backend. - // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked - // by a host task. This dependency allows to build the enqueue order in - // the RT but will not be passed to the backend. See getPIEvents in - // Command. - if (EventToBuildDeps) { - // In the case where the last event was discarded and we are to run a - // host_task, we insert a barrier into the queue and use the resulting - // event as the dependency for the host_task. - // Note that host_task events can never be discarded, so this will not - // insert barriers between host_task enqueues. 
- if (EventToBuildDeps->isDiscarded() && - Handler.getType() == CGType::CodeplayHostTask) - EventToBuildDeps = insertHelperBarrier(Handler); - + if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) { + // We are not in no-event mode, so we can use the last event. // depends_on after an async alloc is explicitly disallowed. Async alloc // handles in order queue dependencies preemptively, so we skip them. // Note: This could be improved by moving the handling of dependencies // to before calling the CGF. - if (!EventToBuildDeps->isDiscarded() && - !(Handler.getType() == CGType::AsyncAlloc)) - Handler.depends_on(EventToBuildDeps); + Handler.depends_on(EventToBuildDeps); + } else if (MNoEventMode) { + // There might be some operations submitted to the queue + // but the LastEventPtr is not set. If we are to run a host_task, + // we need to insert a barrier to ensure proper synchronization. + Handler.depends_on(insertHelperBarrier(Handler)); } - // If there is an external event set, add it as a dependency and clear it. - // We do not need to hold the lock as MLastEventMtx will ensure the last - // event reflects the corresponding external event dependence as well. - std::optional ExternalEvent = popExternalEvent(); - if (ExternalEvent) - Handler.depends_on(*ExternalEvent); + MEmpty = false; + MNoEventMode = false; + + synchronizeWithExternalEvent(Handler); + + auto Event = Handler.finalize(); + EventToBuildDeps = getSyclObjImpl(Event); + assert(!EventToBuildDeps->isDiscarded()); + return Event; + } + + template + event finalizeHandlerInOrderWithDepsUnlocked(HandlerType &Handler) { + // this is handled by finalizeHandlerInOrderHostTask + assert(Handler.getType() != CGType::CodeplayHostTask); + + if (Handler.getType() == CGType::ExecCommandBuffer && MNoEventMode) { + // TODO: this shouldn't be needed but without this + // the legacy adapter doesn't synchronize the operations properly + // when non-immediate command lists are used. + Handler.depends_on(insertHelperBarrier(Handler)); + } + + auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + + // depends_on after an async alloc is explicitly disallowed. Async alloc + // handles in order queue dependencies preemptively, so we skip them. + // Note: This could be improved by moving the handling of dependencies + // to before calling the CGF. + if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) { + // If we have last event, this means we are no longer in no-event mode. + assert(!MNoEventMode); + Handler.depends_on(EventToBuildDeps); + } + + MEmpty = false; + + synchronizeWithExternalEvent(Handler); auto EventRet = Handler.finalize(); - EventToBuildDeps = getSyclObjImpl(EventRet); + + if (getSyclObjImpl(EventRet)->isDiscarded()) { + EventToBuildDeps = nullptr; + } else { + MNoEventMode = false; + EventToBuildDeps = getSyclObjImpl(EventRet); + + // TODO: if the event is NOP we should be able to discard it as well. + // However, NOP events are used to describe ordering for graph operations + // Once https://github.com/intel/llvm/issues/18330 is fixed, we can + // start relying on command buffer in-order property instead. + } return EventRet; } @@ -732,6 +825,9 @@ class queue_impl { event finalizeHandlerOutOfOrder(HandlerType &Handler) { const CGType Type = getSyclObjImpl(Handler)->MCGType; std::lock_guard Lock{MMutex}; + + MEmpty = false; + // The following code supports barrier synchronization if host task is // involved in the scenario. 
Native barriers cannot handle host task // dependency so in the case where some commands were not enqueued @@ -766,9 +862,9 @@ class queue_impl { } template - event finalizeHandlerPostProcess( - HandlerType &Handler, - const optional &PostProcessorFunc) { + void handlerPostProcess(HandlerType &Handler, + const optional &PostProcessorFunc, + event &Event) { bool IsKernel = Handler.getType() == CGType::Kernel; bool KernelUsesAssert = false; @@ -779,26 +875,8 @@ class queue_impl { ProgramManager::getInstance().kernelUsesAssert( Handler.MKernelName.data()); - auto Event = MIsInorder ? finalizeHandlerInOrder(Handler) - : finalizeHandlerOutOfOrder(Handler); - auto &PostProcess = *PostProcessorFunc; - PostProcess(IsKernel, KernelUsesAssert, Event); - - return Event; - } - - // template is needed for proper unit testing - template - event finalizeHandler(HandlerType &Handler, - const optional &PostProcessorFunc) { - if (PostProcessorFunc) { - return finalizeHandlerPostProcess(Handler, PostProcessorFunc); - } else { - return MIsInorder ? finalizeHandlerInOrder(Handler) - : finalizeHandlerOutOfOrder(Handler); - } } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES @@ -967,6 +1045,15 @@ class queue_impl { const bool MIsInorder; + // Specifies whether this queue records last event. This can only + // be true if the queue is in-order, the command graph is not + // associated with the queue and there has never been any host + // tasks submitted to the queue. + std::atomic MNoEventMode = false; + + // Used exclusively in getLastEvent and queue_empty() implementations + bool MEmpty = true; + std::vector MStreamsServiceEvents; std::mutex MStreamsServiceEventsMutex; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 2d3e7d17668af..aefbf112bb769 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -671,8 +671,8 @@ EventImplPtr Scheduler::addCommandGraphUpdate( return NewCmdEvent; } -bool CheckEventReadiness(const ContextImplPtr &Context, - const EventImplPtr &SyclEventImplPtr) { +bool Scheduler::CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr) { // Events that don't have an initialized context are throwaway events that // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. 
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5b657c1f13b93..a8f7235ac0d05 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -477,6 +477,9 @@ class Scheduler { const QueueImplPtr &Queue, std::vector Requirements, std::vector &Events); + static bool CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr); + static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, const ContextImplPtr &Context); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 983966937db62..bd3e02df6c84e 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -322,7 +322,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { assert(!QueueImpl->hasCommandGraph() && "Should not be called in on graph recording."); - sycl::detail::optional LastEvent = QueueImpl->getLastEvent(); + sycl::detail::optional LastEvent = QueueImpl->getLastEvent(QueueImpl); if (LastEvent) return *LastEvent; @@ -340,10 +340,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { if (is_in_order() && !impl->hasCommandGraph() && !impl->MIsProfilingEnabled) { - event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); - // If the last event was discarded, fall back to enqueuing a barrier. - if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) - return InOrderLastEvent; + return getBarrierEventForInorderQueueHelper(impl); } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); @@ -368,10 +365,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, }); if (is_in_order() && !impl->hasCommandGraph() && !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) { - event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); - // If the last event was discarded, fall back to enqueuing a barrier. - if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) - return InOrderLastEvent; + return getBarrierEventForInorderQueueHelper(impl); } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, @@ -453,20 +447,7 @@ sycl::detail::optional queue::ext_oneapi_get_last_event_impl() const { make_error_code(errc::invalid), "ext_oneapi_get_last_event() can only be called on in-order queues."); - sycl::detail::optional LastEvent = impl->getLastEvent(); - - // If there was no last event, the queue is yet to have any work submitted and - // we return a std::nullopt. - if (!LastEvent) - return std::nullopt; - - // If the last event was discarded or a NOP, we insert a marker to represent - // an event at end. 
- auto LastEventImpl = detail::getSyclObjImpl(*LastEvent); - if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP()) - LastEvent = - detail::createSyclObjFromImpl(impl->insertMarkerEvent(impl)); - return LastEvent; + return impl->getLastEvent(impl); } void queue::ext_oneapi_set_external_event(const event &external_event) { diff --git a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp index e4d33dc0a7659..17758071aed69 100644 --- a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp +++ b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp @@ -36,8 +36,13 @@ int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) { << std::endl; return 1; } - if (*E != *LastEvent) { - std::cout << "Failed " << CheckName << std::endl; + if (LastEvent->get_info() == + sycl::info::event_command_status::complete && + E->get_info() != + sycl::info::event_command_status::complete) { + std::cout << "ext_oneapi_get_last_event() returned an event that is " + "complete, but the event returned by CheckFunc() is not." + << std::endl; return 1; } return 0; diff --git a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp index 4d15e3b0ce6a2..3ceea0c28f3f3 100644 --- a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp @@ -1,10 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s - -// Test to check that we don't insert unnecessary urEnqueueEventsWaitWithBarrier -// calls if queue is in-order and wait list is empty. - -// CHECK-NOT: <--- urEnqueueEventsWaitWithBarrier +// RUN: %{run} %t.out #include @@ -15,6 +10,17 @@ namespace syclex = sycl::ext::oneapi::experimental; +bool checkBarrierEvent(sycl::backend backend, sycl::event LastEvent, + sycl::event BarrierEvent, bool noEventMode) { + // In noEventMode or when using opencl backend, + // barrier will always return last event + if (backend == sycl::backend::opencl || !noEventMode) { + return BarrierEvent == LastEvent; + } else { + return BarrierEvent != LastEvent; + } +} + int main() { sycl::queue Q({sycl::property::queue::in_order{}}); int *Res = sycl::malloc_host(1, Q); @@ -26,13 +32,13 @@ int main() { cgh.single_task([=]() { *Res += 9; }); }); auto BarrierEvent1 = Q.ext_oneapi_submit_barrier(); - assert(BarrierEvent1 == Event1); + assert(checkBarrierEvent(Q.get_backend(), Event1, BarrierEvent1, true)); auto Event2 = Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() { *Res *= 2; }); }); auto BarrierEvent2 = Q.ext_oneapi_submit_barrier(); - assert(BarrierEvent2 == Event2); + assert(checkBarrierEvent(Q.get_backend(), Event1, BarrierEvent1, true)); BarrierEvent2.wait(); // Check that kernel events are completed after waiting for barrier event. 
@@ -50,7 +56,8 @@ int main() { auto Event1 = Q.submit( [&](sycl::handler &CGH) { CGH.host_task([&] { *Res += 1; }); }); auto BarrierEvent1 = Q.ext_oneapi_submit_barrier(); - assert(Event1 == BarrierEvent1); + assert(checkBarrierEvent(Q.get_backend(), Event1, BarrierEvent1, + false /* host tasks used */)); auto Event2 = Q.submit([&](sycl::handler &CGH) { CGH.fill(Res, 10, 1); }); Q.wait(); diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index b415ae27dbfae..86f71310f2ae2 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -12,7 +12,6 @@ add_sycl_unittest(ExtensionsTests OBJECT CompositeDevice.cpp OneAPIProd.cpp EnqueueFunctionsEvents.cpp - DiscardEvent.cpp ProfilingTag.cpp KernelProperties.cpp NoDeviceIPVersion.cpp diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index f65018d63fb35..c50b8c1e99371 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -387,12 +387,6 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a host task. Explicit dependency is needed to enforce - // the execution order. - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - auto EventLast = InOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); @@ -402,10 +396,6 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { // submission to the backend). if (BlockHostTask) Lock.unlock(); - ASSERT_EQ(EventLastWaitList.size(), size_t(BlockHostTask)); - if (EventLastWaitList.size()) { - ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); - } EventLast.wait(); }; @@ -474,21 +464,6 @@ TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { auto InOrderGraphExec = InOrderGraph.finalize(); auto EventGraph = InOrderQueue.submit( [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); - - auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a memset. Explicit dependency is needed to enforce the - // execution order. - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - - auto EventLast = - InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); - auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); - auto EventLastWaitList = EventLastImpl->getWaitList(); - // Previous task is not a host task. In Order queue dependency is managed by - // the backend for non-host kernels. - ASSERT_EQ(EventLastWaitList.size(), 0lu); } TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { @@ -553,19 +528,4 @@ TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { auto InOrderGraphExec = InOrderGraph.finalize(); auto EventGraph = InOrderQueue.submit( [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); - - auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a memcpy. 
Explicit dependency is needed to enforce the - // execution order - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - - auto EventLast = - InOrderQueue.memcpy(TestData, TestDataHost.data(), Size * sizeof(int)); - auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); - auto EventLastWaitList = EventLastImpl->getWaitList(); - // Previous task is not a host task. In Order queue dependency is managed by - // the backend for non-host kernels. - ASSERT_EQ(EventLastWaitList.size(), 0lu); } diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp deleted file mode 100644 index 73a2436cc0d49..0000000000000 --- a/sycl/unittests/Extensions/DiscardEvent.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//==------------------------- DiscardEvent.cpp -----------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "sycl/platform.hpp" -#include -#include - -#include - -#include -#include -#include - -using namespace sycl; - -namespace oneapiext = ext::oneapi::experimental; - -namespace { - -thread_local size_t counter_urEnqueueKernelLaunch = 0; -inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { - ++counter_urEnqueueKernelLaunch; - auto params = *static_cast(pParams); - EXPECT_EQ(*params.pphEvent, nullptr); - return UR_RESULT_SUCCESS; -} - -thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0; -thread_local std::chrono::time_point - timestamp_urEnqueueEventsWaitWithBarrier; -inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *) { - ++counter_urEnqueueEventsWaitWithBarrier; - timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); - return UR_RESULT_SUCCESS; -} - -class DiscardEventTests : public ::testing::Test { -public: - DiscardEventTests() - : Mock{}, Q{context(sycl::platform()), default_selector_v, - property::queue::in_order{}} {} - -protected: - void SetUp() override { - counter_urEnqueueKernelLaunch = 0; - counter_urEnqueueEventsWaitWithBarrier = 0; - } - - unittest::UrMock<> Mock; - queue Q; -}; - -TEST_F(DiscardEventTests, BarrierBeforeHostTask) { - // Special test for case where host_task need an event after, so a barrier is - // enqueued to create a usable event. 
- mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", - &redefined_urEnqueueKernelLaunch); - mock::getCallbacks().set_after_callback( - "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); - - oneapiext::single_task>(Q, []() {}); - - std::chrono::time_point HostTaskTimestamp; - Q.submit([&](handler &CGH) { - CGH.host_task( - [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); - }).wait(); - - ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); - ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); -} - -} // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 1b0eb368b350a..e170628b83a7c 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -38,14 +38,6 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { queue Q; }; -inline void CheckLastEventDiscarded(sycl::queue &Q) { - auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - sycl::detail::optional LastEvent = QueueImplPtr->getLastEvent(); - ASSERT_TRUE(LastEvent.has_value()); - auto LastEventImplPtr = sycl::detail::getSyclObjImpl(*LastEvent); - ASSERT_TRUE(LastEventImplPtr->isDiscarded()); -} - TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch); @@ -55,8 +47,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { @@ -66,8 +56,6 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { oneapiext::single_task>(Q, []() {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { @@ -87,8 +75,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { @@ -108,8 +94,6 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { oneapiext::single_task(Q, Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { @@ -121,8 +105,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { @@ -132,8 +114,6 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { @@ -154,8 +134,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { @@ -175,8 +153,6 @@ TEST_F(EnqueueFunctionsEventsTests, 
RangeParallelForShortcutKernelNoEvent) { oneapiext::parallel_for(Q, range<1>{32}, Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { @@ -189,8 +165,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { @@ -201,8 +175,6 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { [](nd_item<1>) {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { @@ -223,8 +195,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { @@ -244,8 +214,6 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Q); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { @@ -262,8 +230,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Q); - free(Src, Q); free(Dst, Q); } @@ -280,8 +246,6 @@ TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Q); - free(Src, Q); free(Dst, Q); } @@ -299,8 +263,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Q); - free(Src, Q); free(Dst, Q); } @@ -317,8 +279,6 @@ TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Q); - free(Src, Q); free(Dst, Q); } @@ -336,8 +296,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } @@ -352,8 +310,6 @@ TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } @@ -369,8 +325,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } @@ -385,8 +339,6 @@ TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } @@ -403,8 +355,6 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } @@ -419,8 +369,6 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - CheckLastEventDiscarded(Q); - free(Dst, Q); } diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp index e3b94d7cbb535..56696c712248a 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp @@ -73,8 +73,6 @@ 
TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutNoEvent) { @@ -83,8 +81,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutNoEvent) { sycl::khr::launch_task(Queue, TestFunctor()); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskKernelNoEvent) { @@ -104,8 +100,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskKernelNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutKernelNoEvent) { @@ -125,8 +119,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutKernelNoEvent) { sycl::khr::launch_task(Queue, Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForNoEvent) { @@ -139,8 +131,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutNoEvent) { @@ -150,8 +140,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutNoEvent) { sycl::khr::launch(Queue, sycl::range<1>{32}, TestFunctor()); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForKernelNoEvent) { @@ -172,8 +160,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForKernelNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutKernelNoEvent) { @@ -193,8 +179,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutKernelNoEvent) { sycl::khr::launch(Queue, sycl::range<1>{32}, Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedNoEvent) { @@ -207,8 +191,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) { @@ -219,8 +201,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) { TestFunctor()); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { @@ -242,8 +222,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutKernelNoEvent) { @@ -264,8 +242,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutKernelNoEvent) { Kernel); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); - - CheckLastEventDiscarded(Queue); } TEST_F(FreeFunctionCommandsEventsTests, SubmitMemcpyNoEvent) { @@ -281,7 +257,7 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitMemcpyNoEvent) { }); ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Queue); + free(Src, Queue); free(Dst, Queue); } @@ -298,8 +274,6 @@ TEST_F(FreeFunctionCommandsEventsTests, 
MemcpyShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Src, Queue); free(Dst, Queue); } @@ -318,8 +292,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitCopyNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Src, Queue); free(Dst, Queue); } @@ -336,8 +308,6 @@ TEST_F(FreeFunctionCommandsEventsTests, CopyShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemcpy, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Src, Queue); free(Dst, Queue); } @@ -355,8 +325,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitMemsetNoEvent) { ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } @@ -371,8 +339,6 @@ TEST_F(FreeFunctionCommandsEventsTests, MemsetShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueFill, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } @@ -389,8 +355,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitPrefetchNoEvent) { ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } @@ -405,8 +369,6 @@ TEST_F(FreeFunctionCommandsEventsTests, PrefetchShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueuePrefetch, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } @@ -423,8 +385,6 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitMemAdviseNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } TEST_F(FreeFunctionCommandsEventsTests, MemAdviseShortcutNoEvent) { @@ -438,8 +398,6 @@ TEST_F(FreeFunctionCommandsEventsTests, MemAdviseShortcutNoEvent) { ASSERT_EQ(counter_urUSMEnqueueMemAdvise, size_t{1}); - CheckLastEventDiscarded(Queue); - free(Dst, Queue); } diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp index 294c425072ad6..c45d72ea4c343 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp @@ -9,14 +9,6 @@ #include #include -inline void CheckLastEventDiscarded(sycl::queue &Q) { - auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - sycl::detail::optional LastEvent = QueueImplPtr->getLastEvent(); - ASSERT_TRUE(LastEvent.has_value()); - auto LastEventImplPtr = sycl::detail::getSyclObjImpl(*LastEvent); - ASSERT_TRUE(LastEventImplPtr->isDiscarded()); -} - inline ur_result_t after_urKernelGetInfo(void *pParams) { auto params = *static_cast(pParams); constexpr char MockKernel[] = "TestKernel"; diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 2acd593d14bf3..5d7b75feb1bf4 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -191,35 +191,4 @@ TEST_F(SchedulerTest, InOrderQueueNoSchedulerPath) { EXPECT_EQ(KernelEventListSize[1] /*EventsCount*/, 0u); } -// Test that barrier is not filtered out when waitlist contains an event -// produced by command which is bypassing the scheduler. 
-TEST_F(SchedulerTest, BypassSchedulerWithBarrier) { - sycl::unittest::UrMock<> Mock; - sycl::platform Plt = sycl::platform(); - - mock::getCallbacks().set_before_callback( - "urEnqueueEventsWaitWithBarrierExt", - &redefinedEnqueueEventsWaitWithBarrierExt); - BarrierCalled = false; - - context Ctx{Plt}; - queue Q1{Ctx, default_selector_v, property::queue::in_order()}; - queue Q2{Ctx, default_selector_v, property::queue::in_order()}; - static constexpr size_t Size = 10; - - int *X = malloc_host(Size, Ctx); - - // Submit a command which bypasses the scheduler. - auto FillEvent = Q2.memset(X, 0, sizeof(int) * Size); - // Submit a barrier which depends on that event. - ExpectedEvent = detail::getSyclObjImpl(FillEvent)->getHandle(); - auto BarrierQ1 = Q1.ext_oneapi_submit_barrier({FillEvent}); - Q1.wait(); - Q2.wait(); - // Verify that barrier is not filtered out. - EXPECT_EQ(BarrierCalled, true); - - free(X, Ctx); -} - } // anonymous namespace diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index cf0c91e1478c5..25dc08dc3ffd8 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -23,11 +23,8 @@ using namespace sycl; size_t GEventsWaitCounter = 0; -inline ur_result_t redefinedEventsWait(void *pParams) { - auto params = *static_cast(pParams); - if (*params.pnumEvents > 0) { - GEventsWaitCounter++; - } +inline ur_result_t redefinedEventsWaitWithBarrier(void *pParams) { + GEventsWaitCounter++; return UR_RESULT_SUCCESS; } @@ -35,7 +32,8 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { GEventsWaitCounter = 0; sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); - mock::getCallbacks().set_before_callback("urEventWait", &redefinedEventsWait); + mock::getCallbacks().set_before_callback("urEnqueueEventsWaitWithBarrier", + &redefinedEventsWaitWithBarrier); context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; @@ -46,7 +44,13 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { InOrderQueue.submit([&](sycl::handler &CGH) { CGH.host_task([=] {}); }) .wait(); - EXPECT_EQ(GEventsWaitCounter, 1u); + size_t expectedCount = 1u; + + // OpenCL needs to store all events so does not need a barrier + if (Ctx.get_platform().get_backend() == backend::opencl) + expectedCount = 0u; + + EXPECT_EQ(GEventsWaitCounter, expectedCount); } enum class CommandType { KERNEL = 1, MEMSET = 2 }; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 2cb53e082d6df..0dd7b8d99f446 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -26,7 +26,7 @@ class MockQueueImpl : public sycl::detail::queue_impl { const sycl::async_handler &AsyncHandler, const sycl::property_list &PropList) : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} - using sycl::detail::queue_impl::finalizeHandler; + using sycl::detail::queue_impl::finalizeHandlerInOrderHostTaskUnlocked; }; // Define type with the only methods called by finalizeHandler @@ -85,13 +85,15 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { { LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) - .Times(0); - Queue->finalizeHandler(MockCGH, std::nullopt); + .Times(1); + Queue->finalizeHandlerInOrderHostTaskUnlocked( + MockCGH); } { LimitedHandlerSimulation 
MockCGH{detail::CGType::CodeplayHostTask, Queue};
     EXPECT_CALL(MockCGH, depends_on(An()))
         .Times(1);
-    Queue->finalizeHandler(MockCGH, std::nullopt);
+    Queue->finalizeHandlerInOrderHostTaskUnlocked(
+        MockCGH);
   }
 }