@@ -318,7 +318,12 @@ fill_copy_args(detail::handler_impl *impl,
318318
319319#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
320320handler::handler (detail::handler_impl &HandlerImpl) : impl(&HandlerImpl) {}
321+ handler::handler (std::unique_ptr<detail::handler_impl> &&HandlerImpl)
322+ : implOwner(std::move(HandlerImpl)), impl(implOwner.get()) {}
321323#else
324+ handler::handler (std::unique_ptr<detail::handler_impl> &&HandlerImpl)
325+ : impl(std::move(HandlerImpl)) {}
326+
322327handler::handler (std::shared_ptr<detail::queue_impl> Queue,
323328 bool CallerNeedsEvent)
324329 : impl(std::make_shared<detail::handler_impl>(*Queue, nullptr ,
@@ -344,6 +349,7 @@ handler::handler(
344349 : impl(std::make_shared<detail::handler_impl>(*Graph)) {}
345350
346351#endif
352+ handler::~handler () = default ;
347353
348354// Sets the submission state to indicate that an explicit kernel bundle has been
349355// set. Throws a sycl::exception with errc::invalid if the current state
@@ -426,12 +432,6 @@ detail::EventImplPtr handler::finalize() {
426432#else
427433event handler::finalize () {
428434#endif
429- // This block of code is needed only for reduction implementation.
430- // It is harmless (does nothing) for everything else.
431- if (MIsFinalized)
432- return MLastEvent;
433- MIsFinalized = true ;
434-
435435 const auto &type = getType ();
436436 detail::queue_impl *Queue = impl->get_queue_or_null ();
437437 ext::oneapi::experimental::detail::graph_impl *Graph =
@@ -559,13 +559,6 @@ event handler::finalize() {
559559 std::vector<ur_event_handle_t > RawEvents = detail::Command::getUrEvents (
560560 impl->CGData .MEvents , impl->get_queue_or_null (), false );
561561
562- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
563- detail::EventImplPtr &LastEventImpl = MLastEvent;
564- #else
565- const detail::EventImplPtr &LastEventImpl =
566- detail::getSyclObjImpl (MLastEvent);
567- #endif
568-
569562 bool DiscardEvent =
570563 !impl->MEventNeeded && impl->get_queue ().supportsDiscardingPiEvents ();
571564 if (DiscardEvent) {
@@ -577,11 +570,10 @@ event handler::finalize() {
577570 DiscardEvent = !KernelUsesAssert;
578571 }
579572
580- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
581- if (!DiscardEvent) {
582- LastEventImpl = detail::event_impl::create_completed_host_event ();
583- }
584- #endif
573+ std::shared_ptr<detail::event_impl> ResultEvent =
574+ DiscardEvent
575+ ? nullptr
576+ : detail::event_impl::create_device_event (impl->get_queue ());
585577
586578#ifdef XPTI_ENABLE_INSTRUMENTATION
587579 const bool xptiEnabled = xptiTraceEnabled ();
@@ -612,9 +604,8 @@ event handler::finalize() {
612604 enqueueImpKernel (
613605 impl->get_queue (), impl->MNDRDesc , impl->MArgs , KernelBundleImpPtr,
614606 MKernel.get (), toKernelNameStrT (MKernelName),
615- impl->MKernelNameBasedCachePtr , RawEvents,
616- DiscardEvent ? nullptr : LastEventImpl.get (), nullptr ,
617- impl->MKernelCacheConfig , impl->MKernelIsCooperative ,
607+ impl->MKernelNameBasedCachePtr , RawEvents, ResultEvent.get (),
608+ nullptr , impl->MKernelCacheConfig , impl->MKernelIsCooperative ,
618609 impl->MKernelUsesClusterLaunch , impl->MKernelWorkGroupMemorySize ,
619610 BinImage, impl->MKernelFuncPtr , impl->MKernelNumArgs ,
620611 impl->MKernelParamDescGetter , impl->MKernelHasSpecialCaptures );
@@ -624,7 +615,7 @@ event handler::finalize() {
624615 if (!DiscardEvent) {
625616 detail::emitInstrumentationGeneral (
626617 StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
627- static_cast <const void *>(LastEventImpl ->getHandle ()));
618+ static_cast <const void *>(ResultEvent ->getHandle ()));
628619 }
629620 detail::emitInstrumentationGeneral (StreamID, InstanceID,
630621 CmdTraceEvent,
@@ -635,29 +626,32 @@ event handler::finalize() {
635626
636627 if (DiscardEvent) {
637628 EnqueueKernel ();
638- #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
639- LastEventImpl->setStateDiscarded ();
640- #endif
641629 } else {
642630 detail::queue_impl &Queue = impl->get_queue ();
643- LastEventImpl ->setQueue (Queue);
644- LastEventImpl ->setWorkerQueue (Queue.weak_from_this ());
645- LastEventImpl ->setContextImpl (impl->get_context ());
646- LastEventImpl ->setStateIncomplete ();
647- LastEventImpl ->setSubmissionTime ();
631+ ResultEvent ->setQueue (Queue);
632+ ResultEvent ->setWorkerQueue (Queue.weak_from_this ());
633+ ResultEvent ->setContextImpl (impl->get_context ());
634+ ResultEvent ->setStateIncomplete ();
635+ ResultEvent ->setSubmissionTime ();
648636
649637 EnqueueKernel ();
650- LastEventImpl ->setEnqueued ();
638+ ResultEvent ->setEnqueued ();
651639 // connect returned event with dependent events
652640 if (!Queue.isInOrder ()) {
653641 // MEvents is not used anymore, so can move.
654- LastEventImpl ->getPreparedDepsEvents () =
642+ ResultEvent ->getPreparedDepsEvents () =
655643 std::move (impl->CGData .MEvents );
656- // LastEventImpl is local for current thread, no need to lock.
657- LastEventImpl ->cleanDepEventsThroughOneLevelUnlocked ();
644+ // ResultEvent is local for current thread, no need to lock.
645+ ResultEvent ->cleanDepEventsThroughOneLevelUnlocked ();
658646 }
659647 }
660- return MLastEvent;
648+ #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
649+ return ResultEvent;
650+ #else
651+ return detail::createSyclObjFromImpl<event>(
652+ ResultEvent ? ResultEvent
653+ : detail::event_impl::create_discarded_event ());
654+ #endif
661655 }
662656 }
663657
@@ -939,11 +933,10 @@ event handler::finalize() {
939933 std::move (CommandGroup), *Queue, !DiscardEvent);
940934
941935#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
942- MLastEvent = DiscardEvent ? nullptr : Event;
936+ return DiscardEvent ? nullptr : Event;
943937#else
944- MLastEvent = detail::createSyclObjFromImpl<event>(Event);
938+ return detail::createSyclObjFromImpl<event>(Event);
945939#endif
946- return MLastEvent;
947940}
948941
949942void handler::addReduction (const std::shared_ptr<const void > &ReduObj) {
@@ -2474,58 +2467,28 @@ __SYCL_EXPORT void HandlerAccess::preProcess(handler &CGH,
24742467 AuxHandler.copyCodeLoc (CGH);
24752468 F (AuxHandler);
24762469 auto E = AuxHandler.finalize ();
2477- assert (!CGH.MIsFinalized &&
2478- " Can't do pre-processing if the command has been enqueued already!" );
24792470 if (EventNeeded)
24802471 CGH.depends_on (E);
24812472}
24822473__SYCL_EXPORT void HandlerAccess::postProcess (handler &CGH,
24832474 type_erased_cgfo_ty F) {
2484- // The "hacky" `handler`s manipulation mentioned near the declaration in
2485- // `handler.hpp` and implemented here is far from perfect. A better approach
2486- // would be
2487- //
2488- // bool OrigNeedsEvent = CGH.needsEvent()
2489- // assert(CGH.not_finalized/enqueued());
2490- // if (!InOrderQueue)
2491- // CGH.setNeedsEvent()
2492- //
2493- // handler PostProcessHandler(Queue, OrigNeedsEvent)
2494- // auto E = CGH.finalize(); // enqueue original or current last
2495- // // post-process
2496- // if (!InOrder)
2497- // PostProcessHandler.depends_on(E)
2498- //
2499- // swap_impls(CGH, PostProcessHandler)
2500- // return; // queue::submit finalizes PostProcessHandler and returns its
2501- // // event if necessary.
2502- //
2503- // Still hackier than "real" `queue::submit` but at least somewhat sane.
2504- // That, however hasn't been tried yet and we have an even hackier approach
2505- // copied from what's been done in an old reductions implementation before
2506- // eventless submission work has started. Not sure how feasible the approach
2507- // above is at this moment.
2508-
2509- // This `finalize` is wrong (at least logically) if
2510- // `assert(!CGH.eventNeeded())`
2511- auto E = CGH.finalize ();
2475+ bool EventNeeded = CGH.impl ->MEventNeeded ;
25122476 queue_impl &Q = CGH.impl ->get_queue ();
25132477 bool InOrder = Q.isInOrder ();
2514- // Cannot use `CGH.eventNeeded()` alone as there might be subsequent
2515- // `postProcess` calls and we cannot address them properly similarly to the
2516- // `finalize` issue described above. `swap_impls` suggested above might be
2517- // able to handle this scenario naturally.
2518- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
2519- handler_impl HandlerImpl{Q, nullptr , CGH.eventNeeded () || !InOrder};
2520- handler AuxHandler{HandlerImpl};
2521- #else
2522- handler AuxHandler{Q.shared_from_this (), CGH.eventNeeded () || !InOrder};
2523- #endif
25242478 if (!InOrder)
2525- AuxHandler.depends_on (E);
2526- AuxHandler.copyCodeLoc (CGH);
2527- F (AuxHandler);
2528- CGH.MLastEvent = AuxHandler.finalize ();
2479+ CGH.impl ->MEventNeeded = true ;
2480+
2481+ handler PostProcessHandler{
2482+ std::make_unique<handler_impl>(Q, nullptr , EventNeeded)};
2483+ PostProcessHandler.copyCodeLoc (CGH);
2484+ // Extend lifetimes of auxiliary resources till the last kernel in the chain
2485+ // finishes:
2486+ PostProcessHandler.impl ->MAuxiliaryResources = CGH.impl ->MAuxiliaryResources ;
2487+ auto E = CGH.finalize ();
2488+ if (!InOrder)
2489+ PostProcessHandler.depends_on (E);
2490+ F (PostProcessHandler);
2491+ swap (CGH, PostProcessHandler);
25292492}
25302493} // namespace detail
25312494} // namespace _V1
0 commit comments