From f7ffa5213e2e9efec1a4ebb13c4e6484bbe74643 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 16 Jul 2024 18:02:48 +0200 Subject: [PATCH] [SYCL][NFCI][ABI-Break] Move handler members to impl (#14460) This moves some of the members in the handler class to its impl class. Doing so allows developers to change these arguments without breaking ABI. This also moves more implementation details, such as command-group classes, launch configuration information, argument information and HostTask tracking, into sources to avoid hard-to-find ABI breaks in the communication between headers and runtime library. In addition to this, the following improvements are made: * The HostKernel class has been simplified to no longer have call and runOnHost functions. * The HostTask wrapper class has been moved to sources and the owner has been changed from a unique_ptr to a shared_ptr, which prevents the need for including host_task_impl.hpp in odd places. --------- Signed-off-by: Larsen, Steffen --- sycl/include/sycl/detail/cg_types.hpp | 309 +-------- sycl/include/sycl/ext/oneapi/memcpy2d.hpp | 1 - sycl/include/sycl/group.hpp | 8 +- sycl/include/sycl/handler.hpp | 324 ++++----- sycl/include/sycl/queue.hpp | 6 - sycl/include/sycl/reduction.hpp | 8 - sycl/include/sycl/stream.hpp | 3 +- sycl/include/sycl/sycl.hpp | 3 +- sycl/include/syclcompat/memory.hpp | 1 - sycl/source/accessor.cpp | 4 +- sycl/{include/sycl => source}/detail/cg.hpp | 209 ++++-- .../detail/error_handling/error_handling.hpp | 2 +- sycl/source/detail/graph_impl.cpp | 37 +- sycl/source/detail/graph_impl.hpp | 162 ++--- sycl/source/detail/handler_impl.hpp | 44 ++ .../detail/host_task.hpp} | 41 +- sycl/source/detail/jit_compiler.cpp | 2 +- .../program_manager/program_manager.hpp | 1 + sycl/source/detail/queue_impl.cpp | 6 +- sycl/source/detail/queue_impl.hpp | 10 +- sycl/source/detail/scheduler/commands.cpp | 172 ++--- sycl/source/detail/scheduler/commands.hpp | 2 +- .../source/detail/scheduler/graph_builder.cpp | 8 +- sycl/source/detail/scheduler/scheduler.cpp | 6 +- sycl/source/detail/scheduler/scheduler.hpp | 2 +- sycl/source/handler.cpp | 641 ++++++++++-------- .../AsyncHandler/default_async_handler.cpp | 1 - .../accessor/Inputs/host_task_accessor.cpp | 1 - sycl/test-e2e/Basic/accessor/accessor.cpp | 1 - .../Basic/accessor/empty_acc_host_task.cpp | 1 - sycl/test-e2e/Basic/event.cpp | 1 - sycl/test-e2e/Basic/host-task-dependency.cpp | 1 - sycl/test-e2e/Basic/host_task_depends.cpp | 1 - .../Basic/out_of_order_queue_status.cpp | 1 - sycl/test-e2e/Basic/profile_host_task.cpp | 1 - .../host_task_sampled_image_read_linear.cpp | 2 - .../host_task_sampled_image_read_nearest.cpp | 2 - .../host_task_unsampled_image_read.cpp | 2 - .../host_task_unsampled_image_write.cpp | 2 - sycl/test-e2e/Graph/Inputs/host_task.cpp | 2 - sycl/test-e2e/Graph/Inputs/host_task2.cpp | 2 - .../Inputs/host_task2_multiple_roots.cpp | 2 - sycl/test-e2e/Graph/Inputs/host_task_last.cpp | 2 - .../Graph/Inputs/host_task_multiple_deps.cpp | 2 - .../Graph/Inputs/host_task_multiple_roots.cpp | 2 - .../Graph/Inputs/host_task_single.cpp | 2 - .../Graph/Inputs/host_task_successive.cpp | 2 - .../interop-level-zero-get-native-mem.cpp | 2 +- .../Graph/RecordReplay/host_task_in_order.cpp | 1 - ...r_queue_with_host_managed_dependencies.cpp | 1 - ..._with_host_managed_dependencies_memcpy.cpp | 1 - ..._with_host_managed_dependencies_memset.cpp | 1 - .../HostInteropTask/host-task-dependency2.cpp | 1 - .../HostInteropTask/host-task-dependency3.cpp | 1 - 
.../HostInteropTask/host-task-dependency4.cpp | 1 - .../HostInteropTask/host-task-failure.cpp | 1 - .../HostInteropTask/host-task-two-queues.cpp | 1 - sycl/test-e2e/HostInteropTask/host-task.cpp | 1 - .../interop-task-cuda-buffer-migrate.cpp | 2 +- .../HostInteropTask/interop-task-cuda.cpp | 2 +- .../HostInteropTask/interop-task-hip.cpp | 2 +- .../test-e2e/HostInteropTask/interop-task.cpp | 2 +- .../InOrderEventsExt/get_last_event.cpp | 1 - .../in_order_ext_oneapi_submit_barrier.cpp | 1 - .../in_order_usm_host_dependency.cpp | 1 - sycl/test-e2e/KernelFusion/sync_host_task.cpp | 1 - .../interop-level-zero-get-native-mem.cpp | 2 +- ...nterop-level-zero-image-get-native-mem.cpp | 2 +- sycl/test-e2e/Plugin/interop-opencl.cpp | 2 +- .../test-e2e/Scheduler/SubBufferRemapping.cpp | 1 - sycl/test-e2e/USM/host_task.cpp | 1 - sycl/test/abi/layout_handler.cpp | 220 ++---- sycl/test/abi/sycl_symbols_linux.dump | 16 +- sycl/test/abi/sycl_symbols_windows.dump | 18 +- sycl/test/abi/symbol_size_alignment.cpp | 4 +- sycl/test/abi/vtable.cpp | 24 - .../include_deps/sycl_detail_core.hpp.cpp | 5 +- .../Extensions/CommandGraph/Barrier.cpp | 14 +- .../Extensions/CommandGraph/CommandGraph.cpp | 2 +- sycl/unittests/event/EventDestruction.cpp | 1 - .../arg_mask/EliminatedArgMask.cpp | 15 +- sycl/unittests/scheduler/Commands.cpp | 2 +- .../scheduler/InOrderQueueSyncCheck.cpp | 14 +- .../scheduler/SchedulerTestUtils.hpp | 42 +- .../scheduler/StreamInitDependencyOnHost.cpp | 8 +- 85 files changed, 1102 insertions(+), 1360 deletions(-) rename sycl/{include/sycl => source}/detail/cg.hpp (76%) rename sycl/{include/sycl/detail/host_task_impl.hpp => source/detail/host_task.hpp} (50%) diff --git a/sycl/include/sycl/detail/cg_types.hpp b/sycl/include/sycl/detail/cg_types.hpp index 2a59324a3b0c..89049c7dbf68 100644 --- a/sycl/include/sycl/detail/cg_types.hpp +++ b/sycl/include/sycl/detail/cg_types.hpp @@ -37,117 +37,34 @@ class handler; namespace detail { class HostTask; -// The structure represents kernel argument. -class ArgDesc { -public: - ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, - int Index) - : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} - - sycl::detail::kernel_param_kind_t MType; - void *MPtr; - int MSize; - int MIndex; -}; - -// The structure represents NDRange - global, local sizes, global offset, -// number of dimensions, and the cluster dimensions if applicable. -class NDRDescT { - // The method initializes all sizes for dimensions greater than the passed one - // to the default values, so they will not affect execution. - void setNDRangeLeftover(int Dims_) { - for (int I = Dims_; I < 3; ++I) { - GlobalSize[I] = 1; - LocalSize[I] = LocalSize[0] ? 1 : 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - } - -public: - NDRDescT() - : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0}, - Dims{0} {} - - template void set(sycl::range NumWorkItems) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - // Initializes this ND range descriptor with given range of work items and - // offset. 
- template - void set(sycl::range NumWorkItems, sycl::id Offset) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = Offset[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void set(sycl::nd_range ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - void set(int Dims_, sycl::nd_range<3> ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void setNumWorkGroups(sycl::range N) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = 0; - // '0' is a mark to adjust before kernel launch when there is enough info: - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = N[I]; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void setClusterDimensions(sycl::range N) { - if (Dims_ != Dims) { - throw std::runtime_error( - "Dimensionality of cluster, global and local ranges must be same"); - } - - for (int I = 0; I < Dims_; ++I) { - ClusterDimensions[I] = N[I]; - } - } - - sycl::range<3> GlobalSize; - sycl::range<3> LocalSize; - sycl::id<3> GlobalOffset; - /// Number of workgroups, used to record the number of workgroups from the - /// simplest form of parallel_for_work_group. If set, all other fields must be - /// zero - sycl::range<3> NumWorkGroups; - sycl::range<3> ClusterDimensions{1, 1, 1}; - size_t Dims; +/// Type of the command group. +/// NOTE: Changing the values of any of these enumerators is an API-break. +enum class CGType : unsigned int { + None = 0, + Kernel = 1, + CopyAccToPtr = 2, + CopyPtrToAcc = 3, + CopyAccToAcc = 4, + Barrier = 5, + BarrierWaitlist = 6, + Fill = 7, + UpdateHost = 8, + CopyUSM = 10, + FillUSM = 11, + PrefetchUSM = 12, + CodeplayHostTask = 14, + AdviseUSM = 15, + Copy2DUSM = 16, + Fill2DUSM = 17, + Memset2DUSM = 18, + CopyToDeviceGlobal = 19, + CopyFromDeviceGlobal = 20, + ReadWriteHostPipe = 21, + ExecCommandBuffer = 22, + CopyImage = 23, + SemaphoreWait = 24, + SemaphoreSignal = 25, + ProfilingTag = 26, }; template struct check_fn_signature { @@ -236,8 +153,6 @@ runKernelWithArg(KernelType KernelName, ArgType Arg) { // The pure virtual class aimed to store lambda/functors of any type. class HostKernelBase { public: - // The method executes lambda stored using NDRange passed. - virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0; // Return pointer to the lambda object. // Used to extract captured variables. virtual char *getPtr() = 0; @@ -255,177 +170,9 @@ class HostKernel : public HostKernelBase { public: HostKernel(KernelType Kernel) : MKernel(Kernel) {} - void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { - // adjust ND range for serial host: - NDRDescT AdjustedRange = NDRDesc; - - if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) { - // This is a special case - NDRange information is not complete, only the - // desired number of work groups is set by the user. 
Choose work group - // size (LocalSize), calculate the missing NDRange characteristics - // needed to invoke the kernel and adjust the NDRange descriptor - // accordingly. For some devices the work group size selection requires - // access to the device's properties, hence such late "adjustment". - range<3> WGsize{1, 1, 1}; // no better alternative for serial host? - AdjustedRange.set(NDRDesc.Dims, - nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize)); - } - // If local size for host is not set explicitly, let's adjust it to 1, - // so an exception for zero local size is not thrown. - if (AdjustedRange.LocalSize[0] == 0) - for (size_t I = 0; I < AdjustedRange.Dims; ++I) - AdjustedRange.LocalSize[I] = 1; - if (HPI) - HPI->start(); - runOnHost(AdjustedRange); - if (HPI) - HPI->end(); - } char *getPtr() override { return reinterpret_cast(&MKernel); } - template - typename std::enable_if_t> - runOnHost(const NDRDescT &) { - runKernelWithoutArg(MKernel); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - sycl::id Offset; - sycl::range Stride( - InitializedVal::template get<1>()); // initialized to 1 - sycl::range UpperBound( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - Range[I] = NDRDesc.GlobalSize[I]; - Offset[I] = NDRDesc.GlobalOffset[I]; - UpperBound[I] = Range[I] + Offset[I]; - } - - detail::NDLoop::iterate( - /*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID, Offset); - - runKernelWithArg &>(MKernel, ID); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::id ID; - sycl::range Range(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) - Range[I] = NDRDesc.GlobalSize[I]; - - detail::NDLoop::iterate(Range, [&](const sycl::id ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID); - sycl::item ItemWithOffset = Item; - - runKernelWithArg>(MKernel, Item); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - sycl::id Offset; - sycl::range Stride( - InitializedVal::template get<1>()); // initialized to 1 - sycl::range UpperBound( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - Range[I] = NDRDesc.GlobalSize[I]; - Offset[I] = NDRDesc.GlobalOffset[I]; - UpperBound[I] = Range[I] + Offset[I]; - } - - detail::NDLoop::iterate( - /*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID, Offset); - - runKernelWithArg>(MKernel, Item); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range GroupSize(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::exception(make_error_code(errc::nd_range), - "Invalid local size for global size"); - GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - sycl::id GlobalOffset; - for (int I = 0; I < Dims; ++I) { - GlobalOffset[I] = NDRDesc.GlobalOffset[I]; - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - - detail::NDLoop::iterate(GroupSize, [&](const id &GroupID) { - sycl::group 
Group = IDBuilder::createGroup( - GlobalSize, LocalSize, GroupSize, GroupID); - - detail::NDLoop::iterate(LocalSize, [&](const id &LocalID) { - id GlobalID = - GroupID * id{LocalSize} + LocalID + GlobalOffset; - const sycl::item GlobalItem = - IDBuilder::createItem(GlobalSize, GlobalID, - GlobalOffset); - const sycl::item LocalItem = - IDBuilder::createItem(LocalSize, LocalID); - const sycl::nd_item NDItem = - IDBuilder::createNDItem(GlobalItem, LocalItem, Group); - - runKernelWithArg>(MKernel, NDItem); - }); - }); - } - - template - std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range NGroups(InitializedVal::template get<0>()); - - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::exception(make_error_code(errc::nd_range), - "Invalid local size for global size"); - NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { - sycl::group Group = - IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - runKernelWithArg>(MKernel, Group); - }); - } - ~HostKernel() = default; }; diff --git a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp index f7399213ac88..3e29c20c390d 100644 --- a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp +++ b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include diff --git a/sycl/include/sycl/group.hpp b/sycl/include/sycl/group.hpp index 35c6bc9c9eac..7ce1ead1a229 100644 --- a/sycl/include/sycl/group.hpp +++ b/sycl/include/sycl/group.hpp @@ -673,13 +673,7 @@ template class __SYCL_TYPE(group) group { friend class detail::Builder; group(const range &G, const range &L, const range GroupRange, const id &I) - : globalRange(G), localRange(L), groupRange(GroupRange), index(I) { - // Make sure local range divides global without remainder: - __SYCL_ASSERT(((G % L).size() == 0) && - "global range is not multiple of local"); - __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) && - "inconsistent group constructor arguments"); - } + : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {} }; } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ee796cb78464..3654d735e39b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -175,10 +174,15 @@ class handler_impl; class kernel_impl; class queue_impl; class stream_impl; +class event_impl; template class image_accessor; class HandlerAccess; +class HostTask; + +using EventImplPtr = std::shared_ptr; + template static Arg member_ptr_helper(RetType (Func::*)(Arg) const); @@ -493,28 +497,24 @@ class __SYCL_EXPORT handler { /// \param Graph is a SYCL command_graph handler(std::shared_ptr Graph); - /// Stores copy of Arg passed to the CGData.MArgsStorage. 
- template void *storePlainArg(T &&Arg) { - CGData.MArgsStorage.emplace_back(sizeof(T)); - void *Storage = static_cast(CGData.MArgsStorage.back().data()); - std::memcpy(Storage, &Arg, sizeof(T)); - return Storage; - } + void *storeRawArg(const void *Ptr, size_t Size); void * storeRawArg(const sycl::ext::oneapi::experimental::raw_kernel_arg &RKA) { - CGData.MArgsStorage.emplace_back(RKA.MArgSize); - void *Storage = static_cast(CGData.MArgsStorage.back().data()); - std::memcpy(Storage, RKA.MArgData, RKA.MArgSize); - return Storage; + return storeRawArg(RKA.MArgData, RKA.MArgSize); + } + + /// Stores copy of Arg passed to the argument storage. + template void *storePlainArg(T &&Arg) { + return storeRawArg(&Arg, sizeof(T)); } - void setType(detail::CG::CGTYPE Type) { MCGType = Type; } + void setType(detail::CGType Type); - detail::CG::CGTYPE getType() { return MCGType; } + detail::CGType getType() const; void throwIfActionIsCreated() { - if (detail::CG::None != getType()) + if (detail::CGType::None != getType()) throw sycl::exception(make_error_code(errc::runtime), "Attempt to set multiple actions for the " "command group. Command group must consist of " @@ -662,8 +662,8 @@ class __SYCL_EXPORT handler { detail::getSyclObjImpl(LocalAccBase); detail::LocalAccessorImplHost *Req = LocalAccImpl.get(); MLocalAccStorage.push_back(std::move(LocalAccImpl)); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req, - static_cast(access::target::local), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_accessor, Req, + static_cast(access::target::local), ArgIndex); } // setArgHelper for local accessor argument (legacy accessor interface) @@ -699,31 +699,28 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Arg; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); detail::AccessorImplHost *Req = AccImpl.get(); - // Add accessor to the list of requirements. - CGData.MRequirements.push_back(Req); - // Store copy of the accessor. - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); // Add accessor to the list of arguments. 
- MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req, - static_cast(AccessTarget), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_accessor, Req, + static_cast(AccessTarget), ArgIndex); } template void setArgHelper(int ArgIndex, T &&Arg) { void *StoredArg = storePlainArg(Arg); if (!std::is_same::value && std::is_pointer::value) { - MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg, - sizeof(T), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_pointer, StoredArg, sizeof(T), + ArgIndex); } else { - MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout, - StoredArg, sizeof(T), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_std_layout, StoredArg, sizeof(T), + ArgIndex); } } void setArgHelper(int ArgIndex, sampler &&Arg) { void *StoredArg = storePlainArg(Arg); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg, - sizeof(sampler), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_sampler, StoredArg, + sizeof(sampler), ArgIndex); } // setArgHelper for graph dynamic_parameters @@ -745,8 +742,8 @@ class __SYCL_EXPORT handler { void setArgHelper(int ArgIndex, sycl::ext::oneapi::experimental::raw_kernel_arg &&Arg) { auto StoredArg = storeRawArg(Arg); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout, StoredArg, - Arg.MArgSize, ArgIndex); + addArg(detail::kernel_param_kind_t::kind_std_layout, StoredArg, + Arg.MArgSize, ArgIndex); } /// Registers a dynamic parameter with the handler for later association with @@ -934,7 +931,7 @@ class __SYCL_EXPORT handler { // header, so don't perform things that require it. if (KernelHasName) { // TODO support ESIMD in no-integration-header case too. - MArgs.clear(); + clearArgs(); extractArgsAndReqsFromLambda(reinterpret_cast(KernelPtr), KI::getNumParams(), &KI::getParamDesc(0), KI::isESIMD()); @@ -944,7 +941,7 @@ class __SYCL_EXPORT handler { // accessors from the list(which are associated with this handler) as // arguments. We must copy the associated accessors as they are checked // later during finalize. - MArgs = MAssociatedAccesors; + setArgsToAssociatedAccessors(); } // If the kernel lambda is callable with a kernel_handler argument, manifest @@ -962,15 +959,14 @@ class __SYCL_EXPORT handler { template void checkAndSetClusterRange(const Properties &Props) { namespace syclex = sycl::ext::oneapi::experimental; - constexpr std::size_t cluster_dim = + constexpr std::size_t ClusterDim = syclex::detail::getClusterDim(); - if constexpr (cluster_dim > 0) { - setKernelUsesClusterLaunch(); - MNDRDesc.setClusterDimensions( - Props - .template get_property< - syclex::cuda::cluster_size_key>() - .get_cluster_size()); + if constexpr (ClusterDim > 0) { + auto ClusterSize = Props + .template get_property< + syclex::cuda::cluster_size_key>() + .get_cluster_size(); + setKernelClusterLaunch(padRange(ClusterSize), ClusterDim); } } @@ -1358,10 +1354,10 @@ class __SYCL_EXPORT handler { // __SYCL_ASSUME_INT can still be violated. So check the bounds // of the user range, instead of the rounded range. 
detail::checkValueRange(UserRange); - MNDRDesc.set(RoundedRange); + setNDRangeDescriptor(RoundedRange); StoreLambda( std::move(Wrapper)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif } else @@ -1379,10 +1375,10 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ processProperties(Props); detail::checkValueRange(UserRange); - MNDRDesc.set(std::move(UserRange)); + setNDRangeDescriptor(std::move(UserRange)); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif #else @@ -1428,11 +1424,11 @@ class __SYCL_EXPORT handler { PropertiesT>(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(ExecutionRange); - MNDRDesc.set(std::move(ExecutionRange)); + setNDRangeDescriptor(std::move(ExecutionRange)); processProperties(Props); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(true); #endif } @@ -1449,8 +1445,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems); - MNDRDesc.set(std::move(NumWorkItems)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NumWorkItems)); + setType(detail::CGType::Kernel); setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1487,9 +1483,9 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ processProperties(Props); detail::checkValueRange(NumWorkGroups); - MNDRDesc.setNumWorkGroups(NumWorkGroups); + setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif // __SYCL_DEVICE_ONLY__ } @@ -1531,9 +1527,9 @@ class __SYCL_EXPORT handler { nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); detail::checkValueRange(ExecRange); - MNDRDesc.set(std::move(ExecRange)); + setNDRangeDescriptor(std::move(ExecRange)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -1797,10 +1793,10 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ // No need to check if range is out of INT_MAX limits as it's compile-time // known constant. - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); processProperties(Props); StoreLambda(KernelFunc); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif } @@ -1816,12 +1812,23 @@ class __SYCL_EXPORT handler { void setHandlerKernelBundle( const std::shared_ptr &NewKernelBundleImpPtr); + void SetHostTask(std::function &&Func); + void SetHostTask(std::function &&Func); + template std::enable_if_t, void()>::value || detail::check_fn_signature, void(interop_handle)>::value> - host_task_impl(FuncT &&Func); + host_task_impl(FuncT &&Func) { + throwIfActionIsCreated(); + + // Need to copy these rather than move so that we can check associated + // accessors during finalize + setArgsToAssociatedAccessors(); + + SetHostTask(std::move(Func)); + } /// @brief Get the command graph if any associated with this handler. 
It can /// come from either the associated queue or from being set explicitly through @@ -2062,10 +2069,10 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif } @@ -2123,9 +2130,9 @@ class __SYCL_EXPORT handler { setHandlerKernelBundle(Kernel); // No need to check if range is out of INT_MAX limits as it's compile-time // known constant - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); } @@ -2157,8 +2164,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); + setType(detail::CGType::Kernel); setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2176,8 +2183,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NDRange); - MNDRDesc.set(std::move(NDRange)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NDRange)); + setType(detail::CGType::Kernel); setNDRangeUsed(true); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2202,9 +2209,9 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ // No need to check if range is out of INT_MAX limits as it's compile-time // known constant - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2238,9 +2245,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems); - MNDRDesc.set(std::move(NumWorkItems)); + setNDRangeDescriptor(std::move(NumWorkItems)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2278,9 +2285,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2317,9 +2324,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NDRange); - MNDRDesc.set(std::move(NDRange)); + setNDRangeDescriptor(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - 
setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(true); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2360,10 +2367,10 @@ class __SYCL_EXPORT handler { kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkGroups); - MNDRDesc.setNumWorkGroups(NumWorkGroups); + setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); MKernel = detail::getSyclObjImpl(std::move(Kernel)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -2403,10 +2410,10 @@ class __SYCL_EXPORT handler { nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); detail::checkValueRange(ExecRange); - MNDRDesc.set(std::move(ExecRange)); + setNDRangeDescriptor(std::move(ExecRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -2593,7 +2600,7 @@ class __SYCL_EXPORT handler { "Invalid accessor mode for the copy method."); // Make sure data shared_ptr points to is not released until we finish // work with it. - CGData.MSharedPtrStorage.push_back(Dst); + addLifetimeSharedPtrStorage(Dst); typename std::shared_ptr::element_type *RawDstPtr = Dst.get(); copy(Src, RawDstPtr); } @@ -2623,7 +2630,7 @@ class __SYCL_EXPORT handler { // device-copyable. // Make sure data shared_ptr points to is not released until we finish // work with it. - CGData.MSharedPtrStorage.push_back(Src); + addLifetimeSharedPtrStorage(Src); typename std::shared_ptr::element_type *RawSrcPtr = Src.get(); copy(RawSrcPtr, Dst); } @@ -2648,17 +2655,16 @@ class __SYCL_EXPORT handler { "Invalid accessor target for the copy method."); static_assert(isValidModeForSourceAccessor(AccessMode), "Invalid accessor mode for the copy method."); - setType(detail::CG::CopyAccToPtr); + setType(detail::CGType::CopyAccToPtr); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Src; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - CGData.MRequirements.push_back(AccImpl.get()); MSrcPtr = static_cast(AccImpl.get()); MDstPtr = static_cast(Dst); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } /// Copies the content of memory pointed by Src into the memory object @@ -2685,17 +2691,16 @@ class __SYCL_EXPORT handler { // TODO: Add static_assert with is_device_copyable when vec is // device-copyable. 
- setType(detail::CG::CopyPtrToAcc); + setType(detail::CGType::CopyPtrToAcc); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - CGData.MRequirements.push_back(AccImpl.get()); MSrcPtr = const_cast(Src); MDstPtr = static_cast(AccImpl.get()); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } /// Copies the content of memory object accessed by Src to the memory @@ -2738,7 +2743,7 @@ class __SYCL_EXPORT handler { if (copyAccToAccHelper(Src, Dst)) return; - setType(detail::CG::CopyAccToAcc); + setType(detail::CGType::CopyAccToAcc); detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src; detail::AccessorImplPtr AccImplSrc = detail::getSyclObjImpl(*AccBaseSrc); @@ -2746,14 +2751,12 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBaseDst = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImplDst = detail::getSyclObjImpl(*AccBaseDst); - CGData.MRequirements.push_back(AccImplSrc.get()); - CGData.MRequirements.push_back(AccImplDst.get()); MSrcPtr = AccImplSrc.get(); MDstPtr = AccImplDst.get(); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImplSrc)); - CGData.MAccStorage.push_back(std::move(AccImplDst)); + addAccessorReq(std::move(AccImplSrc)); + addAccessorReq(std::move(AccImplDst)); } /// Provides guarantees that the memory object accessed via Acc is updated @@ -2771,14 +2774,13 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), "Invalid accessor target for the update_host method."); - setType(detail::CG::UpdateHost); + setType(detail::CGType::UpdateHost); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); MDstPtr = static_cast(AccImpl.get()); - CGData.MRequirements.push_back(AccImpl.get()); - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } public: @@ -2859,7 +2861,7 @@ class __SYCL_EXPORT handler { /// complete state. void ext_oneapi_barrier() { throwIfActionIsCreated(); - setType(detail::CG::Barrier); + setType(detail::CGType::Barrier); } /// Prevents any commands submitted afterward to this queue from executing @@ -3287,31 +3289,14 @@ class __SYCL_EXPORT handler { uint64_t SignalValue); private: - std::shared_ptr MImpl; + std::shared_ptr impl; std::shared_ptr MQueue; - /// The storage for the arguments passed. - /// We need to store a copy of values that are passed explicitly through - /// set_arg, require and so on, because we need them to be alive after - /// we exit the method they are passed in. - mutable detail::CG::StorageInitHelper CGData; std::vector MLocalAccStorage; std::vector> MStreamStorage; - /// The list of arguments for the kernel. - std::vector MArgs; - /// The list of associated accessors with this handler. - /// These accessors were created with this handler as argument or - /// have become required for this handler via require method. - std::vector MAssociatedAccesors; - /// Struct that encodes global size, local size, ... - detail::NDRDescT MNDRDesc; detail::string MKernelName; /// Storage for a sycl::kernel object. std::shared_ptr MKernel; - /// Type of the command group, e.g. kernel, fill. Can also encode version. 
- /// Use getType and setType methods to access this variable unless - /// manipulations with version are required - detail::CG::CGTYPE MCGType = detail::CG::None; /// Pointer to the source host memory or accessor(depending on command type). void *MSrcPtr = nullptr; /// Pointer to the dest host memory or accessor(depends on command type). @@ -3322,22 +3307,6 @@ class __SYCL_EXPORT handler { std::vector MPattern; /// Storage for a lambda or function object. std::unique_ptr MHostKernel; - /// Storage for lambda/function when using HostTask - std::unique_ptr MHostTask; - /// The list of valid SYCL events that need to complete - /// before barrier command can be executed - std::vector MEventsWaitWithBarrier; - - /// The graph that is associated with this handler. - std::shared_ptr MGraph; - /// If we are submitting a graph using ext_oneapi_graph this will be the graph - /// to be executed. - std::shared_ptr - MExecGraph; - /// Storage for a node created from a subgraph submission. - std::shared_ptr MSubgraphNode; - /// Storage for the CG created when handling graph nodes added explicitly. - std::unique_ptr MGraphNodeCG; detail::code_location MCodeLoc = {}; bool MIsFinalized = false; @@ -3399,6 +3368,10 @@ class __SYCL_EXPORT handler { class _propertiesT, class> friend class ext::intel::experimental::pipe; + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + /// Read from a host pipe given a host address and /// \param Name name of the host pipe to be passed into lower level runtime /// \param Ptr host pointer of host pipe as identified by address of its const @@ -3523,13 +3496,12 @@ class __SYCL_EXPORT handler { accessor Dst, const T &Pattern) { - setType(detail::CG::Fill); + setType(detail::CGType::Fill); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); MDstPtr = static_cast(AccImpl.get()); - CGData.MRequirements.push_back(AccImpl.get()); - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); MPattern.resize(sizeof(T)); auto PatternPtr = reinterpret_cast(MPattern.data()); @@ -3623,15 +3595,8 @@ class __SYCL_EXPORT handler { accessor Acc) { auto *AccBase = reinterpret_cast(&Acc); - detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - detail::AccessorImplHost *Req = AccImpl.get(); - if (std::find_if(MAssociatedAccesors.begin(), MAssociatedAccesors.end(), - [&](const detail::ArgDesc &AD) { - return AD.MType == - detail::kernel_param_kind_t::kind_accessor && - AD.MPtr == Req && - AD.MSize == static_cast(AccessTarget); - }) == MAssociatedAccesors.end()) + detail::AccessorImplHost *Req = detail::getSyclObjImpl(*AccBase).get(); + if (HasAssociatedAccessor(Req, AccessTarget)) throw sycl::exception(make_error_code(errc::kernel_argument), "placeholder accessor must be bound by calling " "handler::require() before it can be used."); @@ -3642,8 +3607,8 @@ class __SYCL_EXPORT handler { // Set value of the kernel is cooperative flag void setKernelIsCooperative(bool); - // Set using cuda thread block cluster launch flag true - void setKernelUsesClusterLaunch(); + // Set using cuda thread block cluster launch flag and set the launch bounds. 
+ void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> @@ -3665,9 +3630,70 @@ class __SYCL_EXPORT handler { inline void internalProfilingTagImpl() { throwIfActionIsCreated(); - setType(detail::CG::ProfilingTag); + setType(detail::CGType::ProfilingTag); } + void addAccessorReq(detail::AccessorImplPtr Accessor); + + void addLifetimeSharedPtrStorage(std::shared_ptr SPtr); + + void addArg(detail::kernel_param_kind_t ArgKind, void *Req, int AccessTarget, + int ArgIndex); + void clearArgs(); + void setArgsToAssociatedAccessors(); + + bool HasAssociatedAccessor(detail::AccessorImplHost *Req, + access::target AccessTarget) const; + + template static sycl::range<3> padRange(sycl::range Range) { + if constexpr (Dims == 3) { + return Range; + } else { + sycl::range<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Range[I]; + return Res; + } + } + + template static sycl::id<3> padId(sycl::id Id) { + if constexpr (Dims == 3) { + return Id; + } else { + sycl::id<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Id[I]; + return Res; + } + } + + template + void setNDRangeDescriptor(sycl::range N, + bool SetNumWorkGroups = false) { + return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims); + } + template + void setNDRangeDescriptor(sycl::range NumWorkItems, + sycl::id Offset) { + return setNDRangeDescriptorPadded(padRange(NumWorkItems), padId(Offset), + Dims); + } + template + void setNDRangeDescriptor(sycl::nd_range ExecutionRange) { + return setNDRangeDescriptorPadded( + padRange(ExecutionRange.get_global_range()), + padRange(ExecutionRange.get_local_range()), + padId(ExecutionRange.get_offset()), Dims); + } + + void setNDRangeDescriptorPadded(sycl::range<3> N, bool SetNumWorkGroups, + int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset, + int Dims); + friend class detail::HandlerAccess; protected: @@ -3688,17 +3714,3 @@ class HandlerAccess { } // namespace _V1 } // namespace sycl - -#ifdef __SYCL_BUILD_SYCL_DLL -// The following fails (somewhat expectedly) when compiled with MSVC: -// -// #include -// struct __declspec(dllexport) handler { -// std::unique_ptr Member; -// }; -// -// We do __SYCL_EXPORT sycl::handler class and it has an -// std::unique_ptr member. As such, ensure the type is -// complete if we're building the SYCL shared library. -#include -#endif diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index b1b7d93bc679..3ce31300ba0f 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -41,12 +41,6 @@ #include // for property_list #include // for range -#if __SYCL_USE_FALLBACK_ASSERT -// TODO: maybe we can move detail::submitAssertCapture into the shared library -// instead. -#include -#endif - #include // for size_t #include // for function #include // for shared_ptr, hash diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 3e969837759b..737306818767 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -45,14 +45,6 @@ #include // for dynamic_e... #include // for malloc_de... -// reduction::withAuxHandler calls handler::~handler() and that, in turn, needs -// all the dtors from std::unique_pointer handler's data members, including the -// host_task-related stuff. 
That's not the case for -// because handler object is only ctor/dtor'ed inside SYCL shared library but -// not in the current translation unit. It would be nice to find a better way -// than this include in future. -#include - #include // for min #include // for array #include // for assert diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index eef0fc51d3b0..c965b80236bd 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -14,7 +14,6 @@ #include // for atomic #include // for isinf, isnan, signbit #include // for array -#include // for stream_impl #include // for __SYCL_SPECIAL_CLASS, __S... #include // for __SYCL2020_DEPRECATED #include // for __SYCL_EXPORT @@ -44,6 +43,8 @@ inline namespace _V1 { namespace detail { +class stream_impl; + using FmtFlags = unsigned int; // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index fa5041ff0ece..28dc24fb9cd2 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -10,8 +10,6 @@ #include -#include - #include #include #include @@ -37,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/include/syclcompat/memory.hpp b/sycl/include/syclcompat/memory.hpp index ad33ce9a9bdf..93dace8bb60d 100644 --- a/sycl/include/syclcompat/memory.hpp +++ b/sycl/include/syclcompat/memory.hpp @@ -42,7 +42,6 @@ #include #include -#include #include #include #include diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index d3f1557871d9..aa3547a4a44a 100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -15,12 +15,12 @@ namespace sycl { inline namespace _V1 { namespace detail { device getDeviceFromHandler(handler &cgh) { - assert((cgh.MQueue || cgh.MGraph) && + assert((cgh.MQueue || getSyclObjImpl(cgh)->MGraph) && "One of MQueue or MGraph should be nonnull!"); if (cgh.MQueue) return cgh.MQueue->get_device(); - return cgh.MGraph->getDevice(); + return getSyclObjImpl(cgh)->MGraph->getDevice(); } AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/source/detail/cg.hpp similarity index 76% rename from sycl/include/sycl/detail/cg.hpp rename to sycl/source/detail/cg.hpp index b6955af412b3..48d80c06394b 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -46,41 +46,119 @@ class stream_impl; class queue_impl; class kernel_bundle_impl; -// If there's a need to add new members to CG classes without breaking ABI -// compatibility, we can bring back the extended members mechanism. See -// https://github.com/intel/llvm/pull/6759 +// The structure represents kernel argument. +class ArgDesc { +public: + ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, + int Index) + : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} + + sycl::detail::kernel_param_kind_t MType; + void *MPtr; + int MSize; + int MIndex; +}; + +// The structure represents NDRange - global, local sizes, global offset and +// number of dimensions. +class NDRDescT { + // The method initializes all sizes for dimensions greater than the passed one + // to the default values, so they will not affect execution. + void setNDRangeLeftover() { + for (int I = Dims; I < 3; ++I) { + GlobalSize[I] = 1; + LocalSize[I] = LocalSize[0] ? 
1 : 0; + GlobalOffset[I] = 0; + NumWorkGroups[I] = 0; + } + } + + template + static sycl::range<3> padRange(sycl::range Range) { + if constexpr (Dims == 3) { + return Range; + } else { + sycl::range<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Range[I]; + return Res; + } + } + + template static sycl::id<3> padId(sycl::id Id) { + if constexpr (Dims == 3) { + return Id; + } else { + sycl::id<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Id[I]; + return Res; + } + } + +public: + NDRDescT() = default; + NDRDescT(const NDRDescT &Desc) = default; + NDRDescT(NDRDescT &&Desc) = default; + + NDRDescT(sycl::range<3> N, bool SetNumWorkGroups, int DimsArg) + : GlobalSize{SetNumWorkGroups ? sycl::range<3>{0, 0, 0} : N}, + NumWorkGroups{SetNumWorkGroups ? N : sycl::range<3>{0, 0, 0}}, + Dims{size_t(DimsArg)} { + setNDRangeLeftover(); + } + + NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) + : GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {} + + NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize, + sycl::id<3> Offset, int DimsArg) + : GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset}, + Dims{size_t(DimsArg)} { + setNDRangeLeftover();} + + template + NDRDescT(sycl::nd_range ExecutionRange, int DimsArg) + : NDRDescT(padRange(ExecutionRange.get_global_range()), + padRange(ExecutionRange.get_local_range()), + padId(ExecutionRange.get_offset()), size_t(DimsArg)) { + setNDRangeLeftover();} + + template + NDRDescT(sycl::nd_range ExecutionRange) + : NDRDescT(ExecutionRange, Dims_) {} + + template + NDRDescT(sycl::range Range) + : NDRDescT(padRange(Range), /*SetNumWorkGroups=*/false, Dims_) {} + + void setClusterDimensions(sycl::range<3> N, int Dims) { + if (this->Dims != size_t(Dims)) { + throw std::runtime_error( + "Dimensionality of cluster, global and local ranges must be same"); + } + + for (int I = 0; I < 3; ++I) + ClusterDimensions[I] = (I < Dims) ? N[I] : 1; + } + + NDRDescT &operator=(const NDRDescT &Desc) = default; + NDRDescT &operator=(NDRDescT &&Desc) = default; + + sycl::range<3> GlobalSize{0, 0, 0}; + sycl::range<3> LocalSize{0, 0, 0}; + sycl::id<3> GlobalOffset{0, 0, 0}; + /// Number of workgroups, used to record the number of workgroups from the + /// simplest form of parallel_for_work_group. If set, all other fields must be + /// zero + sycl::range<3> NumWorkGroups{0, 0, 0}; + sycl::range<3> ClusterDimensions{1, 1, 1}; + size_t Dims = 0; +}; + /// Base class for all types of command groups. class CG { public: - /// Type of the command group. 
- enum CGTYPE : unsigned int { - None = 0, - Kernel = 1, - CopyAccToPtr = 2, - CopyPtrToAcc = 3, - CopyAccToAcc = 4, - Barrier = 5, - BarrierWaitlist = 6, - Fill = 7, - UpdateHost = 8, - CopyUSM = 10, - FillUSM = 11, - PrefetchUSM = 12, - CodeplayHostTask = 14, - AdviseUSM = 15, - Copy2DUSM = 16, - Fill2DUSM = 17, - Memset2DUSM = 18, - CopyToDeviceGlobal = 19, - CopyFromDeviceGlobal = 20, - ReadWriteHostPipe = 21, - ExecCommandBuffer = 22, - CopyImage = 23, - SemaphoreWait = 24, - SemaphoreSignal = 25, - ProfilingTag = 26, - }; - struct StorageInitHelper { StorageInitHelper() = default; StorageInitHelper(std::vector> ArgsStorage, @@ -110,7 +188,7 @@ class CG { std::vector MEvents; }; - CG(CGTYPE Type, StorageInitHelper D, detail::code_location loc = {}) + CG(CGType Type, StorageInitHelper D, detail::code_location loc = {}) : MType(Type), MData(std::move(D)) { // Capture the user code-location from Q.submit(), Q.parallel_for() // etc for later use; if code location information is not available, @@ -126,7 +204,7 @@ class CG { CG(CG &&CommandGroup) = default; CG(const CG &CommandGroup) = default; - CGTYPE getType() const { return MType; } + CGType getType() const { return MType; } std::vector> &getArgsStorage() { return MData.MArgsStorage; @@ -152,7 +230,7 @@ class CG { virtual ~CG() = default; private: - CGTYPE MType; + CGType MType; StorageInitHelper MData; public: @@ -187,7 +265,7 @@ class CGExecKernel : public CG { std::string KernelName, std::vector> Streams, std::vector> AuxiliaryResources, - CGTYPE Type, + CGType Type, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, bool KernelIsCooperative, bool MKernelUsesClusterLaunch, detail::code_location loc = {}) @@ -200,7 +278,7 @@ class CGExecKernel : public CG { MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), MKernelUsesClusterLaunch(MKernelUsesClusterLaunch) { - assert(getType() == Kernel && "Wrong type of exec kernel CG."); + assert(getType() == CGType::Kernel && "Wrong type of exec kernel CG."); } CGExecKernel(const CGExecKernel &CGExec) = default; @@ -232,7 +310,7 @@ class CGCopy : public CG { std::vector> MAuxiliaryResources; public: - CGCopy(CGTYPE CopyType, void *Src, void *Dst, CG::StorageInitHelper CGData, + CGCopy(CGType CopyType, void *Src, void *Dst, CG::StorageInitHelper CGData, std::vector> AuxiliaryResources, detail::code_location loc = {}) : CG(CopyType, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), @@ -255,7 +333,7 @@ class CGFill : public CG { CGFill(std::vector Pattern, void *Ptr, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Fill, std::move(CGData), std::move(loc)), + : CG(CGType::Fill, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToFill() { return MPtr; } }; @@ -267,7 +345,7 @@ class CGUpdateHost : public CG { public: CGUpdateHost(void *Ptr, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(UpdateHost, std::move(CGData), std::move(loc)), + : CG(CGType::UpdateHost, std::move(CGData), std::move(loc)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToUpdate() { return MPtr; } @@ -282,7 +360,7 @@ class CGCopyUSM : public CG { public: CGCopyUSM(void *Src, void *Dst, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MLength(Length) {} void *getSrc() { 
return MSrc; } @@ -299,7 +377,7 @@ class CGFillUSM : public CG { public: CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(FillUSM, std::move(CGData), std::move(loc)), + : CG(CGType::FillUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } @@ -314,7 +392,7 @@ class CGPrefetchUSM : public CG { public: CGPrefetchUSM(void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(PrefetchUSM, std::move(CGData), std::move(loc)), MDst(DstPtr), + : CG(CGType::PrefetchUSM, std::move(CGData), std::move(loc)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } @@ -328,7 +406,7 @@ class CGAdviseUSM : public CG { public: CGAdviseUSM(void *DstPtr, size_t Length, pi_mem_advice Advice, - CG::StorageInitHelper CGData, CGTYPE Type, + CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), MDst(DstPtr), MLength(Length), MAdvice(Advice) {} @@ -342,7 +420,7 @@ class CGBarrier : public CG { std::vector MEventsWaitWithBarrier; CGBarrier(std::vector EventsWaitWithBarrier, - CG::StorageInitHelper CGData, CGTYPE Type, + CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {} @@ -351,7 +429,7 @@ class CGBarrier : public CG { class CGProfilingTag : public CG { public: CGProfilingTag(CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CG::ProfilingTag, std::move(CGData), std::move(loc)) {} + : CG(CGType::ProfilingTag, std::move(CGData), std::move(loc)) {} }; /// "Copy 2D USM" command group class. 
@@ -367,7 +445,7 @@ class CGCopy2DUSM : public CG { CGCopy2DUSM(void *Src, void *Dst, size_t SrcPitch, size_t DstPitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MSrcPitch(SrcPitch), MDstPitch(DstPitch), MWidth(Width), MHeight(Height) {} @@ -391,7 +469,7 @@ class CGFill2DUSM : public CG { CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Fill2DUSM, std::move(CGData), std::move(loc)), + : CG(CGType::Fill2DUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MPitch(Pitch), MWidth(Width), MHeight(Height) {} void *getDst() const { return MDst; } @@ -413,7 +491,7 @@ class CGMemset2DUSM : public CG { CGMemset2DUSM(char Value, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Memset2DUSM, std::move(CGData), std::move(loc)), MValue(Value), + : CG(CGType::Memset2DUSM, std::move(CGData), std::move(loc)), MValue(Value), MDst(DstPtr), MPitch(Pitch), MWidth(Width), MHeight(Height) {} void *getDst() const { return MDst; } size_t getPitch() const { return MPitch; } @@ -434,7 +512,7 @@ class CGReadWriteHostPipe : public CG { CGReadWriteHostPipe(const std::string &Name, bool Block, void *Ptr, size_t Size, bool Read, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(ReadWriteHostPipe, std::move(CGData), std::move(loc)), + : CG(CGType::ReadWriteHostPipe, std::move(CGData), std::move(loc)), PipeName(Name), Blocking(Block), HostPtr(Ptr), TypeSize(Size), IsReadOp(Read) {} @@ -458,7 +536,7 @@ class CGCopyToDeviceGlobal : public CG { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyToDeviceGlobal, std::move(CGData), std::move(loc)), MSrc(Src), + : CG(CGType::CopyToDeviceGlobal, std::move(CGData), std::move(loc)), MSrc(Src), MDeviceGlobalPtr(DeviceGlobalPtr), MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes), MOffset(Offset) {} @@ -483,7 +561,7 @@ class CGCopyFromDeviceGlobal : public CG { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyFromDeviceGlobal, std::move(CGData), std::move(loc)), + : CG(CGType::CopyFromDeviceGlobal, std::move(CGData), std::move(loc)), MDeviceGlobalPtr(DeviceGlobalPtr), MDest(Dest), MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes), MOffset(Offset) {} @@ -515,7 +593,7 @@ class CGCopyImage : public CG { sycl::detail::pi::PiImageRegion HostExtent, sycl::detail::pi::PiImageRegion CopyExtent, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MImageDesc(ImageDesc), MImageFormat(ImageFormat), MImageCopyFlags(ImageCopyFlags), MSrcOffset(SrcOffset), MDstOffset(DstOffset), MHostExtent(HostExtent), @@ -544,7 +622,7 @@ class CGSemaphoreWait : public CG { sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, std::optional WaitValue, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(SemaphoreWait, std::move(CGData), std::move(loc)), + : CG(CGType::SemaphoreWait, 
std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MWaitValue(WaitValue) { } @@ -564,7 +642,7 @@ class CGSemaphoreSignal : public CG { sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, std::optional SignalValue, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(SemaphoreSignal, std::move(CGData), std::move(loc)), + : CG(CGType::SemaphoreSignal, std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MSignalValue(SignalValue) {} @@ -586,10 +664,29 @@ class CGExecCommandBuffer : public CG { const std::shared_ptr< sycl::ext::oneapi::experimental::detail::exec_graph_impl> &ExecGraph, CG::StorageInitHelper CGData) - : CG(CGTYPE::ExecCommandBuffer, std::move(CGData)), + : CG(CGType::ExecCommandBuffer, std::move(CGData)), MCommandBuffer(CommandBuffer), MExecGraph(ExecGraph) {} }; +class CGHostTask : public CG { +public: + std::shared_ptr MHostTask; + // queue for host-interop task + std::shared_ptr MQueue; + // context for host-interop task + std::shared_ptr MContext; + std::vector MArgs; + + CGHostTask(std::shared_ptr HostTask, + std::shared_ptr Queue, + std::shared_ptr Context, + std::vector Args, CG::StorageInitHelper CGData, + CGType Type, detail::code_location loc = {}) + : CG(Type, std::move(CGData), std::move(loc)), + MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context), + MArgs(std::move(Args)) {} +}; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index 49bad6f2a5e3..879a26905088 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -8,8 +8,8 @@ #pragma once +#include #include -#include #include namespace sycl { diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 127e0d618925..7b6907d2298d 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -98,7 +98,7 @@ void sortTopological(std::shared_ptr NodeImpl, /// @param PartitionNum Number to propagate. 
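The CGHostTask command group introduced above now owns its task through a shared_ptr to HostTask instead of the previous unique_ptr (the old definition is removed from host_task_impl.hpp further down in this patch). Shared ownership is what lets graph code duplicate a host-task command group without taking the task away from the original. A rough stand-alone sketch of that ownership model, with a simplified HostTask and CGHostTask rather than the real definitions:

#include <functional>
#include <iostream>
#include <memory>

namespace detail {

// Simplified stand-in for detail::HostTask: it just wraps a callable.
class HostTask {
public:
  explicit HostTask(std::function<void()> Fn) : MFn(std::move(Fn)) {}
  void call() const { MFn(); }

private:
  std::function<void()> MFn;
};

// Simplified stand-in for CGHostTask. Holding the task through shared_ptr
// (rather than unique_ptr) means a copy of the command group can be made
// without cloning the task or transferring exclusive ownership of it.
struct CGHostTask {
  std::shared_ptr<HostTask> MHostTask;
};

} // namespace detail

int main() {
  detail::CGHostTask Original{std::make_shared<detail::HostTask>(
      [] { std::cout << "host task ran\n"; })};

  // A copy such as a graph node might make when rebuilding its command
  // group; both command groups now refer to the same HostTask instance.
  detail::CGHostTask Copy = Original;
  Copy.MHostTask->call();
}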
void propagatePartitionUp(std::shared_ptr Node, int PartitionNum) { if (((Node->MPartitionNum != -1) && (Node->MPartitionNum <= PartitionNum)) || - (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask)) { + (Node->MCGType == sycl::detail::CGType::CodeplayHostTask)) { return; } Node->MPartitionNum = PartitionNum; @@ -117,7 +117,7 @@ void propagatePartitionUp(std::shared_ptr Node, int PartitionNum) { void propagatePartitionDown( std::shared_ptr Node, int PartitionNum, std::list> &HostTaskList) { - if (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask) { + if (Node->MCGType == sycl::detail::CGType::CodeplayHostTask) { if (Node->MPartitionNum != -1) { HostTaskList.push_front(Node); } @@ -185,7 +185,7 @@ void exec_graph_impl::makePartitions() { std::list> HostTaskList; // find all the host-tasks in the graph for (auto &Node : MNodeStorage) { - if (Node->MCGType == sycl::detail::CG::CodeplayHostTask) { + if (Node->MCGType == sycl::detail::CGType::CodeplayHostTask) { HostTaskList.push_back(Node); } } @@ -374,7 +374,7 @@ graph_impl::add(const std::shared_ptr &Impl, sycl::handler Handler{Impl}; CGF(Handler); - if (Handler.MCGType == sycl::detail::CG::Barrier) { + if (Handler.getType() == sycl::detail::CGType::Barrier) { throw sycl::exception( make_error_code(errc::invalid), "The sycl_ext_oneapi_enqueue_barrier feature is not available with " @@ -384,20 +384,21 @@ graph_impl::add(const std::shared_ptr &Impl, Handler.finalize(); node_type NodeType = - Handler.MImpl->MUserFacingNodeType != + Handler.impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty - ? Handler.MImpl->MUserFacingNodeType + ? Handler.impl->MUserFacingNodeType : ext::oneapi::experimental::detail::getNodeTypeFromCG( - Handler.MCGType); + Handler.getType()); - auto NodeImpl = this->add(NodeType, std::move(Handler.MGraphNodeCG), Dep); - NodeImpl->MNDRangeUsed = Handler.MImpl->MNDRangeUsed; + auto NodeImpl = + this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Dep); + NodeImpl->MNDRangeUsed = Handler.impl->MNDRangeUsed; // Add an event associated with this explicit node for mixed usage addEventForNode(Impl, std::make_shared(), NodeImpl); // Retrieve any dynamic parameters which have been registered in the CGF and // register the actual nodes with them. - auto &DynamicParams = Handler.MImpl->MDynamicParameters; + auto &DynamicParams = Handler.impl->MDynamicParameters; if (NodeType != node_type::kernel && DynamicParams.size() > 0) { throw sycl::exception(sycl::make_error_code(errc::invalid), @@ -721,10 +722,10 @@ void exec_graph_impl::createCommandBuffers( if (Node->isEmpty()) continue; - sycl::detail::CG::CGTYPE type = Node->MCGType; + sycl::detail::CGType type = Node->MCGType; // If the node is a kernel with no special requirements we can enqueue it // directly. 
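propagatePartitionUp, propagatePartitionDown, and makePartitions above split the executable graph into partitions around CodeplayHostTask nodes, since host tasks run on the host and, roughly speaking, cannot be recorded into the same command buffer as the device commands around them. A loose, self-contained illustration of that idea on a flat schedule; the real code walks predecessor and successor lists, and everything below is a simplified stand-in:

#include <cstddef>
#include <iostream>
#include <vector>

// Assign partition numbers along a topologically sorted node list, starting a
// new partition around every host-task node so that host work never lands in
// the middle of a device-side partition.
enum class CGType { Kernel, CodeplayHostTask };

int main() {
  std::vector<CGType> Schedule = {CGType::Kernel, CGType::Kernel,
                                  CGType::CodeplayHostTask, CGType::Kernel};

  int Partition = 0;
  std::vector<int> PartitionOf;
  for (CGType Node : Schedule) {
    if (Node == CGType::CodeplayHostTask) {
      // The host task gets its own partition...
      PartitionOf.push_back(++Partition);
      ++Partition; // ...and the device work that follows starts another one.
    } else {
      PartitionOf.push_back(Partition);
    }
  }

  for (std::size_t I = 0; I < PartitionOf.size(); ++I)
    std::cout << "node " << I << " -> partition " << PartitionOf[I] << '\n';
}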
- if (type == sycl::detail::CG::Kernel && + if (type == sycl::detail::CGType::Kernel && Node->MCommandGroup->getRequirements().size() + static_cast( Node->MCommandGroup.get()) @@ -936,7 +937,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, NewEvent->setEventFromSubmittedExecCommandBuffer(true); } else if ((CurrentPartition->MSchedule.size() > 0) && (CurrentPartition->MSchedule.front()->MCGType == - sycl::detail::CG::CGTYPE::CodeplayHostTask)) { + sycl::detail::CGType::CodeplayHostTask)) { auto NodeImpl = CurrentPartition->MSchedule.front(); // Schedule host task NodeImpl->MCommandGroup->getEvents().insert( @@ -957,7 +958,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // If the node has no requirements for accessors etc. then we skip the // scheduler and enqueue directly. - if (NodeImpl->MCGType == sycl::detail::CG::Kernel && + if (NodeImpl->MCGType == sycl::detail::CGType::Kernel && NodeImpl->MCommandGroup->getRequirements().size() + static_cast( NodeImpl->MCommandGroup.get()) @@ -1198,7 +1199,7 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { "of nodes being updated must have the same type"); } - if (MNodeStorage[i]->MCGType == sycl::detail::CG::Kernel) { + if (MNodeStorage[i]->MCGType == sycl::detail::CGType::Kernel) { sycl::detail::CGExecKernel *TargetCGExec = static_cast( MNodeStorage[i]->MCommandGroup.get()); @@ -1259,8 +1260,8 @@ void exec_graph_impl::update( "Node passed to update() is not part of the graph."); } - if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel || - Node->MCGType == sycl::detail::CG::Barrier)) { + if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CGType::Kernel || + Node->MCGType == sycl::detail::CGType::Barrier)) { throw sycl::exception(errc::invalid, "Unsupported node type for update. Only kernel, " "barrier and empty nodes are supported."); @@ -1318,7 +1319,7 @@ void exec_graph_impl::update( void exec_graph_impl::updateImpl(std::shared_ptr Node) { // Kernel node update is the only command type supported in UR for update. // Updating any other types of nodes, e.g. empty & barrier nodes is a no-op. 
- if (Node->MCGType != sycl::detail::CG::Kernel) { + if (Node->MCGType != sycl::detail::CGType::Kernel) { return; } auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index fe8fc14842d6..ac02da934340 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -13,10 +13,10 @@ #include #include -#include - #include +#include #include +#include #include #include @@ -41,34 +41,34 @@ namespace oneapi { namespace experimental { namespace detail { -inline node_type getNodeTypeFromCG(sycl::detail::CG::CGTYPE CGType) { +inline node_type getNodeTypeFromCG(sycl::detail::CGType CGType) { using sycl::detail::CG; switch (CGType) { - case CG::None: + case sycl::detail::CGType::None: return node_type::empty; - case CG::Kernel: + case sycl::detail::CGType::Kernel: return node_type::kernel; - case CG::CopyAccToPtr: - case CG::CopyPtrToAcc: - case CG::CopyAccToAcc: - case CG::CopyUSM: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: + case sycl::detail::CGType::CopyAccToAcc: + case sycl::detail::CGType::CopyUSM: return node_type::memcpy; - case CG::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: return node_type::memset; - case CG::Fill: - case CG::FillUSM: + case sycl::detail::CGType::Fill: + case sycl::detail::CGType::FillUSM: return node_type::memfill; - case CG::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: return node_type::prefetch; - case CG::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: return node_type::memadvise; - case CG::Barrier: - case CG::BarrierWaitlist: + case sycl::detail::CGType::Barrier: + case sycl::detail::CGType::BarrierWaitlist: return node_type::ext_oneapi_barrier; - case CG::CodeplayHostTask: + case sycl::detail::CGType::CodeplayHostTask: return node_type::host_task; - case CG::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: return node_type::subgraph; default: assert(false && "Invalid Graph Node Type"); @@ -90,7 +90,7 @@ class node_impl { /// Using weak_ptr here to prevent circular references between nodes. std::vector> MPredecessors; /// Type of the command-group for the node. - sycl::detail::CG::CGTYPE MCGType = sycl::detail::CG::None; + sycl::detail::CGType MCGType = sycl::detail::CGType::None; /// User facing type of the node. node_type MNodeType = node_type::empty; /// Command group object which stores all args etc needed to enqueue the node @@ -221,50 +221,50 @@ class node_impl { /// workload but only dependencies /// @return True if this is an empty node, false otherwise. bool isEmpty() const { - return ((MCGType == sycl::detail::CG::None) || - (MCGType == sycl::detail::CG::Barrier)); + return ((MCGType == sycl::detail::CGType::None) || + (MCGType == sycl::detail::CGType::Barrier)); } /// Get a deep copy of this node's command group /// @return A unique ptr to the new command group object. 
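The createCGCopy() calls in getCGCopy above are templated on the concrete command-group class: each case casts the stored base-class command group to the matching subclass and returns a copy-constructed clone of it. A hypothetical free-function equivalent, with made-up member names, just to show the shape of such a helper:

#include <cstddef>
#include <memory>

namespace detail {

class CG {
public:
  virtual ~CG() = default;
};

class CGCopyUSM : public CG {
public:
  CGCopyUSM(void *Src, void *Dst, std::size_t Length)
      : MSrc(Src), MDst(Dst), MLength(Length) {}

private:
  void *MSrc;
  void *MDst;
  std::size_t MLength;
};

} // namespace detail

// Hypothetical helper in the spirit of the member createCGCopy<T>(): cast the
// stored base-class reference to the concrete command-group type and return a
// copy of it wrapped in a fresh unique_ptr.
template <typename CGT>
std::unique_ptr<CGT> createCGCopy(const detail::CG &Stored) {
  return std::make_unique<CGT>(static_cast<const CGT &>(Stored));
}

int main() {
  int Src = 0, Dst = 0;
  detail::CGCopyUSM Original(&Src, &Dst, sizeof(int));
  std::unique_ptr<detail::CGCopyUSM> Clone =
      createCGCopy<detail::CGCopyUSM>(Original);
  (void)Clone;
}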
std::unique_ptr getCGCopy() const { switch (MCGType) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { auto CGCopy = createCGCopy(); rebuildArgStorage(CGCopy->MArgs, MCommandGroup->getArgsStorage(), CGCopy->getArgsStorage()); return std::move(CGCopy); } - case sycl::detail::CG::CopyAccToPtr: - case sycl::detail::CG::CopyPtrToAcc: - case sycl::detail::CG::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: + case sycl::detail::CGType::CopyAccToAcc: return createCGCopy(); - case sycl::detail::CG::Fill: + case sycl::detail::CGType::Fill: return createCGCopy(); - case sycl::detail::CG::UpdateHost: + case sycl::detail::CGType::UpdateHost: return createCGCopy(); - case sycl::detail::CG::CopyUSM: + case sycl::detail::CGType::CopyUSM: return createCGCopy(); - case sycl::detail::CG::FillUSM: + case sycl::detail::CGType::FillUSM: return createCGCopy(); - case sycl::detail::CG::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: return createCGCopy(); - case sycl::detail::CG::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: return createCGCopy(); - case sycl::detail::CG::Copy2DUSM: + case sycl::detail::CGType::Copy2DUSM: return createCGCopy(); - case sycl::detail::CG::Fill2DUSM: + case sycl::detail::CGType::Fill2DUSM: return createCGCopy(); - case sycl::detail::CG::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: return createCGCopy(); - case sycl::detail::CG::CodeplayHostTask: { + case sycl::detail::CGType::CodeplayHostTask: { // The unique_ptr to the `sycl::detail::HostTask` in the HostTask CG // prevents from copying the CG. // We overcome this restriction by creating a new CG with the same data. auto CommandGroupPtr = static_cast(MCommandGroup.get()); sycl::detail::HostTask HostTask = *CommandGroupPtr->MHostTask.get(); - auto HostTaskUPtr = std::make_unique(HostTask); + auto HostTaskSPtr = std::make_shared(HostTask); sycl::detail::CG::StorageInitHelper Data( CommandGroupPtr->getArgsStorage(), CommandGroupPtr->getAccStorage(), @@ -283,32 +283,32 @@ class node_impl { return std::make_unique( sycl::detail::CGHostTask( - std::move(HostTaskUPtr), CommandGroupPtr->MQueue, + std::move(HostTaskSPtr), CommandGroupPtr->MQueue, CommandGroupPtr->MContext, std::move(NewArgs), std::move(Data), CommandGroupPtr->getType(), Loc)); } - case sycl::detail::CG::Barrier: - case sycl::detail::CG::BarrierWaitlist: + case sycl::detail::CGType::Barrier: + case sycl::detail::CGType::BarrierWaitlist: // Barrier nodes are stored in the graph with only the base CG class, // since they are treated internally as empty nodes. 
return createCGCopy(); - case sycl::detail::CG::CopyToDeviceGlobal: + case sycl::detail::CGType::CopyToDeviceGlobal: return createCGCopy(); - case sycl::detail::CG::CopyFromDeviceGlobal: + case sycl::detail::CGType::CopyFromDeviceGlobal: return createCGCopy(); - case sycl::detail::CG::ReadWriteHostPipe: + case sycl::detail::CGType::ReadWriteHostPipe: return createCGCopy(); - case sycl::detail::CG::CopyImage: + case sycl::detail::CGType::CopyImage: return createCGCopy(); - case sycl::detail::CG::SemaphoreSignal: + case sycl::detail::CGType::SemaphoreSignal: return createCGCopy(); - case sycl::detail::CG::SemaphoreWait: + case sycl::detail::CGType::SemaphoreWait: return createCGCopy(); - case sycl::detail::CG::ProfilingTag: + case sycl::detail::CGType::ProfilingTag: return createCGCopy(); - case sycl::detail::CG::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: return createCGCopy(); - case sycl::detail::CG::None: + case sycl::detail::CGType::None: return nullptr; } return nullptr; @@ -332,14 +332,14 @@ class node_impl { return false; switch (MCGType) { - case sycl::detail::CG::CGTYPE::Kernel: { + case sycl::detail::CGType::Kernel: { sycl::detail::CGExecKernel *ExecKernelA = static_cast(MCommandGroup.get()); sycl::detail::CGExecKernel *ExecKernelB = static_cast(Node->MCommandGroup.get()); return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0; } - case sycl::detail::CG::CGTYPE::CopyUSM: { + case sycl::detail::CGType::CopyUSM: { sycl::detail::CGCopyUSM *CopyA = static_cast(MCommandGroup.get()); sycl::detail::CGCopyUSM *CopyB = @@ -348,9 +348,9 @@ class node_impl { (CopyA->getDst() == CopyB->getDst()) && (CopyA->getLength() == CopyB->getLength()); } - case sycl::detail::CG::CGTYPE::CopyAccToAcc: - case sycl::detail::CG::CGTYPE::CopyAccToPtr: - case sycl::detail::CG::CGTYPE::CopyPtrToAcc: { + case sycl::detail::CGType::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: { sycl::detail::CGCopy *CopyA = static_cast(MCommandGroup.get()); sycl::detail::CGCopy *CopyB = @@ -394,9 +394,9 @@ class node_impl { /// Test if the node contains a N-D copy /// @return true if the op is a N-D copy bool isNDCopyNode() const { - if ((MCGType != sycl::detail::CG::CGTYPE::CopyAccToAcc) && - (MCGType != sycl::detail::CG::CGTYPE::CopyAccToPtr) && - (MCGType != sycl::detail::CG::CGTYPE::CopyPtrToAcc)) { + if ((MCGType != sycl::detail::CGType::CopyAccToAcc) && + (MCGType != sycl::detail::CGType::CopyAccToPtr) && + (MCGType != sycl::detail::CGType::CopyPtrToAcc)) { return false; } @@ -477,7 +477,7 @@ class node_impl { template void updateNDRange(nd_range ExecutionRange) { - if (MCGType != sycl::detail::CG::Kernel) { + if (MCGType != sycl::detail::CGType::Kernel) { throw sycl::exception( sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); @@ -499,11 +499,11 @@ class node_impl { "the node was originall created with."); } - NDRDesc.set(ExecutionRange); + NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; } template void updateRange(range ExecutionRange) { - if (MCGType != sycl::detail::CG::Kernel) { + if (MCGType != sycl::detail::CGType::Kernel) { throw sycl::exception( sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); @@ -525,7 +525,7 @@ class node_impl { "the node was originall created with."); } - NDRDesc.set(ExecutionRange); + NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; } void updateFromOtherNode(const std::shared_ptr &Other) { @@ -595,10 +595,10 @@ 
class node_impl { Stream << "TYPE = "; switch (MCGType) { - case sycl::detail::CG::CGTYPE::None: + case sycl::detail::CGType::None: Stream << "None \\n"; break; - case sycl::detail::CG::CGTYPE::Kernel: { + case sycl::detail::CGType::Kernel: { Stream << "CGExecKernel \\n"; sycl::detail::CGExecKernel *Kernel = static_cast(MCommandGroup.get()); @@ -645,7 +645,7 @@ class node_impl { } break; } - case sycl::detail::CG::CGTYPE::CopyAccToPtr: + case sycl::detail::CGType::CopyAccToPtr: Stream << "CGCopy Device-to-Host \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -654,7 +654,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyPtrToAcc: + case sycl::detail::CGType::CopyPtrToAcc: Stream << "CGCopy Host-to-Device \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -663,7 +663,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToAcc: Stream << "CGCopy Device-to-Device \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -672,7 +672,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Fill: + case sycl::detail::CGType::Fill: Stream << "CGFill \\n"; if (Verbose) { sycl::detail::CGFill *Fill = @@ -680,7 +680,7 @@ class node_impl { Stream << "Ptr: " << Fill->MPtr << "\\n"; } break; - case sycl::detail::CG::CGTYPE::UpdateHost: + case sycl::detail::CGType::UpdateHost: Stream << "CGCUpdateHost \\n"; if (Verbose) { sycl::detail::CGUpdateHost *Host = @@ -688,7 +688,7 @@ class node_impl { Stream << "Ptr: " << Host->getReqToUpdate() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyUSM: + case sycl::detail::CGType::CopyUSM: Stream << "CGCopyUSM \\n"; if (Verbose) { sycl::detail::CGCopyUSM *CopyUSM = @@ -697,7 +697,7 @@ class node_impl { << " Length: " << CopyUSM->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::FillUSM: + case sycl::detail::CGType::FillUSM: Stream << "CGFillUSM \\n"; if (Verbose) { sycl::detail::CGFillUSM *FillUSM = @@ -709,7 +709,7 @@ class node_impl { Stream << "\\n"; } break; - case sycl::detail::CG::CGTYPE::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: Stream << "CGPrefetchUSM \\n"; if (Verbose) { sycl::detail::CGPrefetchUSM *Prefetch = @@ -718,7 +718,7 @@ class node_impl { << " Length: " << Prefetch->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: Stream << "CGAdviseUSM \\n"; if (Verbose) { sycl::detail::CGAdviseUSM *AdviseUSM = @@ -727,13 +727,13 @@ class node_impl { << " Length: " << AdviseUSM->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CodeplayHostTask: + case sycl::detail::CGType::CodeplayHostTask: Stream << "CGHostTask \\n"; break; - case sycl::detail::CG::CGTYPE::Barrier: + case sycl::detail::CGType::Barrier: Stream << "CGBarrier \\n"; break; - case sycl::detail::CG::CGTYPE::Copy2DUSM: + case sycl::detail::CGType::Copy2DUSM: Stream << "CGCopy2DUSM \\n"; if (Verbose) { sycl::detail::CGCopy2DUSM *Copy2DUSM = @@ -742,7 +742,7 @@ class node_impl { << " Dst: " << Copy2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Fill2DUSM: + case sycl::detail::CGType::Fill2DUSM: Stream << "CGFill2DUSM \\n"; if (Verbose) { sycl::detail::CGFill2DUSM *Fill2DUSM = @@ -750,7 +750,7 @@ class node_impl { Stream << "Dst: " << Fill2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: Stream << "CGMemset2DUSM \\n"; if (Verbose) { sycl::detail::CGMemset2DUSM *Memset2DUSM = @@ -758,10 
+758,10 @@ class node_impl { Stream << "Dst: " << Memset2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::ReadWriteHostPipe: + case sycl::detail::CGType::ReadWriteHostPipe: Stream << "CGReadWriteHostPipe \\n"; break; - case sycl::detail::CG::CGTYPE::CopyToDeviceGlobal: + case sycl::detail::CGType::CopyToDeviceGlobal: Stream << "CGCopyToDeviceGlobal \\n"; if (Verbose) { sycl::detail::CGCopyToDeviceGlobal *CopyToDeviceGlobal = @@ -771,7 +771,7 @@ class node_impl { << " Dst: " << CopyToDeviceGlobal->getDeviceGlobalPtr() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyFromDeviceGlobal: + case sycl::detail::CGType::CopyFromDeviceGlobal: Stream << "CGCopyFromDeviceGlobal \\n"; if (Verbose) { sycl::detail::CGCopyFromDeviceGlobal *CopyFromDeviceGlobal = @@ -781,7 +781,7 @@ class node_impl { << " Dst: " << CopyFromDeviceGlobal->getDest() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: Stream << "CGExecCommandBuffer \\n"; break; default: @@ -822,7 +822,7 @@ class partition { /// @return True if the partition contains a host task bool isHostTask() const { return (MRoots.size() && ((*MRoots.begin()).lock()->MCGType == - sycl::detail::CG::CGTYPE::CodeplayHostTask)); + sycl::detail::CGType::CodeplayHostTask)); } /// Checks if the graph is single path, i.e. each node has a single successor. diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index daea9816deac..b3cf5f1bebed 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -10,6 +10,7 @@ #include "sycl/handler.hpp" #include +#include #include #include @@ -37,6 +38,10 @@ class handler_impl { MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), MEventNeeded(EventNeeded) {}; + handler_impl( + std::shared_ptr Graph) + : MGraph{Graph} {} + handler_impl() = default; void setStateExplicitKernelBundle() { @@ -150,6 +155,45 @@ class handler_impl { // Track whether an NDRange was used when submitting a kernel (as opposed to a // range), needed for graph update bool MNDRangeUsed = false; + + /// The storage for the arguments passed. + /// We need to store a copy of values that are passed explicitly through + /// set_arg, require and so on, because we need them to be alive after + /// we exit the method they are passed in. + detail::CG::StorageInitHelper CGData; + + /// The list of arguments for the kernel. + std::vector MArgs; + + /// The list of associated accessors with this handler. + /// These accessors were created with this handler as argument or + /// have become required for this handler via require method. + std::vector MAssociatedAccesors; + + /// Struct that encodes global size, local size, ... + detail::NDRDescT MNDRDesc; + + /// Type of the command group, e.g. kernel, fill. Can also encode version. + /// Use getType and setType methods to access this variable unless + /// manipulations with version are required + detail::CGType MCGType = detail::CGType::None; + + /// The graph that is associated with this handler. + std::shared_ptr MGraph; + /// If we are submitting a graph using ext_oneapi_graph this will be the graph + /// to be executed. + std::shared_ptr + MExecGraph; + /// Storage for a node created from a subgraph submission. + std::shared_ptr MSubgraphNode; + /// Storage for the CG created when handling graph nodes added explicitly. 
+ std::unique_ptr MGraphNodeCG; + + /// Storage for lambda/function when using HostTask + std::shared_ptr MHostTask; + /// The list of valid SYCL events that need to complete + /// before barrier command can be executed + std::vector MEventsWaitWithBarrier; }; } // namespace detail diff --git a/sycl/include/sycl/detail/host_task_impl.hpp b/sycl/source/detail/host_task.hpp similarity index 50% rename from sycl/include/sycl/detail/host_task_impl.hpp rename to sycl/source/detail/host_task.hpp index 42b49eab820c..48cb3ce7b854 100644 --- a/sycl/include/sycl/detail/host_task_impl.hpp +++ b/sycl/source/detail/host_task.hpp @@ -1,4 +1,4 @@ -//==---- host_task_impl.hpp ------------------------------------------------==// +//==---- host_task.hpp -----------------------------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include @@ -51,43 +51,6 @@ class HostTask { friend class DispatchHostTask; }; -class CGHostTask : public CG { -public: - std::unique_ptr MHostTask; - // queue for host-interop task - std::shared_ptr MQueue; - // context for host-interop task - std::shared_ptr MContext; - std::vector MArgs; - - CGHostTask(std::unique_ptr HostTask, - std::shared_ptr Queue, - std::shared_ptr Context, - std::vector Args, CG::StorageInitHelper CGData, - CGTYPE Type, detail::code_location loc = {}) - : CG(Type, std::move(CGData), std::move(loc)), - MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context), - MArgs(std::move(Args)) {} -}; - } // namespace detail -template -std::enable_if_t< - detail::check_fn_signature, void()>::value || - detail::check_fn_signature, - void(interop_handle)>::value> -handler::host_task_impl(FuncT &&Func) { - throwIfActionIsCreated(); - - MNDRDesc.set(range<1>(1)); - // Need to copy these rather than move so that we can check associated - // accessors during finalize - MArgs = MAssociatedAccesors; - - MHostTask.reset(new detail::HostTask(std::move(Func))); - - setType(detail::CG::CodeplayHostTask); -} - } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 51631d0d3e4f..179f11b97b49 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -970,7 +970,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, FusedCG.reset(new detail::CGExecKernel( NDRDesc, nullptr, nullptr, std::move(KernelBundleImplPtr), std::move(CGData), std::move(FusedArgs), FusedOrCachedKernelName, {}, {}, - CG::CGTYPE::Kernel, KernelCacheConfig, false /* KernelIsCooperative */, + CGType::Kernel, KernelCacheConfig, false /* KernelIsCooperative */, false /* KernelUsesClusterLaunch*/)); return FusedCG; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 3489dba53ffa..55d9fe3de353 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #pragma once +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ebfa91bc8a19..6bc27710e766 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -365,15 +365,15 @@ event queue_impl::submit_impl(const std::function 
&CGF, // Scheduler will later omit events, that are not required to execute tasks. // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. - const CG::CGTYPE Type = Handler.getType(); + const CGType Type = detail::getSyclObjImpl(Handler)->MCGType; event Event = detail::createSyclObjFromImpl( std::make_shared()); std::vector Streams; - if (Type == CG::Kernel) + if (Type == CGType::Kernel) Streams = std::move(Handler.MStreamStorage); if (PostProcess) { - bool IsKernel = Type == CG::Kernel; + bool IsKernel = Type == CGType::Kernel; bool KernelUsesAssert = false; if (IsKernel) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 699cfdce5e6b..b0c989c35fa5 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -817,7 +817,7 @@ class queue_impl { // Note that host_task events can never be discarded, so this will not // insert barriers between host_task enqueues. if (EventToBuildDeps->isDiscarded() && - Handler.getType() == CG::CodeplayHostTask) + getSyclObjImpl(Handler)->MCGType == CGType::CodeplayHostTask) EventToBuildDeps = insertHelperBarrier(Handler); if (!EventToBuildDeps->isDiscarded()) @@ -834,7 +834,7 @@ class queue_impl { EventRet = Handler.finalize(); EventToBuildDeps = getSyclObjImpl(EventRet); } else { - const CG::CGTYPE Type = Handler.getType(); + const CGType Type = getSyclObjImpl(Handler)->MCGType; std::lock_guard Lock{MMutex}; // The following code supports barrier synchronization if host task is // involved in the scenario. Native barriers cannot handle host task @@ -848,17 +848,17 @@ class queue_impl { MMissedCleanupRequests.clear(); } auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps; - if (Type == CG::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { + if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); } if (Deps.LastBarrier) Handler.depends_on(Deps.LastBarrier); EventRet = Handler.finalize(); EventImplPtr EventRetImpl = getSyclObjImpl(EventRet); - if (Type == CG::CodeplayHostTask) + if (Type == CGType::CodeplayHostTask) Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); else if (!EventRetImpl->isEnqueued()) { - if (Type == CG::Barrier || Type == CG::BarrierWaitlist) { + if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) { Deps.LastBarrier = EventRetImpl; Deps.UnenqueuedCmdEvents.clear(); } else diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e8ec2d9764af..a41231c188e3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -294,7 +294,7 @@ std::vector Command::getPiEventsBlocking( bool Command::isHostTask() const { return (MType == CommandType::RUN_CG) /* host task has this type also */ && ((static_cast(this))->getCG().getType() == - CG::CGTYPE::CodeplayHostTask); + CGType::CodeplayHostTask); } bool Command::isFusable() const { @@ -302,7 +302,7 @@ bool Command::isFusable() const { return false; } const auto &CG = (static_cast(*this)).getCG(); - return (CG.getType() == CG::CGTYPE::Kernel) && + return (CG.getType() == CGType::Kernel) && (!static_cast(CG).MKernelIsCooperative) && (!static_cast(CG).MKernelUsesClusterLaunch); } @@ -382,7 +382,7 @@ class DispatchHostTask { MReqPiMem(std::move(ReqPiMem)) {} void operator()() const { - assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask); + assert(MThisCmd->getCG().getType() == 
CGType::CodeplayHostTask); CGHostTask &HostTask = static_cast(MThisCmd->getCG()); @@ -1603,13 +1603,13 @@ AllocaCommandBase *ExecCGCommand::getAllocaForReq(Requirement *Req) { std::vector> ExecCGCommand::getAuxiliaryResources() const { - if (MCommandGroup->getType() == CG::Kernel) + if (MCommandGroup->getType() == CGType::Kernel) return ((CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources(); return {}; } void ExecCGCommand::clearAuxiliaryResources() { - if (MCommandGroup->getType() == CG::Kernel) + if (MCommandGroup->getType() == CGType::Kernel) ((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources(); } @@ -1852,68 +1852,68 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { #endif } -static std::string_view cgTypeToString(detail::CG::CGTYPE Type) { +static std::string_view cgTypeToString(detail::CGType Type) { switch (Type) { - case detail::CG::Kernel: + case detail::CGType::Kernel: return "Kernel"; break; - case detail::CG::UpdateHost: + case detail::CGType::UpdateHost: return "update_host"; break; - case detail::CG::Fill: + case detail::CGType::Fill: return "fill"; break; - case detail::CG::CopyAccToAcc: + case detail::CGType::CopyAccToAcc: return "copy acc to acc"; break; - case detail::CG::CopyAccToPtr: + case detail::CGType::CopyAccToPtr: return "copy acc to ptr"; break; - case detail::CG::CopyPtrToAcc: + case detail::CGType::CopyPtrToAcc: return "copy ptr to acc"; break; - case detail::CG::Barrier: + case detail::CGType::Barrier: return "barrier"; - case detail::CG::BarrierWaitlist: + case detail::CGType::BarrierWaitlist: return "barrier waitlist"; - case detail::CG::CopyUSM: + case detail::CGType::CopyUSM: return "copy usm"; break; - case detail::CG::FillUSM: + case detail::CGType::FillUSM: return "fill usm"; break; - case detail::CG::PrefetchUSM: + case detail::CGType::PrefetchUSM: return "prefetch usm"; break; - case detail::CG::CodeplayHostTask: + case detail::CGType::CodeplayHostTask: return "host task"; break; - case detail::CG::Copy2DUSM: + case detail::CGType::Copy2DUSM: return "copy 2d usm"; break; - case detail::CG::Fill2DUSM: + case detail::CGType::Fill2DUSM: return "fill 2d usm"; break; - case detail::CG::AdviseUSM: + case detail::CGType::AdviseUSM: return "advise usm"; - case detail::CG::Memset2DUSM: + case detail::CGType::Memset2DUSM: return "memset 2d usm"; break; - case detail::CG::CopyToDeviceGlobal: + case detail::CGType::CopyToDeviceGlobal: return "copy to device_global"; break; - case detail::CG::CopyFromDeviceGlobal: + case detail::CGType::CopyFromDeviceGlobal: return "copy from device_global"; break; - case detail::CG::ReadWriteHostPipe: + case detail::CGType::ReadWriteHostPipe: return "read_write host pipe"; - case detail::CG::ExecCommandBuffer: + case detail::CGType::ExecCommandBuffer: return "exec command buffer"; - case detail::CG::CopyImage: + case detail::CGType::CopyImage: return "copy image"; - case detail::CG::SemaphoreWait: + case detail::CGType::SemaphoreWait: return "semaphore wait"; - case detail::CG::SemaphoreSignal: + case detail::CGType::SemaphoreSignal: return "semaphore signal"; default: return "unknown"; @@ -1928,11 +1928,11 @@ ExecCGCommand::ExecCGCommand( : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { - if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { + if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); } - if 
(MCommandGroup->getType() == detail::CG::ProfilingTag) + if (MCommandGroup->getType() == detail::CGType::ProfilingTag) MEvent->markAsProfilingTagEvent(); emitInstrumentationDataProxy(); @@ -2138,7 +2138,7 @@ void ExecCGCommand::emitInstrumentationData() { std::string KernelName; std::optional FromSource; switch (MCommandGroup->getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { auto KernelCG = reinterpret_cast(MCommandGroup.get()); KernelName = instrumentationGetKernelName( @@ -2160,7 +2160,7 @@ void ExecCGCommand::emitInstrumentationData() { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); - if (MCommandGroup->getType() == detail::CG::Kernel) { + if (MCommandGroup->getType() == detail::CGType::Kernel) { auto KernelCG = reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( @@ -2184,7 +2184,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "EXEC CG ON " << queueDeviceToString(MQueue.get()) << "\\n"; switch (MCommandGroup->getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { auto KernelCG = reinterpret_cast(MCommandGroup.get()); Stream << "Kernel name: "; @@ -2241,7 +2241,8 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, if (WGSize[0] == 0) { WGSize = {1, 1, 1}; } - NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); + NDR = sycl::detail::NDRDescT{nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize), + static_cast(NDR.Dims)}; } // We have the following mapping between dimensions with SPIR-V builtins: @@ -2453,31 +2454,6 @@ static pi_result SetKernelParamsAndLaunch( return Error; } -// The function initialize accessors and calls lambda. -void DispatchNativeKernel(void *Blob) { - void **CastedBlob = (void **)Blob; - - std::vector *Reqs = - static_cast *>(CastedBlob[0]); - - std::shared_ptr *HostKernel = - static_cast *>(CastedBlob[1]); - - NDRDescT *NDRDesc = static_cast(CastedBlob[2]); - - // Other value are pointer to the buffers. 
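adjustNDRangePerKernel above, like the node_impl::updateNDRange and updateRange changes earlier in this patch, no longer mutates an existing NDRDescT through a set() member; it builds a fresh descriptor from the new range and assigns it over the old one. A small stand-alone model of that style, with a simplified NDRDescT rather than the real class:

#include <array>
#include <cstddef>
#include <iostream>

// Simplified stand-in: the descriptor is constructed from the new launch
// configuration in one go and then assigned, as in
// NDRDesc = sycl::detail::NDRDescT{ExecutionRange}.
struct NDRDescT {
  NDRDescT() = default;
  NDRDescT(std::array<std::size_t, 3> Global, int NDims)
      : GlobalSize(Global), Dims(NDims) {}

  std::array<std::size_t, 3> GlobalSize{0, 0, 0};
  int Dims = 0;
};

int main() {
  NDRDescT NDR; // as stored in a command group or graph node

  // Previously something like NDR.set(Range); now a fresh descriptor is
  // built and assigned, keeping all invariants in one constructor.
  NDR = NDRDescT{{64, 8, 1}, 2};

  std::cout << NDR.Dims << ' ' << NDR.GlobalSize[0] << '\n';
}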
- void **NextArg = CastedBlob + 3; - for (detail::Requirement *Req : *Reqs) - Req->MData = *(NextArg++); - - (*HostKernel)->call(*NDRDesc, nullptr); - - // The ownership of these objects have been passed to us, need to cleanup - delete Reqs; - delete HostKernel; - delete NDRDesc; -} - pi_int32 enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, @@ -2783,7 +2759,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { sycl::detail::pi::PiExtSyncPoint OutSyncPoint; sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; switch (MCommandGroup->getType()) { - case CG::CGTYPE::Kernel: { + case CGType::Kernel: { CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); auto getMemAllocationFunc = [this](Requirement *Req) { @@ -2809,7 +2785,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setCommandBufferCommand(OutCommand); return result; } - case CG::CGTYPE::CopyUSM: { + case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_copy_usm_cmd_buffer( MQueue->getContextImplPtr(), Copy->getSrc(), MCommandBuffer, @@ -2817,7 +2793,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToAcc: { + case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *ReqSrc = (Requirement *)(Copy->getSrc()); Requirement *ReqDst = (Requirement *)(Copy->getDst()); @@ -2836,7 +2812,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToPtr: { + case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)Copy->getSrc(); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2851,7 +2827,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyPtrToAcc: { + case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2865,7 +2841,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::Fill: { + case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Fill->getReqToFill()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2878,7 +2854,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::FillUSM: { + case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), @@ -2887,7 +2863,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::PrefetchUSM: { + case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Prefetch->getDst(), @@ -2895,7 +2871,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::AdviseUSM: { + case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); 
MemoryManager::ext_oneapi_advise_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Advise->getDst(), @@ -2920,7 +2896,7 @@ pi_int32 ExecCGCommand::enqueueImp() { } pi_int32 ExecCGCommand::enqueueImpQueue() { - if (getCG().getType() != CG::CGTYPE::CodeplayHostTask) + if (getCG().getType() != CGType::CodeplayHostTask) waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); @@ -2939,12 +2915,12 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { switch (MCommandGroup->getType()) { - case CG::CGTYPE::UpdateHost: { + case CGType::UpdateHost: { throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "Update host should be handled by the Scheduler. " + codeToString(PI_ERROR_INVALID_VALUE)); } - case CG::CGTYPE::CopyAccToPtr: { + case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)Copy->getSrc(); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2958,7 +2934,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyPtrToAcc: { + case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2972,7 +2948,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToAcc: { + case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *ReqSrc = (Requirement *)(Copy->getSrc()); Requirement *ReqDst = (Requirement *)(Copy->getDst()); @@ -2990,7 +2966,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Fill: { + case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Fill->getReqToFill()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -3003,7 +2979,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Kernel: { + case CGType::Kernel: { assert(MQueue && "Kernel submissions should have an associated queue"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); @@ -3039,7 +3015,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyUSM: { + case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); MemoryManager::copy_usm(Copy->getSrc(), MQueue, Copy->getLength(), Copy->getDst(), std::move(RawEvents), Event, @@ -3047,7 +3023,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::FillUSM: { + case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), Fill->getPattern(), std::move(RawEvents), Event, @@ -3055,7 +3031,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::PrefetchUSM: { + case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); MemoryManager::prefetch_usm(Prefetch->getDst(), MQueue, Prefetch->getLength(), std::move(RawEvents), @@ -3063,7 +3039,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::AdviseUSM: { + case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); MemoryManager::advise_usm(Advise->getDst(), MQueue, Advise->getLength(), Advise->getAdvice(), std::move(RawEvents), Event, @@ -3071,7 +3047,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Copy2DUSM: { + 
case CGType::Copy2DUSM: { CGCopy2DUSM *Copy = (CGCopy2DUSM *)MCommandGroup.get(); MemoryManager::copy_2d_usm(Copy->getSrc(), Copy->getSrcPitch(), MQueue, Copy->getDst(), Copy->getDstPitch(), @@ -3079,7 +3055,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::move(RawEvents), Event, MEvent); return PI_SUCCESS; } - case CG::CGTYPE::Fill2DUSM: { + case CGType::Fill2DUSM: { CGFill2DUSM *Fill = (CGFill2DUSM *)MCommandGroup.get(); MemoryManager::fill_2d_usm(Fill->getDst(), MQueue, Fill->getPitch(), Fill->getWidth(), Fill->getHeight(), @@ -3087,7 +3063,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { MEvent); return PI_SUCCESS; } - case CG::CGTYPE::Memset2DUSM: { + case CGType::Memset2DUSM: { CGMemset2DUSM *Memset = (CGMemset2DUSM *)MCommandGroup.get(); MemoryManager::memset_2d_usm(Memset->getDst(), MQueue, Memset->getPitch(), Memset->getWidth(), Memset->getHeight(), @@ -3095,7 +3071,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Event, MEvent); return PI_SUCCESS; } - case CG::CGTYPE::CodeplayHostTask: { + case CGType::CodeplayHostTask: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); for (ArgDesc &Arg : HostTask->MArgs) { @@ -3161,7 +3137,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Barrier: { + case CGType::Barrier: { assert(MQueue && "Barrier submission should have an associated queue"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) @@ -3171,7 +3147,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::BarrierWaitlist: { + case CGType::BarrierWaitlist: { assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; @@ -3189,7 +3165,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::ProfilingTag: { + case CGType::ProfilingTag: { const PluginPtr &Plugin = MQueue->getPlugin(); // If the queue is not in-order, we need to insert a barrier. This barrier // does not need output events as it will implicitly enforce the following @@ -3205,7 +3181,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyToDeviceGlobal: { + case CGType::CopyToDeviceGlobal: { CGCopyToDeviceGlobal *Copy = (CGCopyToDeviceGlobal *)MCommandGroup.get(); MemoryManager::copy_to_device_global( Copy->getDeviceGlobalPtr(), Copy->isDeviceImageScoped(), MQueue, @@ -3214,7 +3190,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return CL_SUCCESS; } - case CG::CGTYPE::CopyFromDeviceGlobal: { + case CGType::CopyFromDeviceGlobal: { CGCopyFromDeviceGlobal *Copy = (CGCopyFromDeviceGlobal *)MCommandGroup.get(); MemoryManager::copy_from_device_global( @@ -3224,7 +3200,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return CL_SUCCESS; } - case CG::CGTYPE::ReadWriteHostPipe: { + case CGType::ReadWriteHostPipe: { CGReadWriteHostPipe *ExecReadWriteHostPipe = (CGReadWriteHostPipe *)MCommandGroup.get(); std::string pipeName = ExecReadWriteHostPipe->getPipeName(); @@ -3239,7 +3215,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return enqueueReadWriteHostPipe(MQueue, pipeName, blocking, hostPtr, typeSize, RawEvents, EventImpl, read); } - case CG::CGTYPE::ExecCommandBuffer: { + case CGType::ExecCommandBuffer: { assert(MQueue && "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = @@ -3252,7 +3228,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { RawEvents.size(), RawEvents.empty() ? 
nullptr : &RawEvents[0], Event); } - case CG::CGTYPE::CopyImage: { + case CGType::CopyImage: { CGCopyImage *Copy = (CGCopyImage *)MCommandGroup.get(); sycl::detail::pi::PiMemImageDesc Desc = Copy->getDesc(); @@ -3264,7 +3240,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Event); return PI_SUCCESS; } - case CG::CGTYPE::SemaphoreWait: { + case CGType::SemaphoreWait: { assert(MQueue && "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); @@ -3278,7 +3254,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::SemaphoreSignal: { + case CGType::SemaphoreSignal: { assert(MQueue && "Semaphore signal submissions should have an associated queue"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); @@ -3293,7 +3269,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::None: + case CGType::None: throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "CG type not implemented. " + codeToString(PI_ERROR_INVALID_OPERATION)); @@ -3303,17 +3279,17 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { bool ExecCGCommand::producesPiEvent() const { return !MCommandBuffer && - MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask; + MCommandGroup->getType() != CGType::CodeplayHostTask; } bool ExecCGCommand::supportsPostEnqueueCleanup() const { // Host tasks are cleaned up upon completion instead. return Command::supportsPostEnqueueCleanup() && - (MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask); + (MCommandGroup->getType() != CGType::CodeplayHostTask); } bool ExecCGCommand::readyForCleanup() const { - if (MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask) + if (MCommandGroup->getType() == CGType::CodeplayHostTask) return MLeafCounter == 0 && MEvent->isCompleted(); return Command::readyForCleanup(); } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 924ee7dff412..8944444c44ed 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -18,10 +18,10 @@ #include #include +#include #include #include #include -#include namespace sycl { inline namespace _V1 { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6ac1f8b34828..863752506cd0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -906,7 +906,7 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( } static bool isInteropHostTask(ExecCGCommand *Cmd) { - if (Cmd->getCG().getType() != CG::CGTYPE::CodeplayHostTask) + if (Cmd->getCG().getType() != CGType::CodeplayHostTask) return false; const detail::CGHostTask &HT = @@ -1017,7 +1017,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( } else { std::string s; std::stringstream ss(s); - if (NewCmd->getCG().getType() == CG::CGTYPE::Kernel) { + if (NewCmd->getCG().getType() == CGType::Kernel) { ss << "Not fusing kernel with 'use_root_sync' property. 
Can only fuse " "non-cooperative device kernels."; } else { @@ -1333,7 +1333,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( ExecCGCommand *ConnectCmd = nullptr; try { - std::unique_ptr HT(new detail::HostTask); + std::shared_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {}, /* Args = */ {}, @@ -1341,7 +1341,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}), - CG::CodeplayHostTask, + CGType::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr, /*EventNeeded=*/true); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 258ab3f6d6a5..5fd4b17f746d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -97,7 +97,7 @@ EventImplPtr Scheduler::addCG( bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; - const CG::CGTYPE Type = CommandGroup->getType(); + const CGType Type = CommandGroup->getType(); std::vector AuxiliaryCmds; std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); @@ -109,12 +109,12 @@ EventImplPtr Scheduler::addCG( Command *NewCmd = nullptr; switch (Type) { - case CG::UpdateHost: + case CGType::UpdateHost: NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; - case CG::CodeplayHostTask: { + case CGType::CodeplayHostTask: { auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr, AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index e9a8f4d56628..0cdf4ae9ec25 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -8,10 +8,10 @@ #pragma once +#include #include #include #include -#include #include #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index cf67dcfde8d7..3beb84cc5764 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -88,31 +89,31 @@ handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool CallerNeedsEvent) - : MImpl(std::make_shared(std::move(PrimaryQueue), + : impl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue), CallerNeedsEvent)), MQueue(std::move(Queue)) {} handler::handler( std::shared_ptr Graph) - : MImpl(std::make_shared()), MGraph(Graph) {} + : impl(std::make_shared(Graph)) {} // Sets the submission state to indicate that an explicit kernel bundle has been // set. Throws a sycl::exception with errc::invalid if the current state // indicates that a specialization constant has been set. void handler::setStateExplicitKernelBundle() { - MImpl->setStateExplicitKernelBundle(); + impl->setStateExplicitKernelBundle(); } // Sets the submission state to indicate that a specialization constant has been // set. Throws a sycl::exception with errc::invalid if the current state // indicates that an explicit kernel bundle has been set. 
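The handler.cpp changes that begin here show the other half of the pimpl move: the former handler members (CGData, MArgs, MNDRDesc, the CG type, the graph pointers, and so on) are now reached through impl->, the shared handler_impl that lives entirely inside the runtime library. A compact model of why that helps, using simplified stand-in types rather than the real handler API:

#include <memory>
#include <vector>

namespace detail {

enum class CGType { None, Kernel };

// Stand-in for handler_impl: all mutable submission state lives here, inside
// the library, where members can be added freely.
struct handler_impl {
  CGType MCGType = CGType::None;
  std::vector<int> MArgs; // stand-in for the real argument descriptors
};

} // namespace detail

class handler {
public:
  handler() : impl(std::make_shared<detail::handler_impl>()) {}

  // The public surface stays stable; it forwards to the impl object.
  void setTypeKernel() { impl->MCGType = detail::CGType::Kernel; }
  bool isKernel() const { return impl->MCGType == detail::CGType::Kernel; }

private:
  std::shared_ptr<detail::handler_impl> impl;
};

int main() {
  handler H;
  H.setTypeKernel();
  return H.isKernel() ? 0 : 1;
}

In this sketch any growth happens only inside handler_impl, which mirrors the goal of the patch: new fields go behind the impl pointer rather than into the layout that headers expose.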
-void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); } +void handler::setStateSpecConstSet() { impl->setStateSpecConstSet(); } // Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and // false otherwise. bool handler::isStateExplicitKernelBundle() const { - return MImpl->isStateExplicitKernelBundle(); + return impl->isStateExplicitKernelBundle(); } // Returns a shared_ptr to the kernel_bundle. @@ -121,19 +122,19 @@ bool handler::isStateExplicitKernelBundle() const { // returns shared_ptr(nullptr) if Insert is false std::shared_ptr handler::getOrInsertHandlerKernelBundle(bool Insert) const { - if (!MImpl->MKernelBundle && Insert) { - auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context(); - auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device(); - MImpl->MKernelBundle = detail::getSyclObjImpl( + if (!impl->MKernelBundle && Insert) { + auto Ctx = impl->MGraph ? impl->MGraph->getContext() : MQueue->get_context(); + auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); + impl->MKernelBundle = detail::getSyclObjImpl( get_kernel_bundle(Ctx, {Dev}, {})); } - return MImpl->MKernelBundle; + return impl->MKernelBundle; } // Sets kernel bundle to the provided one. void handler::setHandlerKernelBundle( const std::shared_ptr &NewKernelBundleImpPtr) { - MImpl->MKernelBundle = NewKernelBundleImpPtr; + impl->MKernelBundle = NewKernelBundleImpPtr; } void handler::setHandlerKernelBundle(kernel Kernel) { @@ -156,23 +157,23 @@ event handler::finalize() { // to a command without being bound to a command group, an exception should // be thrown. { - for (const auto &arg : MArgs) { + for (const auto &arg : impl->MArgs) { if (arg.MType != detail::kernel_param_kind_t::kind_accessor) continue; detail::Requirement *AccImpl = static_cast(arg.MPtr); if (AccImpl->MIsPlaceH) { - auto It = std::find(CGData.MRequirements.begin(), - CGData.MRequirements.end(), AccImpl); - if (It == CGData.MRequirements.end()) + auto It = std::find(impl->CGData.MRequirements.begin(), + impl->CGData.MRequirements.end(), AccImpl); + if (It == impl->CGData.MRequirements.end()) throw sycl::exception(make_error_code(errc::kernel_argument), "placeholder accessor must be bound by calling " "handler::require() before it can be used."); // Check associated accessors bool AccFound = false; - for (detail::ArgDesc &Acc : MAssociatedAccesors) { + for (detail::ArgDesc &Acc : impl->MAssociatedAccesors) { if (Acc.MType == detail::kernel_param_kind_t::kind_accessor && static_cast(Acc.MPtr) == AccImpl) { AccFound = true; @@ -190,15 +191,15 @@ event handler::finalize() { } const auto &type = getType(); - if (type == detail::CG::Kernel) { + if (type == detail::CGType::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle std::shared_ptr KernelBundleImpPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false); if (KernelBundleImpPtr) { // Make sure implicit non-interop kernel bundles have the kernel if (!KernelBundleImpPtr->isInterop() && - !MImpl->isStateExplicitKernelBundle()) { - auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device(); + !impl->isStateExplicitKernelBundle()) { + auto Dev = impl->MGraph ? 
impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( MKernelName.c_str()); @@ -243,13 +244,13 @@ event handler::finalize() { } } - if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() && - !MQueue->is_in_fusion_mode() && !CGData.MRequirements.size() && - !MStreamStorage.size() && - (!CGData.MEvents.size() || + if (MQueue && !impl->MGraph && !impl->MSubgraphNode && + !MQueue->getCommandGraph() && !MQueue->is_in_fusion_mode() && + !impl->CGData.MRequirements.size() && !MStreamStorage.size() && + (!impl->CGData.MEvents.size() || (MQueue->isInOrder() && detail::Scheduler::areEventsSafeForSchedulerBypass( - CGData.MEvents, MQueue->getContextImplPtr())))) { + impl->CGData.MEvents, MQueue->getContextImplPtr())))) { // if user does not add a new dependency to the dependency graph, i.e. // the graph is not changed, and the queue is not in fusion mode, then // this faster path is used to submit kernel bypassing scheduler and @@ -262,8 +263,8 @@ event handler::finalize() { // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent, int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( - StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, MNDRDesc, - KernelBundleImpPtr, MArgs); + StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, + impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent, InstanceID = InstanceID]() { #else @@ -273,10 +274,11 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch); + enqueueImpKernel(MQueue, impl->MNDRDesc, impl->MArgs, + KernelBundleImpPtr, MKernel, MKernelName.c_str(), + RawEvents, NewEvent, nullptr, impl->MKernelCacheConfig, + impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch); #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { @@ -289,7 +291,7 @@ event handler::finalize() { #endif }; - bool DiscardEvent = (MQueue->MDiscardEvents || !MImpl->MEventNeeded) && + bool DiscardEvent = (MQueue->MDiscardEvents || !impl->MEventNeeded) && MQueue->supportsDiscardingPiEvents(); if (DiscardEvent) { // Kernel only uses assert if it's non interop one @@ -325,118 +327,118 @@ event handler::finalize() { std::unique_ptr CommandGroup; switch (type) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { // Copy kernel name here instead of move so that it's available after // running of this method by reductions implementation. 
This allows for // assert feature to check if kernel uses assertions CommandGroup.reset(new detail::CGExecKernel( - std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), - std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs), - MKernelName.c_str(), std::move(MStreamStorage), - std::move(MImpl->MAuxiliaryResources), MCGType, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch, MCodeLoc)); + std::move(impl->MNDRDesc), std::move(MHostKernel), std::move(MKernel), + std::move(impl->MKernelBundle), std::move(impl->CGData), + std::move(impl->MArgs), MKernelName.c_str(), std::move(MStreamStorage), + std::move(impl->MAuxiliaryResources), getType(), + impl->MKernelCacheConfig, impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch, MCodeLoc)); break; } - case detail::CG::CopyAccToPtr: - case detail::CG::CopyPtrToAcc: - case detail::CG::CopyAccToAcc: + case detail::CGType::CopyAccToPtr: + case detail::CGType::CopyPtrToAcc: + case detail::CGType::CopyAccToAcc: CommandGroup.reset( - new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData), - std::move(MImpl->MAuxiliaryResources), MCodeLoc)); + new detail::CGCopy(getType(), MSrcPtr, MDstPtr, std::move(impl->CGData), + std::move(impl->MAuxiliaryResources), MCodeLoc)); break; - case detail::CG::Fill: + case detail::CGType::Fill: CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::UpdateHost: + case detail::CGType::UpdateHost: CommandGroup.reset( - new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc)); + new detail::CGUpdateHost(MDstPtr, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::CopyUSM: + case detail::CGType::CopyUSM: CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::FillUSM: + case detail::CGType::FillUSM: CommandGroup.reset(new detail::CGFillUSM( - std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc)); + std::move(MPattern), MDstPtr, MLength, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::PrefetchUSM: + case detail::CGType::PrefetchUSM: CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::AdviseUSM: - CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice, - std::move(CGData), MCGType, + case detail::CGType::AdviseUSM: + CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, impl->MAdvice, + std::move(impl->CGData), getType(), MCodeLoc)); break; - case detail::CG::Copy2DUSM: + case detail::CGType::Copy2DUSM: CommandGroup.reset(new detail::CGCopy2DUSM( - MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth, - MImpl->MHeight, std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MSrcPitch, impl->MDstPitch, impl->MWidth, + impl->MHeight, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::Fill2DUSM: + case detail::CGType::Fill2DUSM: CommandGroup.reset(new detail::CGFill2DUSM( - std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth, - MImpl->MHeight, std::move(CGData), MCodeLoc)); + std::move(MPattern), MDstPtr, impl->MDstPitch, impl->MWidth, + impl->MHeight, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::Memset2DUSM: + case detail::CGType::Memset2DUSM: CommandGroup.reset(new detail::CGMemset2DUSM( - 
MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight, - std::move(CGData), MCodeLoc)); + MPattern[0], MDstPtr, impl->MDstPitch, impl->MWidth, impl->MHeight, + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::CodeplayHostTask: { - auto context = MGraph ? detail::getSyclObjImpl(MGraph->getContext()) + case detail::CGType::CodeplayHostTask: { + auto context = impl->MGraph ? detail::getSyclObjImpl(impl->MGraph->getContext()) : MQueue->getContextImplPtr(); CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue, context, std::move(MArgs), - std::move(CGData), MCGType, MCodeLoc)); + std::move(impl->MHostTask), MQueue, context, std::move(impl->MArgs), + std::move(impl->CGData), getType(), MCodeLoc)); break; } - case detail::CG::Barrier: - case detail::CG::BarrierWaitlist: { + case detail::CGType::Barrier: + case detail::CGType::BarrierWaitlist: { if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) { - CGData.MEvents.insert(std::end(CGData.MEvents), - std::begin(MEventsWaitWithBarrier), - std::end(MEventsWaitWithBarrier)); + impl->CGData.MEvents.insert(std::end(impl->CGData.MEvents), + std::begin(impl->MEventsWaitWithBarrier), + std::end(impl->MEventsWaitWithBarrier)); // Barrier node is implemented as an empty node in Graph // but keep the barrier type to help managing dependencies - MCGType = detail::CG::Barrier; + setType(detail::CGType::Barrier); CommandGroup.reset( - new detail::CG(detail::CG::Barrier, std::move(CGData), MCodeLoc)); + new detail::CG(detail::CGType::Barrier, std::move(impl->CGData), MCodeLoc)); } else { CommandGroup.reset( - new detail::CGBarrier(std::move(MEventsWaitWithBarrier), - std::move(CGData), MCGType, MCodeLoc)); + new detail::CGBarrier(std::move(impl->MEventsWaitWithBarrier), + std::move(impl->CGData), getType(), MCodeLoc)); } break; } - case detail::CG::ProfilingTag: { - CommandGroup.reset(new detail::CGProfilingTag(std::move(CGData), MCodeLoc)); + case detail::CGType::ProfilingTag: { + CommandGroup.reset(new detail::CGProfilingTag(std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::CopyToDeviceGlobal: { + case detail::CGType::CopyToDeviceGlobal: { CommandGroup.reset(new detail::CGCopyToDeviceGlobal( - MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset, - std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset, + std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::CopyFromDeviceGlobal: { + case detail::CGType::CopyFromDeviceGlobal: { CommandGroup.reset(new detail::CGCopyFromDeviceGlobal( - MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset, - std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset, + std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::ReadWriteHostPipe: { + case detail::CGType::ReadWriteHostPipe: { CommandGroup.reset(new detail::CGReadWriteHostPipe( - MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr, - MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData), + impl->HostPipeName, impl->HostPipeBlocking, impl->HostPipePtr, + impl->HostPipeTypeSize, impl->HostPipeRead, std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::ExecCommandBuffer: { + case detail::CGType::ExecCommandBuffer: { std::shared_ptr ParentGraph = - MQueue ? MQueue->getCommandGraph() : MGraph; + MQueue ? 
MQueue->getCommandGraph() : impl->MGraph; // If a parent graph is set that means we are adding or recording a subgraph // and we don't want to actually execute this command graph submission. @@ -446,37 +448,37 @@ event handler::finalize() { ParentLock = ext::oneapi::experimental::detail::graph_impl::WriteLock( ParentGraph->MMutex); } - CGData.MRequirements = MExecGraph->getRequirements(); + impl->CGData.MRequirements = impl->MExecGraph->getRequirements(); // Here we are using the CommandGroup without passing a CommandBuffer to // pass the exec_graph_impl and event dependencies. Since this subgraph CG // will not be executed this is fine. CommandGroup.reset(new sycl::detail::CGExecCommandBuffer( - nullptr, MExecGraph, std::move(CGData))); + nullptr, impl->MExecGraph, std::move(impl->CGData))); } else { event GraphCompletionEvent = - MExecGraph->enqueue(MQueue, std::move(CGData)); + impl->MExecGraph->enqueue(MQueue, std::move(impl->CGData)); MLastEvent = GraphCompletionEvent; return MLastEvent; } } break; - case detail::CG::CopyImage: + case detail::CGType::CopyImage: CommandGroup.reset(new detail::CGCopyImage( - MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat, - MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset, - MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MImageDesc, impl->MImageFormat, + impl->MImageCopyFlags, impl->MSrcOffset, impl->MDestOffset, + impl->MHostExtent, impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::SemaphoreWait: + case detail::CGType::SemaphoreWait: CommandGroup.reset(new detail::CGSemaphoreWait( - MImpl->MInteropSemaphoreHandle, MImpl->MWaitValue, std::move(CGData), + impl->MInteropSemaphoreHandle, impl->MWaitValue, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::SemaphoreSignal: + case detail::CGType::SemaphoreSignal: CommandGroup.reset(new detail::CGSemaphoreSignal( - MImpl->MInteropSemaphoreHandle, MImpl->MSignalValue, std::move(CGData), + impl->MInteropSemaphoreHandle, impl->MSignalValue, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::None: + case detail::CGType::None: if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) { std::cout << "WARNING: An empty command group is submitted." << std::endl; } @@ -484,9 +486,9 @@ event handler::finalize() { // Empty nodes are handled by Graph like standard nodes // For Standard mode (non-graph), // empty nodes are not sent to the scheduler to save time - if (MGraph || (MQueue && MQueue->getCommandGraph())) { + if (impl->MGraph || (MQueue && MQueue->getCommandGraph())) { CommandGroup.reset( - new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc)); + new detail::CG(detail::CGType::None, std::move(impl->CGData), MCodeLoc)); } else { detail::EventImplPtr Event = std::make_shared(); MLastEvent = detail::createSyclObjFromImpl(Event); @@ -502,8 +504,8 @@ event handler::finalize() { // If there is a graph associated with the handler we are in the explicit // graph mode, so we store the CG instead of submitting it to the scheduler, // so it can be retrieved by the graph later. 
- if (MGraph) { - MGraphNodeCG = std::move(CommandGroup); + if (impl->MGraph) { + impl->MGraphNodeCG = std::move(CommandGroup); return detail::createSyclObjFromImpl( std::make_shared()); } @@ -522,10 +524,10 @@ event handler::finalize() { GraphImpl->MMutex); ext::oneapi::experimental::node_type NodeType = - MImpl->MUserFacingNodeType != + impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty - ? MImpl->MUserFacingNodeType - : ext::oneapi::experimental::detail::getNodeTypeFromCG(MCGType); + ? impl->MUserFacingNodeType + : ext::oneapi::experimental::detail::getNodeTypeFromCG(getType()); // Create a new node in the graph representing this command-group if (MQueue->isInOrder()) { @@ -552,7 +554,7 @@ event handler::finalize() { NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); } - if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { + if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { GraphImpl->setBarrierDep(MQueue, NodeImpl); } } @@ -560,20 +562,20 @@ event handler::finalize() { // Associate an event with this new node and return the event. GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl); - NodeImpl->MNDRangeUsed = MImpl->MNDRangeUsed; + NodeImpl->MNDRangeUsed = impl->MNDRangeUsed; return detail::createSyclObjFromImpl(EventImpl); } detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), std::move(MQueue), MImpl->MEventNeeded); + std::move(CommandGroup), std::move(MQueue), impl->MEventNeeded); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } void handler::addReduction(const std::shared_ptr &ReduObj) { - MImpl->MAuxiliaryResources.push_back(ReduObj); + impl->MAuxiliaryResources.push_back(ReduObj); } void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl, @@ -592,13 +594,13 @@ void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl, } // Add accessor to the list of requirements. if (Req->MAccessRange.size() != 0) - CGData.MRequirements.push_back(Req); + impl->CGData.MRequirements.push_back(Req); // Store copy of the accessor. - CGData.MAccStorage.push_back(std::move(AccImpl)); + impl->CGData.MAccStorage.push_back(std::move(AccImpl)); // Add an accessor to the handler list of associated accessors. // For associated accessors index does not means nothing. 
- MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor, - Req, AccTarget, /*index*/ 0); + impl->MAssociatedAccesors.emplace_back( + detail::kernel_param_kind_t::kind_accessor, Req, AccTarget, /*index*/ 0); } void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, @@ -662,7 +664,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, switch (Kind) { case kernel_param_kind_t::kind_std_layout: case kernel_param_kind_t::kind_pointer: { - MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift); + addArg(Kind, Ptr, Size, Index + IndexShift); break; } case kernel_param_kind_t::kind_stream: { @@ -675,7 +677,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::Requirement *GBufReq = GBufImpl.get(); addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); @@ -683,14 +685,14 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::Requirement *GOffsetReq = GOfssetImpl.get(); addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); detail::Requirement *GFlushReq = GFlushImpl.get(); - size_t GlobalSize = MNDRDesc.GlobalSize.size(); + size_t GlobalSize = impl->MNDRDesc.GlobalSize.size(); // If work group size wasn't set explicitly then it must be recieved // from kernel attribute or set to default values. // For now we can't get this attribute here. @@ -698,15 +700,14 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // TODO adjust MNDRDesc when device image contains kernel's attribute if (GlobalSize == 0) { // Suppose that work group size is 1 for every dimension - GlobalSize = MNDRDesc.NumWorkGroups.size(); + GlobalSize = impl->MNDRDesc.NumWorkGroups.size(); } addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, MArgs, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &S->FlushBufferSize, sizeof(S->FlushBufferSize), - Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, + sizeof(S->FlushBufferSize), Index + IndexShift); break; } @@ -719,9 +720,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, - IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + addArgsForGlobalAccessor( + AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource, + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); break; } case access::target::local: { @@ -736,7 +737,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // Some backends do not accept zero-sized local memory arguments, so we // make it a minimum allocation of 1 byte. 
SizeInBytes = std::max(SizeInBytes, 1); - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, + impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, Index + IndexShift); // TODO ESIMD currently does not suport MSize field passing yet // accessor::init for ESIMD-mode accessor has a single field, translated @@ -744,21 +745,21 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, if (!IsESIMD && !IsKernelCreatedFromSource) { ++IndexShift; const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]); - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); } break; } case access::target::image: case access::target::image_array: { detail::Requirement *AccImpl = static_cast(Ptr); - MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift); + addArg(Kind, AccImpl, Size, Index + IndexShift); if (!IsKernelCreatedFromSource) { // TODO Handle additional kernel arguments for image class // if the compiler front-end adds them. @@ -776,14 +777,13 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_sampler: { - MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), - Index + IndexShift); + addArg(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), + Index + IndexShift); break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - MArgs.emplace_back( - kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, - Index + IndexShift); + addArg(kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, + Index + IndexShift); break; } case kernel_param_kind_t::kind_invalid: @@ -805,8 +805,8 @@ inline constexpr size_t MaxNumAdditionalArgs = 13; void handler::extractArgsAndReqs() { assert(MKernel && "MKernel is not initialized"); - std::vector UnPreparedArgs = std::move(MArgs); - MArgs.clear(); + std::vector UnPreparedArgs = std::move(impl->MArgs); + clearArgs(); std::sort( UnPreparedArgs.begin(), UnPreparedArgs.end(), @@ -815,7 +815,7 @@ void handler::extractArgsAndReqs() { }); const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource(); - MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); + impl->MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); size_t IndexShift = 0; for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { @@ -833,7 +833,7 @@ void handler::extractArgsAndReqsFromLambda( const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) { const bool IsKernelCreatedFromSource = false; size_t IndexShift = 0; - MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum); + impl->MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum); for (size_t I = 0; I < KernelArgsNum; ++I) { void *Ptr = LambdaPtr + KernelArgs[I].offset; @@ -876,12 +876,12 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { return; // Implicit kernel bundles are populated late so we ignore them - if 
(!MImpl->isStateExplicitKernelBundle()) + if (!impl->isStateExplicitKernelBundle()) return; kernel_id KernelID = detail::get_kernel_id_impl(KernelName); device Dev = - MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this); + impl->MGraph ? impl->MGraph->getDevice() : detail::getDeviceFromHandler(*this); if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev)) throw sycl::exception( make_error_code(errc::kernel_not_supported), @@ -890,8 +890,8 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { void handler::ext_oneapi_barrier(const std::vector &WaitList) { throwIfActionIsCreated(); - MCGType = detail::CG::BarrierWaitlist; - MEventsWaitWithBarrier.reserve(WaitList.size()); + setType(detail::CGType::BarrierWaitlist); + impl->MEventsWaitWithBarrier.reserve(WaitList.size()); for (auto &Event : WaitList) { auto EventImpl = detail::getSyclObjImpl(Event); // We could not wait for host task events in backend. @@ -899,7 +899,7 @@ void handler::ext_oneapi_barrier(const std::vector &WaitList) { if (EventImpl->isHost()) { depends_on(EventImpl); } - MEventsWaitWithBarrier.push_back(EventImpl); + impl->MEventsWaitWithBarrier.push_back(EventImpl); } } @@ -923,7 +923,7 @@ void handler::memcpy(void *Dest, const void *Src, size_t Count) { MSrcPtr = const_cast(Src); MDstPtr = Dest; MLength = Count; - setType(detail::CG::CopyUSM); + setType(detail::CGType::CopyUSM); } void handler::memset(void *Dest, int Value, size_t Count) { @@ -932,22 +932,22 @@ void handler::memset(void *Dest, int Value, size_t Count) { MPattern.push_back(static_cast(Value)); MLength = Count; setUserFacingNodeType(ext::oneapi::experimental::node_type::memset); - setType(detail::CG::FillUSM); + setType(detail::CGType::FillUSM); } void handler::prefetch(const void *Ptr, size_t Count) { throwIfActionIsCreated(); MDstPtr = const_cast(Ptr); MLength = Count; - setType(detail::CG::PrefetchUSM); + setType(detail::CGType::PrefetchUSM); } void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { throwIfActionIsCreated(); MDstPtr = const_cast(Ptr); MLength = Count; - MImpl->MAdvice = static_cast(Advice); - setType(detail::CG::AdviseUSM); + impl->MAdvice = static_cast(Advice); + setType(detail::CGType::AdviseUSM); } void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, @@ -956,7 +956,7 @@ void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); MLength = Count * ValueSize; - setType(detail::CG::FillUSM); + setType(detail::CGType::FillUSM); } void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, @@ -965,11 +965,11 @@ void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, // Checks done in callers. 
MSrcPtr = const_cast(Src); MDstPtr = Dest; - MImpl->MSrcPitch = SrcPitch; - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Copy2DUSM); + impl->MSrcPitch = SrcPitch; + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Copy2DUSM); } void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch, @@ -979,10 +979,10 @@ void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch, MDstPtr = Dest; MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Fill2DUSM); + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Fill2DUSM); } void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, @@ -990,10 +990,10 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, // Checks done in callers. MDstPtr = Dest; MPattern.push_back(static_cast(Value)); - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Memset2DUSM); + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Memset2DUSM); } void handler::ext_oneapi_copy( @@ -1037,15 +1037,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1091,15 +1091,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DestImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1143,15 +1143,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = 
{Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1195,15 +1195,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(ImageDesc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - MImpl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; + impl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1249,15 +1249,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(SrcImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1301,17 +1301,17 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageDesc.image_row_pitch = Pitch; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageDesc.image_row_pitch = Pitch; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = detail::getPiImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), 
get_pointer_type(Dest, MQueue->get_context())); - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1358,17 +1358,17 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DeviceImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageDesc.image_row_pitch = DeviceRowPitch; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageDesc.image_row_pitch = DeviceRowPitch; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = detail::getPiImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_wait_external_semaphore( @@ -1387,10 +1387,10 @@ void handler::ext_oneapi_wait_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore used needs a user passed wait value."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MWaitValue = {}; - setType(detail::CG::SemaphoreWait); + impl->MWaitValue = {}; + setType(detail::CGType::SemaphoreWait); } void handler::ext_oneapi_wait_external_semaphore( @@ -1407,10 +1407,10 @@ void handler::ext_oneapi_wait_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore does not support user passed wait values."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MWaitValue = WaitValue; - setType(detail::CG::SemaphoreWait); + impl->MWaitValue = WaitValue; + setType(detail::CGType::SemaphoreWait); } void handler::ext_oneapi_signal_external_semaphore( @@ -1429,10 +1429,10 @@ void handler::ext_oneapi_signal_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore used needs a user passed signal value."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MSignalValue = {}; - setType(detail::CG::SemaphoreSignal); + impl->MSignalValue = {}; + setType(detail::CGType::SemaphoreSignal); } void handler::ext_oneapi_signal_external_semaphore( @@ -1449,25 +1449,25 @@ void handler::ext_oneapi_signal_external_semaphore( "Invalid type of semaphore for this operation. 
The " "type of semaphore does not support user passed signal values."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MSignalValue = SignalValue; - setType(detail::CG::SemaphoreSignal); + impl->MSignalValue = SignalValue; + setType(detail::CGType::SemaphoreSignal); } void handler::use_kernel_bundle( const kernel_bundle &ExecBundle) { std::shared_ptr PrimaryQueue = - MImpl->MSubmissionPrimaryQueue; - if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) || - (MGraph && (MGraph->getContext() != ExecBundle.get_context()))) + impl->MSubmissionPrimaryQueue; + if ((!impl->MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) || + (impl->MGraph && (impl->MGraph->getContext() != ExecBundle.get_context()))) throw sycl::exception( make_error_code(errc::invalid), "Context associated with the primary queue is different from the " "context associated with the kernel bundle"); std::shared_ptr SecondaryQueue = - MImpl->MSubmissionSecondaryQueue; + impl->MSubmissionSecondaryQueue; if (SecondaryQueue && SecondaryQueue->get_context() != ExecBundle.get_context()) throw sycl::exception( @@ -1510,7 +1510,7 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { "Graph nodes cannot depend on events from another graph."); } } - CGData.MEvents.push_back(EventImpl); + impl->CGData.MEvents.push_back(EventImpl); } void handler::depends_on(const std::vector &Events) { @@ -1579,7 +1579,7 @@ void handler::verifyDeviceHasProgressGuarantee( bool handler::supportsUSMMemcpy2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT)) @@ -1590,7 +1590,7 @@ bool handler::supportsUSMMemcpy2D() { bool handler::supportsUSMFill2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT)) @@ -1601,7 +1601,7 @@ bool handler::supportsUSMFill2D() { bool handler::supportsUSMMemset2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT)) @@ -1618,30 +1618,30 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { } backend handler::getDeviceBackend() const { - if (MGraph) - return MGraph->getDevice().get_backend(); + if (impl->MGraph) + return impl->MGraph->getDevice().get_backend(); else return MQueue->getDeviceImplPtr()->getBackend(); } void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { - MImpl->HostPipeName = Name.data(); - MImpl->HostPipePtr = Ptr; - MImpl->HostPipeTypeSize = Size; - MImpl->HostPipeBlocking = Block; - MImpl->HostPipeRead = 1; - setType(detail::CG::ReadWriteHostPipe); + impl->HostPipeName = Name.data(); + impl->HostPipePtr = Ptr; + impl->HostPipeTypeSize = Size; + impl->HostPipeBlocking = Block; + impl->HostPipeRead = 1; + 
setType(detail::CGType::ReadWriteHostPipe); } void handler::ext_intel_write_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { - MImpl->HostPipeName = Name.data(); - MImpl->HostPipePtr = Ptr; - MImpl->HostPipeTypeSize = Size; - MImpl->HostPipeBlocking = Block; - MImpl->HostPipeRead = 0; - setType(detail::CG::ReadWriteHostPipe); + impl->HostPipeName = Name.data(); + impl->HostPipePtr = Ptr; + impl->HostPipeTypeSize = Size; + impl->HostPipeBlocking = Block; + impl->HostPipeRead = 0; + setType(detail::CGType::ReadWriteHostPipe); } void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src, @@ -1650,10 +1650,10 @@ void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src, throwIfActionIsCreated(); MSrcPtr = const_cast(Src); MDstPtr = const_cast(DeviceGlobalPtr); - MImpl->MIsDeviceImageScoped = IsDeviceImageScoped; + impl->MIsDeviceImageScoped = IsDeviceImageScoped; MLength = NumBytes; - MImpl->MOffset = Offset; - setType(detail::CG::CopyToDeviceGlobal); + impl->MOffset = Offset; + setType(detail::CGType::CopyToDeviceGlobal); } void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, @@ -1662,10 +1662,10 @@ void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, throwIfActionIsCreated(); MSrcPtr = const_cast(DeviceGlobalPtr); MDstPtr = Dest; - MImpl->MIsDeviceImageScoped = IsDeviceImageScoped; + impl->MIsDeviceImageScoped = IsDeviceImageScoped; MLength = NumBytes; - MImpl->MOffset = Offset; - setType(detail::CG::CopyFromDeviceGlobal); + impl->MOffset = Offset; + setType(detail::CGType::CopyFromDeviceGlobal); } void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr, @@ -1715,38 +1715,39 @@ handler::getContextImplPtr() const { void handler::setKernelCacheConfig( sycl::detail::pi::PiKernelCacheConfig Config) { - MImpl->MKernelCacheConfig = Config; + impl->MKernelCacheConfig = Config; } void handler::setKernelIsCooperative(bool KernelIsCooperative) { - MImpl->MKernelIsCooperative = KernelIsCooperative; + impl->MKernelIsCooperative = KernelIsCooperative; } -void handler::setKernelUsesClusterLaunch() { +void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - MImpl->MKernelUsesClusterLaunch = true; + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize, Dims); } void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable> Graph) { - MCGType = detail::CG::ExecCommandBuffer; - MExecGraph = detail::getSyclObjImpl(Graph); + setType(detail::CGType::ExecCommandBuffer); + impl->MExecGraph = detail::getSyclObjImpl(Graph); } std::shared_ptr handler::getCommandGraph() const { - if (MGraph) { - return MGraph; + if (impl->MGraph) { + return impl->MGraph; } return MQueue->getCommandGraph(); } void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) { - MImpl->MUserFacingNodeType = Type; + impl->MUserFacingNodeType = Type; } std::optional> handler::getMaxWorkGroups() { @@ -1770,7 +1771,7 @@ std::tuple, bool> handler::getMaxWorkGroups_v2() { return {std::array{0, 0, 0}, false}; } -void handler::setNDRangeUsed(bool Value) { MImpl->MNDRangeUsed = Value; } +void handler::setNDRangeUsed(bool Value) { impl->MNDRangeUsed = Value; } void handler::registerDynamicParameter( ext::oneapi::experimental::detail::dynamic_parameter_base 
&DynamicParamBase, @@ -1780,22 +1781,94 @@ void handler::registerDynamicParameter( make_error_code(errc::invalid), "Dynamic Parameters cannot be used with Graph Queue recording."); } - if (!MGraph) { + if (!impl->MGraph) { throw sycl::exception( make_error_code(errc::invalid), "Dynamic Parameters cannot be used with normal SYCL submissions"); } - auto ParamImpl = detail::getSyclObjImpl(DynamicParamBase); - if (ParamImpl->MGraph != this->MGraph) { + auto Paraimpl = detail::getSyclObjImpl(DynamicParamBase); + if (Paraimpl->MGraph != this->impl->MGraph) { throw sycl::exception( make_error_code(errc::invalid), "Cannot use a Dynamic Parameter with a node associated with a graph " "other than the one it was created with."); } - MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); + impl->MDynamicParameters.emplace_back(Paraimpl.get(), ArgIndex); +} + +bool handler::eventNeeded() const { return impl->MEventNeeded; } + +void *handler::storeRawArg(const void *Ptr, size_t Size) { + impl->CGData.MArgsStorage.emplace_back(Size); + void *Storage = static_cast(impl->CGData.MArgsStorage.back().data()); + std::memcpy(Storage, Ptr, Size); + return Storage; +} + +void handler::SetHostTask(std::function &&Func) { + setNDRangeDescriptor(range<1>(1)); + impl->MHostTask.reset(new detail::HostTask(std::move(Func))); + setType(detail::CGType::CodeplayHostTask); +} + +void handler::SetHostTask(std::function &&Func) { + setNDRangeDescriptor(range<1>(1)); + impl->MHostTask.reset(new detail::HostTask(std::move(Func))); + setType(detail::CGType::CodeplayHostTask); +} + +void handler::addAccessorReq(detail::AccessorImplPtr Accessor) { + // Add accessor to the list of requirements. + impl->CGData.MRequirements.push_back(Accessor.get()); + // Store copy of the accessor. 
+ impl->CGData.MAccStorage.push_back(std::move(Accessor)); +} + +void handler::addLifetimeSharedPtrStorage(std::shared_ptr SPtr) { + impl->CGData.MSharedPtrStorage.push_back(std::move(SPtr)); +} + +void handler::addArg(detail::kernel_param_kind_t ArgKind, void *Req, + int AccessTarget, int ArgIndex) { + impl->MArgs.emplace_back(ArgKind, Req, AccessTarget, ArgIndex); +} + +void handler::clearArgs() { + impl->MArgs.clear(); +} + +void handler::setArgsToAssociatedAccessors() { + impl->MArgs = impl->MAssociatedAccesors; +} + +bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, + access::target AccessTarget) const { + return std::find_if( + impl->MAssociatedAccesors.cbegin(), + impl->MAssociatedAccesors.cend(), [&](const detail::ArgDesc &AD) { + return AD.MType == detail::kernel_param_kind_t::kind_accessor && + AD.MPtr == Req && + AD.MSize == static_cast(AccessTarget); + }) == impl->MAssociatedAccesors.end(); +} + +void handler::setType(sycl::detail::CGType Type) { impl->MCGType = Type; } +sycl::detail::CGType handler::getType() const { return impl->MCGType; } + +void handler::setNDRangeDescriptorPadded(sycl::range<3> N, + bool SetNumWorkGroups, int Dims) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups, Dims}; +} +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims}; +} +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset, + int Dims) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; } -bool handler::eventNeeded() const { return MImpl->MEventNeeded; } } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index b54ed7b1e8f9..7e6a333e305d 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -2,7 +2,6 @@ // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt #include -#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp b/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp index dc384c049e83..1a8300eae488 100644 --- a/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include #include -#include #if !defined(accessor_new_api_test) && !defined(buffer_new_api_test) && \ !defined(accessor_placeholder_new_api_test) && \ diff --git a/sycl/test-e2e/Basic/accessor/accessor.cpp b/sycl/test-e2e/Basic/accessor/accessor.cpp index 0846a4fdc6a7..de944ba12754 100644 --- a/sycl/test-e2e/Basic/accessor/accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/accessor.cpp @@ -11,7 +11,6 @@ #include #include #include -#include struct IdxID1 { int x; diff --git a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp index ed282aa84cf6..b8e91547b83c 100644 --- a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include -#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/event.cpp b/sycl/test-e2e/Basic/event.cpp index d5cba1063f07..513e1aa8d630 100644 --- a/sycl/test-e2e/Basic/event.cpp +++ 
b/sycl/test-e2e/Basic/event.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include #include -#include int main() { { diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 7f4f31320f1e..8185eeac6658 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -8,7 +8,6 @@ #define SYCL2020_DISABLE_DEPRECATION_WARNINGS #include -#include #include #include diff --git a/sycl/test-e2e/Basic/host_task_depends.cpp b/sycl/test-e2e/Basic/host_task_depends.cpp index 746062ffce31..e59acd8a701d 100644 --- a/sycl/test-e2e/Basic/host_task_depends.cpp +++ b/sycl/test-e2e/Basic/host_task_depends.cpp @@ -3,7 +3,6 @@ // RUN: %{run} %t.out #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/Basic/out_of_order_queue_status.cpp b/sycl/test-e2e/Basic/out_of_order_queue_status.cpp index a7f6319892bc..67929ac0fc66 100644 --- a/sycl/test-e2e/Basic/out_of_order_queue_status.cpp +++ b/sycl/test-e2e/Basic/out_of_order_queue_status.cpp @@ -5,7 +5,6 @@ // queue. #include -#include #include #include diff --git a/sycl/test-e2e/Basic/profile_host_task.cpp b/sycl/test-e2e/Basic/profile_host_task.cpp index 61fa74539739..d3512fc13d55 100644 --- a/sycl/test-e2e/Basic/profile_host_task.cpp +++ b/sycl/test-e2e/Basic/profile_host_task.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include int main() { diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp index 039d1a008b45..bb87ffeb443a 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp @@ -7,8 +7,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp index fefd50ca52cd..d69f0db910bf 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp @@ -7,8 +7,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp index c2121346a50b..99774f29cc83 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp @@ -6,8 +6,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp index 14bbca6d0411..2fd1f31026e1 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp @@ -6,8 +6,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; 
constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Graph/Inputs/host_task.cpp b/sycl/test-e2e/Graph/Inputs/host_task.cpp index 8d1c976b1acb..8ec19d770dbc 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task2.cpp b/sycl/test-e2e/Graph/Inputs/host_task2.cpp index e49d97f36c85..6fddddf489b3 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task2.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task2.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp b/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp index 369b51cbdb9c..c4d03e3be9e4 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_last.cpp b/sycl/test-e2e/Graph/Inputs/host_task_last.cpp index 572087cb5412..07cc7f1dc61a 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_last.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_last.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp b/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp index 8b899cb3bb31..efe084645053 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp b/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp index c72fe620f73c..62dc577106e3 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_single.cpp b/sycl/test-e2e/Graph/Inputs/host_task_single.cpp index 28c566f23991..d87d6d1f76e5 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_single.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_single.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp b/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp index fbe94be1be0e..1312ea1074da 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp index 3c22462ac485..56ce712b8d41 100644 --- a/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp @@ -9,7 +9,7 @@ // SYCL #include -#include +#include bool is_discrete(const device &Device) { auto ZeDevice = get_native(Device); diff --git a/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp b/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp index e35d175e5205..ac5184124b16 100644 --- a/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp +++ 
b/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp @@ -12,7 +12,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp index f2260314aa80..17d66d8b2d1d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp index ee7b8c215f39..cd19c85add1f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp index 1fc8264b018c..ad375fbac43d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp index c6752de3923d..d1a7f5c419e3 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp @@ -6,7 +6,6 @@ #include #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp index 3b4cdf1b53c3..16b308cfc544 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp index a991182ece57..bb315329195b 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp @@ -3,7 +3,6 @@ // RUN: %{run} %t.out #include -#include sycl::event submit(sycl::queue &Q, sycl::buffer &B) { return Q.submit([&](sycl::handler &CGH) { diff --git a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp index 7828362fab56..eef7cf70b21b 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp @@ -5,7 +5,6 @@ // UNSUPPORTED: ze_debug && windows #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp b/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp index 741b3dea3dd3..7f384fbc0972 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp @@ -6,7 +6,6 @@ #include #include 
-#include #include namespace S = sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task.cpp b/sycl/test-e2e/HostInteropTask/host-task.cpp index 8dda4a1daaeb..3d6423aa6e0f 100644 --- a/sycl/test-e2e/HostInteropTask/host-task.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp index fee893b9f752..3f73fcb420b2 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp index 503fa4a51067..ad4c973019fc 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include diff --git a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp index e7b34adf2ccb..05d4edf1d2d1 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #define __HIP_PLATFORM_AMD__ diff --git a/sycl/test-e2e/HostInteropTask/interop-task.cpp b/sycl/test-e2e/HostInteropTask/interop-task.cpp index aa6606c1cc2f..668affc96a7c 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp index c1dbbed7b134..7a33cbf7f33c 100644 --- a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp +++ b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp @@ -11,7 +11,6 @@ // call to ext_oneapi_set_external_event. #include -#include #include #include diff --git a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp index 997a8f582452..0ff035914946 100644 --- a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp b/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp index 92ccbe5b019e..e8d07332a3c5 100644 --- a/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp @@ -5,7 +5,6 @@ // queue in the case when usm commands and host tasks are interleaved. #include -#include #include #include diff --git a/sycl/test-e2e/KernelFusion/sync_host_task.cpp b/sycl/test-e2e/KernelFusion/sync_host_task.cpp index 40754d4e7a8d..640bd7264146 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_task.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_task.cpp @@ -8,7 +8,6 @@ // complete_fusion. 
#include -#include #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp index e6257353d3bd..1f9bfd5b5f23 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp @@ -13,8 +13,8 @@ // SYCL #include "interop-level-zero-buffer-helpers.hpp" #include -#include #include +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp index ccf99824f324..cb4b317b629b 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp @@ -24,9 +24,9 @@ #include #include #include -#include #include #include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-opencl.cpp b/sycl/test-e2e/Plugin/interop-opencl.cpp index faa5ae348191..137c2b8a7455 100644 --- a/sycl/test-e2e/Plugin/interop-opencl.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp index c2d262d106c5..22e742e0d6c2 100644 --- a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp +++ b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp @@ -21,7 +21,6 @@ // CHECK-NEXT: : 3 #include -#include int main(int argc, const char **argv) { diff --git a/sycl/test-e2e/USM/host_task.cpp b/sycl/test-e2e/USM/host_task.cpp index 6c30c07b049d..fc6551844e9e 100644 --- a/sycl/test-e2e/USM/host_task.cpp +++ b/sycl/test-e2e/USM/host_task.cpp @@ -9,7 +9,6 @@ // REQUIRES: aspect-usm_shared_allocations #include -#include #include int main() { diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index dc1e3f82edda..0a571110240f 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -14,7 +14,7 @@ void foo() { // The order of field declarations and their types are important. 
// CHECK: 0 | class sycl::handler -// CHECK-NEXT: 0 | class std::shared_ptr MImpl +// CHECK-NEXT: 0 | class std::shared_ptr impl // CHECK-NEXT: 0 | class std::__shared_ptr (base) // CHECK-NEXT: 0 | class std::__shared_ptr_access (base) (empty) // CHECK-NEXT: 0 | element_type * _M_ptr @@ -26,166 +26,58 @@ void foo() { // CHECK-NEXT: 16 | element_type * _M_ptr // CHECK-NEXT: 24 | class std::__shared_count<> _M_refcount // CHECK-NEXT: 24 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 32 | struct sycl::detail::CG::StorageInitHelper CGData -// CHECK-NEXT: 32 | class std::vector > MArgsStorage -// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 32 | class std::allocator > (base) (empty) -// CHECK: 32 | pointer _M_start -// CHECK-NEXT: 40 | pointer _M_finish -// CHECK-NEXT: 48 | pointer _M_end_of_storage -// CHECK-NEXT: 56 | class std::vector > MAccStorage -// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 56 | class std::allocator > (base) (empty) -// CHECK: 56 | pointer _M_start -// CHECK-NEXT: 64 | pointer _M_finish -// CHECK-NEXT: 72 | pointer _M_end_of_storage -// CHECK-NEXT: 80 | class std::vector > MSharedPtrStorage -// CHECK-NEXT: 80 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 80 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 80 | class std::allocator > (base) (empty) -// CHECK: 80 | pointer _M_start -// CHECK-NEXT: 88 | pointer _M_finish -// CHECK-NEXT: 96 | pointer _M_end_of_storage -// CHECK-NEXT: 104 | class std::vector MRequirements -// CHECK-NEXT: 104 | struct std::_Vector_base > (base) -// CHECK-NEXT: 104 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 104 | class std::allocator (base) (empty) -// CHECK: 104 | pointer _M_start -// CHECK-NEXT: 112 | pointer _M_finish -// CHECK-NEXT: 120 | pointer _M_end_of_storage -// CHECK-NEXT: 128 | class std::vector > MEvents -// CHECK-NEXT: 128 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 128 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 128 | class std::allocator > (base) (empty) -// CHECK: 128 | pointer _M_start -// CHECK-NEXT: 136 | pointer _M_finish -// CHECK-NEXT: 144 | pointer _M_end_of_storage -// CHECK-NEXT: 152 | class std::vector > MLocalAccStorage -// CHECK-NEXT: 152 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 152 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 152 | class std::allocator > (base) (empty) -// CHECK: 152 | pointer _M_start -// CHECK-NEXT: 160 | pointer _M_finish -// CHECK-NEXT: 168 | pointer _M_end_of_storage -// CHECK-NEXT: 176 | class std::vector > MStreamStorage -// CHECK-NEXT: 176 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 176 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 176 | class std::allocator > (base) (empty) -// CHECK: 176 | pointer _M_start -// CHECK-NEXT: 184 | pointer _M_finish -// CHECK-NEXT: 192 | pointer _M_end_of_storage -// CHECK-NEXT: 200 | class std::vector MArgs -// CHECK-NEXT: 200 | struct std::_Vector_base > (base) -// CHECK-NEXT: 200 | struct std::_Vector_base >::_Vector_impl _M_impl -// 
CHECK-NEXT: 200 | class std::allocator (base) (empty) -// CHECK: 200 | pointer _M_start -// CHECK-NEXT: 208 | pointer _M_finish -// CHECK-NEXT: 216 | pointer _M_end_of_storage -// CHECK-NEXT: 224 | class std::vector MAssociatedAccesors -// CHECK-NEXT: 224 | struct std::_Vector_base > (base) -// CHECK-NEXT: 224 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 224 | class std::allocator (base) (empty) -// CHECK: 224 | pointer _M_start -// CHECK-NEXT: 232 | pointer _M_finish -// CHECK-NEXT: 240 | pointer _M_end_of_storage -// CHECK-NEXT: 248 | class sycl::detail::NDRDescT MNDRDesc -// CHECK-NEXT: 248 | class sycl::range<3> GlobalSize -// CHECK-NEXT: 248 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 248 | size_t[3] common_array -// CHECK-NEXT: 272 | class sycl::range<3> LocalSize -// CHECK-NEXT: 272 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 272 | size_t[3] common_array -// CHECK-NEXT: 296 | class sycl::id<3> GlobalOffset -// CHECK-NEXT: 296 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 296 | size_t[3] common_array -// CHECK-NEXT: 320 | class sycl::range<3> NumWorkGroups -// CHECK-NEXT: 320 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 320 | size_t[3] common_array -// CHECK-NEXT: 344 | class sycl::range<3> ClusterDimensions -// CHECK-NEXT: 344 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 344 | size_t[3] common_array -// CHECK-NEXT: 368 | size_t Dims -// CHECK-NEXT: 376 | class sycl::detail::string MKernelName -// CHECK-NEXT: 376 | char * str -// CHECK-NEXT: 384 | class std::shared_ptr MKernel -// CHECK-NEXT: 384 | class std::__shared_ptr (base) -// CHECK-NEXT: 384 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 384 | element_type * _M_ptr -// CHECK-NEXT: 392 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 392 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 400 | detail::class CG::CGTYPE MCGType -// CHECK-NEXT: 408 | void * MSrcPtr -// CHECK-NEXT: 416 | void * MDstPtr -// CHECK-NEXT: 424 | size_t MLength -// CHECK-NEXT: 432 | class std::vector MPattern -// CHECK-NEXT: 432 | struct std::_Vector_base > (base) -// CHECK-NEXT: 432 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 432 | class std::allocator (base) (empty) -// CHECK: 432 | pointer _M_start -// CHECK-NEXT: 440 | pointer _M_finish -// CHECK-NEXT: 448 | pointer _M_end_of_storage -// CHECK-NEXT: 456 | class std::unique_ptr MHostKernel -// CHECK: 456 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 456 | class std::tuple > _M_t -// CHECK-NEXT: 456 | struct std::_Tuple_impl<0, class sycl::detail::HostKernelBase *, struct std::default_delete > (base) -// CHECK-NEXT: 456 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 456 | struct std::_Head_base<0, class sycl::detail::HostKernelBase *> (base) -// CHECK-NEXT: 456 | class sycl::detail::HostKernelBase * _M_head_impl -// CHECK-NEXT: 464 | class std::unique_ptr MHostTask -// CHECK: 464 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 464 | class std::tuple > _M_t -// CHECK-NEXT: 464 | struct std::_Tuple_impl<0, class sycl::detail::HostTask *, struct std::default_delete > (base) -// CHECK-NEXT: 464 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 464 | struct std::_Head_base<0, class sycl::detail::HostTask *> (base) -// CHECK-NEXT: 464 | class sycl::detail::HostTask * _M_head_impl -// CHECK-NEXT: 472 | class std::vector > MEventsWaitWithBarrier -// CHECK-NEXT: 472 | struct std::_Vector_base, class std::allocator > 
> (base) -// CHECK-NEXT: 472 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 472 | class std::allocator > (base) (empty) -// CHECK: 472 | pointer _M_start -// CHECK-NEXT: 480 | pointer _M_finish -// CHECK-NEXT: 488 | pointer _M_end_of_storage -// CHECK-NEXT: 496 | class std::shared_ptr MGraph -// CHECK-NEXT: 496 | class std::__shared_ptr (base) -// CHECK-NEXT: 496 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 496 | element_type * _M_ptr -// CHECK-NEXT: 504 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 504 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 512 | class std::shared_ptr MExecGraph -// CHECK-NEXT: 512 | class std::__shared_ptr (base) -// CHECK-NEXT: 512 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 512 | element_type * _M_ptr -// CHECK-NEXT: 520 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 520 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 528 | class std::shared_ptr MSubgraphNode -// CHECK-NEXT: 528 | class std::__shared_ptr (base) -// CHECK-NEXT: 528 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 528 | element_type * _M_ptr -// CHECK-NEXT: 536 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 536 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 544 | class std::unique_ptr MGraphNodeCG -// CHECK: 544 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 544 | class std::tuple > _M_t -// CHECK-NEXT: 544 | struct std::_Tuple_impl<0, class sycl::detail::CG *, struct std::default_delete > (base) -// CHECK-NEXT: 544 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 544 | struct std::_Head_base<0, class sycl::detail::CG *> (base) -// CHECK-NEXT: 544 | class sycl::detail::CG * _M_head_impl -// CHECK-NEXT: 552 | struct sycl::detail::code_location MCodeLoc -// CHECK-NEXT: 552 | const char * MFileName -// CHECK-NEXT: 560 | const char * MFunctionName -// CHECK-NEXT: 568 | unsigned long MLineNo -// CHECK-NEXT: 576 | unsigned long MColumnNo -// CHECK-NEXT: 584 | _Bool MIsFinalized -// CHECK-NEXT: 592 | class sycl::event MLastEvent -// CHECK-NEXT: 592 | class sycl::detail::OwnerLessBase (base) (empty) -// CHECK-NEXT: 592 | class std::shared_ptr impl -// CHECK-NEXT: 592 | class std::__shared_ptr (base) -// CHECK-NEXT: 592 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 592 | element_type * _M_ptr -// CHECK-NEXT: 600 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 600 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: | [sizeof=608, dsize=608, align=8, -// CHECK-NEXT: | nvsize=608, nvalign=8] - +// CHECK-NEXT: 32 | class std::vector > MLocalAccStorage +// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 32 | class std::allocator > (base) (empty) +// CHECK: 32 | pointer _M_start +// CHECK-NEXT: 40 | pointer _M_finish +// CHECK-NEXT: 48 | pointer _M_end_of_storage +// CHECK-NEXT: 56 | class std::vector > MStreamStorage +// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 56 | class std::allocator > (base) (empty) +// CHECK: 56 | pointer _M_start +// CHECK-NEXT: 64 | pointer _M_finish +// CHECK-NEXT: 72 | pointer _M_end_of_storage +// CHECK-NEXT: 80 | class sycl::detail::string MKernelName +// CHECK-NEXT: 
80 | char * str +// CHECK-NEXT: 88 | class std::shared_ptr MKernel +// CHECK-NEXT: 88 | class std::__shared_ptr (base) +// CHECK-NEXT: 88 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 88 | element_type * _M_ptr +// CHECK-NEXT: 96 | class std::__shared_count<> _M_refcount +// CHECK-NEXT: 96 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 104 | void * MSrcPtr +// CHECK-NEXT: 112 | void * MDstPtr +// CHECK-NEXT: 120 | size_t MLength +// CHECK-NEXT: 128 | class std::vector MPattern +// CHECK-NEXT: 128 | struct std::_Vector_base > (base) +// CHECK-NEXT: 128 | struct std::_Vector_base >::_Vector_impl _M_impl +// CHECK-NEXT: 128 | class std::allocator (base) (empty) +// CHECK: 128 | pointer _M_start +// CHECK-NEXT: 136 | pointer _M_finish +// CHECK-NEXT: 144 | pointer _M_end_of_storage +// CHECK-NEXT: 152 | class std::unique_ptr MHostKernel +// CHECK: 152 | class std::__uniq_ptr_impl > (base) +// CHECK-NEXT: 152 | class std::tuple > _M_t +// CHECK-NEXT: 152 | struct std::_Tuple_impl<0, class sycl::detail::HostKernelBase *, struct std::default_delete > (base) +// CHECK-NEXT: 152 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) +// CHECK: 152 | struct std::_Head_base<0, class sycl::detail::HostKernelBase *> (base) +// CHECK-NEXT: 152 | class sycl::detail::HostKernelBase * _M_head_impl +// CHECK-NEXT: 160 | struct sycl::detail::code_location MCodeLoc +// CHECK-NEXT: 160 | const char * MFileName +// CHECK-NEXT: 168 | const char * MFunctionName +// CHECK-NEXT: 176 | unsigned long MLineNo +// CHECK-NEXT: 184 | unsigned long MColumnNo +// CHECK-NEXT: 192 | _Bool MIsFinalized +// CHECK-NEXT: 200 | class sycl::event MLastEvent +// CHECK-NEXT: 200 | class sycl::detail::OwnerLessBase (base) (empty) +// CHECK-NEXT: 200 | class std::shared_ptr impl +// CHECK-NEXT: 200 | class std::__shared_ptr (base) +// CHECK-NEXT: 200 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 200 | element_type * _M_ptr +// CHECK-NEXT: 208 | class std::__shared_count<> _M_refcount +// CHECK-NEXT: 208 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi +// CHECK-NEXT: | [sizeof=216, dsize=216, align=8, +// CHECK-NEXT: | nvsize=216, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4b442721a108..d50aa035912c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3482,8 +3482,12 @@ _ZN4sycl3_V17handler10depends_onERKSt6vectorINS0_5eventESaIS3_EE _ZN4sycl3_V17handler10depends_onERKSt6vectorISt10shared_ptrINS0_6detail10event_implEESaIS6_EE _ZN4sycl3_V17handler10mem_adviseEPKvmi _ZN4sycl3_V17handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb +_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvNS0_14interop_handleEEE +_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvvEE +_ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE _ZN4sycl3_V17handler13getKernelNameEv +_ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE _ZN4sycl3_V17handler14setNDRangeUsedEb _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorEPvS7_S7_S7_ _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleEPvRKNS4_16image_descriptorE @@ -3521,12 +3525,17 @@ _ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm 
_ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3EEEi _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi -_ZN4sycl3_V17handler26setKernelUsesClusterLaunchEv +_ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi _ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm _ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb _ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm _ZN4sycl3_V17handler28setStateExplicitKernelBundleEv +_ZN4sycl3_V17handler28setArgsToAssociatedAccessorsEv _ZN4sycl3_V17handler30memcpyFromHostOnlyDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler30verifyUsedKernelBundleInternalENS0_6detail11string_viewE _ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ @@ -3534,10 +3543,13 @@ _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experi _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleEm _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleEm +_ZN4sycl3_V17handler6addArgENS0_6detail19kernel_param_kind_tEPvii _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim +_ZN4sycl3_V17handler7setTypeENS0_6detail6CGTypeE _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm +_ZN4sycl3_V17handler9clearArgsEv _ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b @@ -3960,8 +3972,10 @@ _ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv +_ZNK4sycl3_V17handler21HasAssociatedAccessorEPNS0_6detail16AccessorImplHostENS0_6access6targetE _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb +_ZNK4sycl3_V17handler7getTypeEv _ZNK4sycl3_V17sampler11getPropListEv _ZNK4sycl3_V17sampler18get_filtering_modeEv _ZNK4sycl3_V17sampler19get_addressing_modeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d79c06baf7d8..b7c33e3d5943 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -640,6 +640,7 @@ ??_Fqueue@_V1@sycl@@QEAAXXZ ?AccessTargetMask@handler@_V1@sycl@@0HB ?Clear@exception_list@_V1@sycl@@AEAAXXZ +?clearArgs@handler@_V1@sycl@@AEAAXXZ ?DirSep@OSUtil@detail@_V1@sycl@@2QEBDEB ?DisableRangeRounding@handler@_V1@sycl@@AEAA_NXZ ?GDBMethodsAnchor@SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ @@ -3695,6 +3696,9 @@ ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z ?addReduction@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z 
?addStream@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@@Z +?addArg@handler@_V1@sycl@@AEAAXW4kernel_param_kind_t@detail@23@PEAXHH@Z +?addLifetimeSharedPtrStorage@handler@_V1@sycl@@AEAAXV?$shared_ptr@$$CBX@std@@@Z +?addAccessorReq@handler@_V1@sycl@@AEAAXV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ?alignedAlloc@OSUtil@detail@_V1@sycl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@_V1@sycl@@SAXPEAX@Z ?aligned_alloc@_V1@sycl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z @@ -3983,7 +3987,7 @@ ?getSize@buffer_plain@detail@_V1@sycl@@IEBA_KXZ ?getSlicePitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ +?getType@handler@_V1@sycl@@AEBA?AW4CGType@detail@23@XZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z ?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ @@ -4069,6 +4073,7 @@ ?get_width@stream@_V1@sycl@@QEBA_KXZ ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z +?HasAssociatedAccessor@handler@_V1@sycl@@AEBA_NPEAVAccessorImplHost@detail@23@W4target@access@23@@Z ?handleRelease@buffer_plain@detail@_V1@sycl@@IEBAXXZ ?has@device@_V1@sycl@@QEBA_NW4aspect@23@@Z ?has@platform@_V1@sycl@@QEBA_NW4aspect@23@@Z @@ -4224,13 +4229,19 @@ ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4_pi_kernel_cache_config@@@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelUsesClusterLaunch@handler@_V1@sycl@@AEAAXXZ ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ -?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z +?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ +?setType@handler@_V1@sycl@@AEAAXW4CGType@detail@23@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z +?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXVinterop_handle@_V1@sycl@@@Z@std@@@Z +?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXXZ@std@@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z ?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_arg@handler@_V1@sycl@@QEAAXH$$QEAVraw_kernel_arg@experimental@oneapi@ext@23@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4254,6 +4265,7 @@ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?storeRawArg@handler@_V1@sycl@@AEAAPEAXAEBVraw_kernel_arg@experimental@oneapi@ext@23@@Z +?storeRawArg@handler@_V1@sycl@@AEAAPEAXPEBX_K@Z ?stringifyErrorCode@detail@_V1@sycl@@YAPEBDH@Z ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z 
?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index db310b629192..03ab7022045c 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -52,9 +52,9 @@ int main() { check(); check(); #ifdef _MSC_VER - check(); + check(); #else - check(); + check(); #endif check, 16, 8>(); check(); diff --git a/sycl/test/abi/vtable.cpp b/sycl/test/abi/vtable.cpp index 8f1766d0960d..f1ff7e02f190 100644 --- a/sycl/test/abi/vtable.cpp +++ b/sycl/test/abi/vtable.cpp @@ -8,30 +8,6 @@ // Changing vtable breaks ABI. If this test fails, please, refer to ABI Policy // Guide for further instructions. -void foo(sycl::detail::HostKernelBase &HKB) { - sycl::detail::NDRDescT Desc; - sycl::detail::HostProfilingInfo HPInfo; - HKB.call(Desc, &HPInfo); -} - -// CHECK: Vtable for 'sycl::detail::HostKernelBase' (6 entries). -// CHECK-NEXT: 0 | offset_to_top (0) -// CHECK-NEXT: 1 | sycl::detail::HostKernelBase RTTI -// CHECK-NEXT: -- (sycl::detail::HostKernelBase, 0) vtable address -- -// CHECK-NEXT: 2 | void sycl::detail::HostKernelBase::call(const NDRDescT &, HostProfilingInfo *) [pure] -// CHECK-NEXT: 3 | char *sycl::detail::HostKernelBase::getPtr() [pure] -// CHECK-NEXT: 4 | sycl::detail::HostKernelBase::~HostKernelBase() [complete] -// CHECK-NEXT: 5 | sycl::detail::HostKernelBase::~HostKernelBase() [deleting] - -void foo(sycl::detail::CG *CG) { delete CG; } -// CHECK: Vtable for 'sycl::detail::CG' (6 entries). -// CHECK-NEXT: 0 | offset_to_top (0) -// CHECK-NEXT: 1 | sycl::detail::CG RTTI -// CHECK-NEXT: -- (sycl::detail::CG, 0) vtable address -- -// CHECK-NEXT: 2 | std::vector> sycl::detail::CG::getAuxiliaryResources() const -// CHECK-NEXT: 3 | void sycl::detail::CG::clearAuxiliaryResources() -// CHECK-NEXT: 4 | sycl::detail::CG::~CG() [complete] - void foo(sycl::detail::PropertyWithDataBase *Prop) { delete Prop; } // CHECK: Vtable for 'sycl::detail::PropertyWithDataBase' (4 entries). 
// CHECK-NEXT: 0 | offset_to_top (0) diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 027bf700f548..711073ad7ff5 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -145,9 +145,6 @@ // CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cg.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: kernel_bundle.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp @@ -158,5 +155,7 @@ // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: kernel.hpp +// CHECK-NEXT: kernel_bundle.hpp // CHECK-NEXT: feature_test.hpp // CHECK-EMPTY: diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 790956d5aeff..4d3d8532efb4 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -43,7 +43,7 @@ TEST_F(CommandGraphTest, EnqueueBarrier) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto BarrierNode = Node->MSuccessors.front().lock(); - ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); + ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CGType::Barrier); ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); @@ -151,7 +151,7 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitList) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); @@ -208,7 +208,7 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); @@ -271,14 +271,14 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier1)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu); for (auto Succ1 : SuccNode->MSuccessors) { auto SuccBarrier1 = Succ1.lock(); - if (SuccBarrier1->MCGType == sycl::detail::CG::Barrier) { + if (SuccBarrier1->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccBarrier1), sycl::detail::getSyclObjImpl(Barrier2)); ASSERT_EQ(SuccBarrier1->MPredecessors.size(), 3lu); @@ -338,7 +338,7 @@ TEST_F(CommandGraphTest, 
InOrderQueueWithPreviousCommand) { for (auto Root : GraphImpl->MRoots) { auto RootNode = Root.lock(); ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); - ASSERT_TRUE(RootNode->MCGType == sycl::detail::CG::Barrier); + ASSERT_TRUE(RootNode->MCGType == sycl::detail::CGType::Barrier); } } @@ -378,7 +378,7 @@ TEST_F(CommandGraphTest, InOrderQueuesWithBarrier) { ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_TRUE(SuccNode->MCGType == sycl::detail::CG::Barrier); + ASSERT_TRUE(SuccNode->MCGType == sycl::detail::CGType::Barrier); ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 20b95f99a2d1..655bbf0d0587 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -167,7 +167,7 @@ TEST_F(CommandGraphTest, GetCGCopy) { auto Node2Imp = sycl::detail::getSyclObjImpl(Node2); auto Node2CGCopy = Node2Imp->getCGCopy(); ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCGType); - ASSERT_EQ(Node2CGCopy->getType(), sycl::detail::CG::Kernel); + ASSERT_EQ(Node2CGCopy->getType(), sycl::detail::CGType::Kernel); ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCommandGroup->getType()); ASSERT_EQ(Node2CGCopy->getAccStorage(), Node2Imp->MCommandGroup->getAccStorage()); diff --git a/sycl/unittests/event/EventDestruction.cpp b/sycl/unittests/event/EventDestruction.cpp index a9381af5dcda..20427b7623cf 100644 --- a/sycl/unittests/event/EventDestruction.cpp +++ b/sycl/unittests/event/EventDestruction.cpp @@ -8,7 +8,6 @@ #include #include -#include #include diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index c2d53e3d8e6d..ae5bca9c4a8b 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -110,6 +110,8 @@ inline pi_result redefinedProgramCreateEAM(pi_context, const void *, size_t, class MockHandler : public sycl::handler { public: + using sycl::handler::impl; + MockHandler(std::shared_ptr Queue) : sycl::handler(Queue, /*CallerNeedsEvent*/ true) {} @@ -117,14 +119,14 @@ class MockHandler : public sycl::handler { auto CGH = static_cast(this); std::unique_ptr CommandGroup; switch (getType()) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { CommandGroup.reset(new sycl::detail::CGExecKernel( - std::move(CGH->MNDRDesc), std::move(CGH->MHostKernel), - std::move(CGH->MKernel), std::move(MImpl->MKernelBundle), - std::move(CGH->CGData), std::move(CGH->MArgs), + std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), + std::move(CGH->MKernel), std::move(impl->MKernelBundle), + std::move(impl->CGData), std::move(impl->MArgs), CGH->MKernelName.c_str(), std::move(CGH->MStreamStorage), - std::move(MImpl->MAuxiliaryResources), CGH->MCGType, {}, - MImpl->MKernelIsCooperative, MImpl->MKernelUsesClusterLaunch, + std::move(impl->MAuxiliaryResources), impl->MCGType, {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, CGH->MCodeLoc)); break; } @@ -135,6 +137,7 @@ class MockHandler : public sycl::handler { return CommandGroup; } + }; const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index bd0df10d1309..2e686a94f67b 100644 --- 
a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -80,7 +80,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { for (auto &Arg : InputEventWaitLists) { std::unique_ptr CommandGroup(new detail::CGBarrier( std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), - detail::CG::CGTYPE::BarrierWaitlist, {})); + detail::CGType::BarrierWaitlist, {})); MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, /*EventNeeded=*/true); } diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 3f97ffb003ad..66c40572440d 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -9,6 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" #include +#include #include #include #include @@ -31,7 +32,7 @@ class MockQueueImpl : public sycl::detail::queue_impl { // Define type with the only methods called by finalizeHandler class LimitedHandler { public: - LimitedHandler(sycl::detail::CG::CGTYPE CGType, + LimitedHandler(sycl::detail::CGType CGType, std::shared_ptr Queue) : MCGType(CGType), MQueue(Queue) {} @@ -46,17 +47,18 @@ class LimitedHandler { return sycl::detail::createSyclObjFromImpl(NewEvent); } - sycl::detail::CG::CGTYPE getType() { return MCGType; } + sycl::detail::CGType getType() { return MCGType; } - sycl::detail::CG::CGTYPE MCGType; + sycl::detail::CGType MCGType; std::shared_ptr MQueue; + std::shared_ptr impl; }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst // event as dependency to the new CG class LimitedHandlerSimulation : public LimitedHandler { public: - LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType, + LimitedHandlerSimulation(sycl::detail::CGType CGType, std::shared_ptr Queue) : LimitedHandler(CGType, Queue) {} @@ -80,14 +82,14 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. 
sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index b98aff0e040d..cc4bfe123e21 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -230,12 +230,12 @@ class MockHandler : public sycl::handler { : sycl::handler(Queue, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; + using sycl::handler::impl; using sycl::handler::getType; - using sycl::handler::MImpl; + using sycl::handler::setNDRangeDescriptor; - sycl::detail::NDRDescT &getNDRDesc() { return MNDRDesc; } + sycl::detail::NDRDescT &getNDRDesc() { return impl->MNDRDesc; } sycl::detail::code_location &getCodeLoc() { return MCodeLoc; } - sycl::detail::CG::CGTYPE &getCGType() { return MCGType; } std::vector> &getStreamStorage() { return MStreamStorage; } @@ -243,28 +243,30 @@ class MockHandler : public sycl::handler { return MHostKernel; } std::vector> &getArgsStorage() { - return CGData.MArgsStorage; + return impl->CGData.MArgsStorage; } std::vector &getAccStorage() { - return CGData.MAccStorage; + return impl->CGData.MAccStorage; } std::vector> &getSharedPtrStorage() { - return CGData.MSharedPtrStorage; + return impl->CGData.MSharedPtrStorage; } std::vector &getRequirements() { - return CGData.MRequirements; + return impl->CGData.MRequirements; } std::vector &getEvents() { - return CGData.MEvents; + return impl->CGData.MEvents; } - std::vector &getArgs() { return MArgs; } + std::vector &getArgs() { return impl->MArgs; } std::string getKernelName() { return MKernelName.c_str(); } std::shared_ptr &getKernel() { return MKernel; } - std::unique_ptr &getHostTask() { return MHostTask; } + std::shared_ptr &getHostTask() { + return impl->MHostTask; + } std::shared_ptr &getQueue() { return MQueue; } - void setType(sycl::detail::CG::CGTYPE Type) { - static_cast(this)->MCGType = Type; + void setType(sycl::detail::CGType Type) { + impl->MCGType = Type; } template void setNDRangeDesc(sycl::nd_range Range) { - static_cast(this)->MNDRDesc.set(std::move(Range)); + setNDRangeDescriptor(std::move(Range)); } void addStream(const sycl::detail::StreamImplPtr &Stream) { @@ -302,19 +304,19 @@ class MockHandlerCustomFinalize : public MockHandler { getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()); switch (getType()) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), - std::move(MImpl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), getStreamStorage(), MImpl->MAuxiliaryResources, - getCGType(), {}, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch, getCodeLoc())); + std::move(impl->MKernelBundle), std::move(CGData), getArgs(), + getKernelName(), getStreamStorage(), impl->MAuxiliaryResources, + getType(), {}, impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch, getCodeLoc())); break; } - case sycl::detail::CG::CodeplayHostTask: { + case 
sycl::detail::CGType::CodeplayHostTask: { CommandGroup.reset(new sycl::detail::CGHostTask( std::move(getHostTask()), getQueue(), getQueue()->getContextImplPtr(), - getArgs(), std::move(CGData), getCGType(), getCodeLoc())); + getArgs(), std::move(CGData), getType(), getCodeLoc())); break; } default: diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index c8aac819e1c3..a45a1cb6842c 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -27,16 +27,16 @@ class MockHandlerStreamInit : public MockHandler { std::unique_ptr finalize() { std::unique_ptr CommandGroup; switch (getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { CommandGroup.reset(new detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), - std::move(MImpl->MKernelBundle), + std::move(impl->MKernelBundle), detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), getArgs(), getKernelName(), getStreamStorage(), - std::move(MImpl->MAuxiliaryResources), getCGType(), {}, - MImpl->MKernelIsCooperative, MImpl->MKernelUsesClusterLaunch, + std::move(impl->MAuxiliaryResources), getType(), {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, getCodeLoc())); break; }