diff --git a/sycl/include/sycl/detail/cg_types.hpp b/sycl/include/sycl/detail/cg_types.hpp index 2a59324a3b0c..89049c7dbf68 100644 --- a/sycl/include/sycl/detail/cg_types.hpp +++ b/sycl/include/sycl/detail/cg_types.hpp @@ -37,117 +37,34 @@ class handler; namespace detail { class HostTask; -// The structure represents kernel argument. -class ArgDesc { -public: - ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, - int Index) - : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} - - sycl::detail::kernel_param_kind_t MType; - void *MPtr; - int MSize; - int MIndex; -}; - -// The structure represents NDRange - global, local sizes, global offset, -// number of dimensions, and the cluster dimensions if applicable. -class NDRDescT { - // The method initializes all sizes for dimensions greater than the passed one - // to the default values, so they will not affect execution. - void setNDRangeLeftover(int Dims_) { - for (int I = Dims_; I < 3; ++I) { - GlobalSize[I] = 1; - LocalSize[I] = LocalSize[0] ? 1 : 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - } - -public: - NDRDescT() - : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0}, - Dims{0} {} - - template void set(sycl::range NumWorkItems) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - // Initializes this ND range descriptor with given range of work items and - // offset. - template - void set(sycl::range NumWorkItems, sycl::id Offset) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = Offset[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void set(sycl::nd_range ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - void set(int Dims_, sycl::nd_range<3> ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void setNumWorkGroups(sycl::range N) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = 0; - // '0' is a mark to adjust before kernel launch when there is enough info: - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = N[I]; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void setClusterDimensions(sycl::range N) { - if (Dims_ != Dims) { - throw std::runtime_error( - "Dimensionality of cluster, global and local ranges must be same"); - } - - for (int I = 0; I < Dims_; ++I) { - ClusterDimensions[I] = N[I]; - } - } - - sycl::range<3> GlobalSize; - sycl::range<3> LocalSize; - sycl::id<3> GlobalOffset; - /// Number of workgroups, used to record the number of workgroups from the - /// simplest form of parallel_for_work_group. If set, all other fields must be - /// zero - sycl::range<3> NumWorkGroups; - sycl::range<3> ClusterDimensions{1, 1, 1}; - size_t Dims; +/// Type of the command group. +/// NOTE: Changing the values of any of these enumerators is an API-break. +enum class CGType : unsigned int { + None = 0, + Kernel = 1, + CopyAccToPtr = 2, + CopyPtrToAcc = 3, + CopyAccToAcc = 4, + Barrier = 5, + BarrierWaitlist = 6, + Fill = 7, + UpdateHost = 8, + CopyUSM = 10, + FillUSM = 11, + PrefetchUSM = 12, + CodeplayHostTask = 14, + AdviseUSM = 15, + Copy2DUSM = 16, + Fill2DUSM = 17, + Memset2DUSM = 18, + CopyToDeviceGlobal = 19, + CopyFromDeviceGlobal = 20, + ReadWriteHostPipe = 21, + ExecCommandBuffer = 22, + CopyImage = 23, + SemaphoreWait = 24, + SemaphoreSignal = 25, + ProfilingTag = 26, }; template struct check_fn_signature { @@ -236,8 +153,6 @@ runKernelWithArg(KernelType KernelName, ArgType Arg) { // The pure virtual class aimed to store lambda/functors of any type. class HostKernelBase { public: - // The method executes lambda stored using NDRange passed. - virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0; // Return pointer to the lambda object. // Used to extract captured variables. virtual char *getPtr() = 0; @@ -255,177 +170,9 @@ class HostKernel : public HostKernelBase { public: HostKernel(KernelType Kernel) : MKernel(Kernel) {} - void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { - // adjust ND range for serial host: - NDRDescT AdjustedRange = NDRDesc; - - if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) { - // This is a special case - NDRange information is not complete, only the - // desired number of work groups is set by the user. Choose work group - // size (LocalSize), calculate the missing NDRange characteristics - // needed to invoke the kernel and adjust the NDRange descriptor - // accordingly. For some devices the work group size selection requires - // access to the device's properties, hence such late "adjustment". - range<3> WGsize{1, 1, 1}; // no better alternative for serial host? - AdjustedRange.set(NDRDesc.Dims, - nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize)); - } - // If local size for host is not set explicitly, let's adjust it to 1, - // so an exception for zero local size is not thrown. - if (AdjustedRange.LocalSize[0] == 0) - for (size_t I = 0; I < AdjustedRange.Dims; ++I) - AdjustedRange.LocalSize[I] = 1; - if (HPI) - HPI->start(); - runOnHost(AdjustedRange); - if (HPI) - HPI->end(); - } char *getPtr() override { return reinterpret_cast(&MKernel); } - template - typename std::enable_if_t> - runOnHost(const NDRDescT &) { - runKernelWithoutArg(MKernel); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - sycl::id Offset; - sycl::range Stride( - InitializedVal::template get<1>()); // initialized to 1 - sycl::range UpperBound( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - Range[I] = NDRDesc.GlobalSize[I]; - Offset[I] = NDRDesc.GlobalOffset[I]; - UpperBound[I] = Range[I] + Offset[I]; - } - - detail::NDLoop::iterate( - /*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID, Offset); - - runKernelWithArg &>(MKernel, ID); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::id ID; - sycl::range Range(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) - Range[I] = NDRDesc.GlobalSize[I]; - - detail::NDLoop::iterate(Range, [&](const sycl::id ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID); - sycl::item ItemWithOffset = Item; - - runKernelWithArg>(MKernel, Item); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - sycl::id Offset; - sycl::range Stride( - InitializedVal::template get<1>()); // initialized to 1 - sycl::range UpperBound( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - Range[I] = NDRDesc.GlobalSize[I]; - Offset[I] = NDRDesc.GlobalOffset[I]; - UpperBound[I] = Range[I] + Offset[I]; - } - - detail::NDLoop::iterate( - /*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID, Offset); - - runKernelWithArg>(MKernel, Item); - }); - } - - template - typename std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range GroupSize(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::exception(make_error_code(errc::nd_range), - "Invalid local size for global size"); - GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - sycl::id GlobalOffset; - for (int I = 0; I < Dims; ++I) { - GlobalOffset[I] = NDRDesc.GlobalOffset[I]; - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - - detail::NDLoop::iterate(GroupSize, [&](const id &GroupID) { - sycl::group Group = IDBuilder::createGroup( - GlobalSize, LocalSize, GroupSize, GroupID); - - detail::NDLoop::iterate(LocalSize, [&](const id &LocalID) { - id GlobalID = - GroupID * id{LocalSize} + LocalID + GlobalOffset; - const sycl::item GlobalItem = - IDBuilder::createItem(GlobalSize, GlobalID, - GlobalOffset); - const sycl::item LocalItem = - IDBuilder::createItem(LocalSize, LocalID); - const sycl::nd_item NDItem = - IDBuilder::createNDItem(GlobalItem, LocalItem, Group); - - runKernelWithArg>(MKernel, NDItem); - }); - }); - } - - template - std::enable_if_t>> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range NGroups(InitializedVal::template get<0>()); - - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::exception(make_error_code(errc::nd_range), - "Invalid local size for global size"); - NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { - sycl::group Group = - IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - runKernelWithArg>(MKernel, Group); - }); - } - ~HostKernel() = default; }; diff --git a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp index f7399213ac88..3e29c20c390d 100644 --- a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp +++ b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include diff --git a/sycl/include/sycl/group.hpp b/sycl/include/sycl/group.hpp index 35c6bc9c9eac..7ce1ead1a229 100644 --- a/sycl/include/sycl/group.hpp +++ b/sycl/include/sycl/group.hpp @@ -673,13 +673,7 @@ template class __SYCL_TYPE(group) group { friend class detail::Builder; group(const range &G, const range &L, const range GroupRange, const id &I) - : globalRange(G), localRange(L), groupRange(GroupRange), index(I) { - // Make sure local range divides global without remainder: - __SYCL_ASSERT(((G % L).size() == 0) && - "global range is not multiple of local"); - __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) && - "inconsistent group constructor arguments"); - } + : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {} }; } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ee796cb78464..3654d735e39b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -175,10 +174,15 @@ class handler_impl; class kernel_impl; class queue_impl; class stream_impl; +class event_impl; template class image_accessor; class HandlerAccess; +class HostTask; + +using EventImplPtr = std::shared_ptr; + template static Arg member_ptr_helper(RetType (Func::*)(Arg) const); @@ -493,28 +497,24 @@ class __SYCL_EXPORT handler { /// \param Graph is a SYCL command_graph handler(std::shared_ptr Graph); - /// Stores copy of Arg passed to the CGData.MArgsStorage. - template void *storePlainArg(T &&Arg) { - CGData.MArgsStorage.emplace_back(sizeof(T)); - void *Storage = static_cast(CGData.MArgsStorage.back().data()); - std::memcpy(Storage, &Arg, sizeof(T)); - return Storage; - } + void *storeRawArg(const void *Ptr, size_t Size); void * storeRawArg(const sycl::ext::oneapi::experimental::raw_kernel_arg &RKA) { - CGData.MArgsStorage.emplace_back(RKA.MArgSize); - void *Storage = static_cast(CGData.MArgsStorage.back().data()); - std::memcpy(Storage, RKA.MArgData, RKA.MArgSize); - return Storage; + return storeRawArg(RKA.MArgData, RKA.MArgSize); + } + + /// Stores copy of Arg passed to the argument storage. + template void *storePlainArg(T &&Arg) { + return storeRawArg(&Arg, sizeof(T)); } - void setType(detail::CG::CGTYPE Type) { MCGType = Type; } + void setType(detail::CGType Type); - detail::CG::CGTYPE getType() { return MCGType; } + detail::CGType getType() const; void throwIfActionIsCreated() { - if (detail::CG::None != getType()) + if (detail::CGType::None != getType()) throw sycl::exception(make_error_code(errc::runtime), "Attempt to set multiple actions for the " "command group. Command group must consist of " @@ -662,8 +662,8 @@ class __SYCL_EXPORT handler { detail::getSyclObjImpl(LocalAccBase); detail::LocalAccessorImplHost *Req = LocalAccImpl.get(); MLocalAccStorage.push_back(std::move(LocalAccImpl)); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req, - static_cast(access::target::local), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_accessor, Req, + static_cast(access::target::local), ArgIndex); } // setArgHelper for local accessor argument (legacy accessor interface) @@ -699,31 +699,28 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Arg; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); detail::AccessorImplHost *Req = AccImpl.get(); - // Add accessor to the list of requirements. - CGData.MRequirements.push_back(Req); - // Store copy of the accessor. - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); // Add accessor to the list of arguments. - MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req, - static_cast(AccessTarget), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_accessor, Req, + static_cast(AccessTarget), ArgIndex); } template void setArgHelper(int ArgIndex, T &&Arg) { void *StoredArg = storePlainArg(Arg); if (!std::is_same::value && std::is_pointer::value) { - MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg, - sizeof(T), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_pointer, StoredArg, sizeof(T), + ArgIndex); } else { - MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout, - StoredArg, sizeof(T), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_std_layout, StoredArg, sizeof(T), + ArgIndex); } } void setArgHelper(int ArgIndex, sampler &&Arg) { void *StoredArg = storePlainArg(Arg); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg, - sizeof(sampler), ArgIndex); + addArg(detail::kernel_param_kind_t::kind_sampler, StoredArg, + sizeof(sampler), ArgIndex); } // setArgHelper for graph dynamic_parameters @@ -745,8 +742,8 @@ class __SYCL_EXPORT handler { void setArgHelper(int ArgIndex, sycl::ext::oneapi::experimental::raw_kernel_arg &&Arg) { auto StoredArg = storeRawArg(Arg); - MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout, StoredArg, - Arg.MArgSize, ArgIndex); + addArg(detail::kernel_param_kind_t::kind_std_layout, StoredArg, + Arg.MArgSize, ArgIndex); } /// Registers a dynamic parameter with the handler for later association with @@ -934,7 +931,7 @@ class __SYCL_EXPORT handler { // header, so don't perform things that require it. if (KernelHasName) { // TODO support ESIMD in no-integration-header case too. - MArgs.clear(); + clearArgs(); extractArgsAndReqsFromLambda(reinterpret_cast(KernelPtr), KI::getNumParams(), &KI::getParamDesc(0), KI::isESIMD()); @@ -944,7 +941,7 @@ class __SYCL_EXPORT handler { // accessors from the list(which are associated with this handler) as // arguments. We must copy the associated accessors as they are checked // later during finalize. - MArgs = MAssociatedAccesors; + setArgsToAssociatedAccessors(); } // If the kernel lambda is callable with a kernel_handler argument, manifest @@ -962,15 +959,14 @@ class __SYCL_EXPORT handler { template void checkAndSetClusterRange(const Properties &Props) { namespace syclex = sycl::ext::oneapi::experimental; - constexpr std::size_t cluster_dim = + constexpr std::size_t ClusterDim = syclex::detail::getClusterDim(); - if constexpr (cluster_dim > 0) { - setKernelUsesClusterLaunch(); - MNDRDesc.setClusterDimensions( - Props - .template get_property< - syclex::cuda::cluster_size_key>() - .get_cluster_size()); + if constexpr (ClusterDim > 0) { + auto ClusterSize = Props + .template get_property< + syclex::cuda::cluster_size_key>() + .get_cluster_size(); + setKernelClusterLaunch(padRange(ClusterSize), ClusterDim); } } @@ -1358,10 +1354,10 @@ class __SYCL_EXPORT handler { // __SYCL_ASSUME_INT can still be violated. So check the bounds // of the user range, instead of the rounded range. detail::checkValueRange(UserRange); - MNDRDesc.set(RoundedRange); + setNDRangeDescriptor(RoundedRange); StoreLambda( std::move(Wrapper)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif } else @@ -1379,10 +1375,10 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ processProperties(Props); detail::checkValueRange(UserRange); - MNDRDesc.set(std::move(UserRange)); + setNDRangeDescriptor(std::move(UserRange)); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif #else @@ -1428,11 +1424,11 @@ class __SYCL_EXPORT handler { PropertiesT>(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(ExecutionRange); - MNDRDesc.set(std::move(ExecutionRange)); + setNDRangeDescriptor(std::move(ExecutionRange)); processProperties(Props); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(true); #endif } @@ -1449,8 +1445,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems); - MNDRDesc.set(std::move(NumWorkItems)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NumWorkItems)); + setType(detail::CGType::Kernel); setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1487,9 +1483,9 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ processProperties(Props); detail::checkValueRange(NumWorkGroups); - MNDRDesc.setNumWorkGroups(NumWorkGroups); + setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif // __SYCL_DEVICE_ONLY__ } @@ -1531,9 +1527,9 @@ class __SYCL_EXPORT handler { nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); detail::checkValueRange(ExecRange); - MNDRDesc.set(std::move(ExecRange)); + setNDRangeDescriptor(std::move(ExecRange)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -1797,10 +1793,10 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ // No need to check if range is out of INT_MAX limits as it's compile-time // known constant. - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); processProperties(Props); StoreLambda(KernelFunc); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif } @@ -1816,12 +1812,23 @@ class __SYCL_EXPORT handler { void setHandlerKernelBundle( const std::shared_ptr &NewKernelBundleImpPtr); + void SetHostTask(std::function &&Func); + void SetHostTask(std::function &&Func); + template std::enable_if_t, void()>::value || detail::check_fn_signature, void(interop_handle)>::value> - host_task_impl(FuncT &&Func); + host_task_impl(FuncT &&Func) { + throwIfActionIsCreated(); + + // Need to copy these rather than move so that we can check associated + // accessors during finalize + setArgsToAssociatedAccessors(); + + SetHostTask(std::move(Func)); + } /// @brief Get the command graph if any associated with this handler. It can /// come from either the associated queue or from being set explicitly through @@ -2062,10 +2069,10 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); StoreLambda( std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); #endif } @@ -2123,9 +2130,9 @@ class __SYCL_EXPORT handler { setHandlerKernelBundle(Kernel); // No need to check if range is out of INT_MAX limits as it's compile-time // known constant - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); } @@ -2157,8 +2164,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); + setType(detail::CGType::Kernel); setNDRangeUsed(false); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2176,8 +2183,8 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NDRange); - MNDRDesc.set(std::move(NDRange)); - setType(detail::CG::Kernel); + setNDRangeDescriptor(std::move(NDRange)); + setType(detail::CGType::Kernel); setNDRangeUsed(true); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2202,9 +2209,9 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ // No need to check if range is out of INT_MAX limits as it's compile-time // known constant - MNDRDesc.set(range<1>{1}); + setNDRangeDescriptor(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); @@ -2238,9 +2245,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems); - MNDRDesc.set(std::move(NumWorkItems)); + setNDRangeDescriptor(std::move(NumWorkItems)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2278,9 +2285,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems, WorkItemOffset); - MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); + setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(false); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2317,9 +2324,9 @@ class __SYCL_EXPORT handler { kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NDRange); - MNDRDesc.set(std::move(NDRange)); + setNDRangeDescriptor(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(true); if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); @@ -2360,10 +2367,10 @@ class __SYCL_EXPORT handler { kernel_parallel_for_work_group_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkGroups); - MNDRDesc.setNumWorkGroups(NumWorkGroups); + setNDRangeDescriptor(NumWorkGroups, /*SetNumWorkGroups=*/true); MKernel = detail::getSyclObjImpl(std::move(Kernel)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -2403,10 +2410,10 @@ class __SYCL_EXPORT handler { nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); detail::checkValueRange(ExecRange); - MNDRDesc.set(std::move(ExecRange)); + setNDRangeDescriptor(std::move(ExecRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); + setType(detail::CGType::Kernel); #endif // __SYCL_DEVICE_ONLY__ } @@ -2593,7 +2600,7 @@ class __SYCL_EXPORT handler { "Invalid accessor mode for the copy method."); // Make sure data shared_ptr points to is not released until we finish // work with it. - CGData.MSharedPtrStorage.push_back(Dst); + addLifetimeSharedPtrStorage(Dst); typename std::shared_ptr::element_type *RawDstPtr = Dst.get(); copy(Src, RawDstPtr); } @@ -2623,7 +2630,7 @@ class __SYCL_EXPORT handler { // device-copyable. // Make sure data shared_ptr points to is not released until we finish // work with it. - CGData.MSharedPtrStorage.push_back(Src); + addLifetimeSharedPtrStorage(Src); typename std::shared_ptr::element_type *RawSrcPtr = Src.get(); copy(RawSrcPtr, Dst); } @@ -2648,17 +2655,16 @@ class __SYCL_EXPORT handler { "Invalid accessor target for the copy method."); static_assert(isValidModeForSourceAccessor(AccessMode), "Invalid accessor mode for the copy method."); - setType(detail::CG::CopyAccToPtr); + setType(detail::CGType::CopyAccToPtr); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Src; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - CGData.MRequirements.push_back(AccImpl.get()); MSrcPtr = static_cast(AccImpl.get()); MDstPtr = static_cast(Dst); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } /// Copies the content of memory pointed by Src into the memory object @@ -2685,17 +2691,16 @@ class __SYCL_EXPORT handler { // TODO: Add static_assert with is_device_copyable when vec is // device-copyable. - setType(detail::CG::CopyPtrToAcc); + setType(detail::CGType::CopyPtrToAcc); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - CGData.MRequirements.push_back(AccImpl.get()); MSrcPtr = const_cast(Src); MDstPtr = static_cast(AccImpl.get()); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } /// Copies the content of memory object accessed by Src to the memory @@ -2738,7 +2743,7 @@ class __SYCL_EXPORT handler { if (copyAccToAccHelper(Src, Dst)) return; - setType(detail::CG::CopyAccToAcc); + setType(detail::CGType::CopyAccToAcc); detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src; detail::AccessorImplPtr AccImplSrc = detail::getSyclObjImpl(*AccBaseSrc); @@ -2746,14 +2751,12 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBaseDst = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImplDst = detail::getSyclObjImpl(*AccBaseDst); - CGData.MRequirements.push_back(AccImplSrc.get()); - CGData.MRequirements.push_back(AccImplDst.get()); MSrcPtr = AccImplSrc.get(); MDstPtr = AccImplDst.get(); // Store copy of accessor to the local storage to make sure it is alive // until we finish - CGData.MAccStorage.push_back(std::move(AccImplSrc)); - CGData.MAccStorage.push_back(std::move(AccImplDst)); + addAccessorReq(std::move(AccImplSrc)); + addAccessorReq(std::move(AccImplDst)); } /// Provides guarantees that the memory object accessed via Acc is updated @@ -2771,14 +2774,13 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); static_assert(isValidTargetForExplicitOp(AccessTarget), "Invalid accessor target for the update_host method."); - setType(detail::CG::UpdateHost); + setType(detail::CGType::UpdateHost); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); MDstPtr = static_cast(AccImpl.get()); - CGData.MRequirements.push_back(AccImpl.get()); - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); } public: @@ -2859,7 +2861,7 @@ class __SYCL_EXPORT handler { /// complete state. void ext_oneapi_barrier() { throwIfActionIsCreated(); - setType(detail::CG::Barrier); + setType(detail::CGType::Barrier); } /// Prevents any commands submitted afterward to this queue from executing @@ -3287,31 +3289,14 @@ class __SYCL_EXPORT handler { uint64_t SignalValue); private: - std::shared_ptr MImpl; + std::shared_ptr impl; std::shared_ptr MQueue; - /// The storage for the arguments passed. - /// We need to store a copy of values that are passed explicitly through - /// set_arg, require and so on, because we need them to be alive after - /// we exit the method they are passed in. - mutable detail::CG::StorageInitHelper CGData; std::vector MLocalAccStorage; std::vector> MStreamStorage; - /// The list of arguments for the kernel. - std::vector MArgs; - /// The list of associated accessors with this handler. - /// These accessors were created with this handler as argument or - /// have become required for this handler via require method. - std::vector MAssociatedAccesors; - /// Struct that encodes global size, local size, ... - detail::NDRDescT MNDRDesc; detail::string MKernelName; /// Storage for a sycl::kernel object. std::shared_ptr MKernel; - /// Type of the command group, e.g. kernel, fill. Can also encode version. - /// Use getType and setType methods to access this variable unless - /// manipulations with version are required - detail::CG::CGTYPE MCGType = detail::CG::None; /// Pointer to the source host memory or accessor(depending on command type). void *MSrcPtr = nullptr; /// Pointer to the dest host memory or accessor(depends on command type). @@ -3322,22 +3307,6 @@ class __SYCL_EXPORT handler { std::vector MPattern; /// Storage for a lambda or function object. std::unique_ptr MHostKernel; - /// Storage for lambda/function when using HostTask - std::unique_ptr MHostTask; - /// The list of valid SYCL events that need to complete - /// before barrier command can be executed - std::vector MEventsWaitWithBarrier; - - /// The graph that is associated with this handler. - std::shared_ptr MGraph; - /// If we are submitting a graph using ext_oneapi_graph this will be the graph - /// to be executed. - std::shared_ptr - MExecGraph; - /// Storage for a node created from a subgraph submission. - std::shared_ptr MSubgraphNode; - /// Storage for the CG created when handling graph nodes added explicitly. - std::unique_ptr MGraphNodeCG; detail::code_location MCodeLoc = {}; bool MIsFinalized = false; @@ -3399,6 +3368,10 @@ class __SYCL_EXPORT handler { class _propertiesT, class> friend class ext::intel::experimental::pipe; + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + /// Read from a host pipe given a host address and /// \param Name name of the host pipe to be passed into lower level runtime /// \param Ptr host pointer of host pipe as identified by address of its const @@ -3523,13 +3496,12 @@ class __SYCL_EXPORT handler { accessor Dst, const T &Pattern) { - setType(detail::CG::Fill); + setType(detail::CGType::Fill); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); MDstPtr = static_cast(AccImpl.get()); - CGData.MRequirements.push_back(AccImpl.get()); - CGData.MAccStorage.push_back(std::move(AccImpl)); + addAccessorReq(std::move(AccImpl)); MPattern.resize(sizeof(T)); auto PatternPtr = reinterpret_cast(MPattern.data()); @@ -3623,15 +3595,8 @@ class __SYCL_EXPORT handler { accessor Acc) { auto *AccBase = reinterpret_cast(&Acc); - detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - detail::AccessorImplHost *Req = AccImpl.get(); - if (std::find_if(MAssociatedAccesors.begin(), MAssociatedAccesors.end(), - [&](const detail::ArgDesc &AD) { - return AD.MType == - detail::kernel_param_kind_t::kind_accessor && - AD.MPtr == Req && - AD.MSize == static_cast(AccessTarget); - }) == MAssociatedAccesors.end()) + detail::AccessorImplHost *Req = detail::getSyclObjImpl(*AccBase).get(); + if (HasAssociatedAccessor(Req, AccessTarget)) throw sycl::exception(make_error_code(errc::kernel_argument), "placeholder accessor must be bound by calling " "handler::require() before it can be used."); @@ -3642,8 +3607,8 @@ class __SYCL_EXPORT handler { // Set value of the kernel is cooperative flag void setKernelIsCooperative(bool); - // Set using cuda thread block cluster launch flag true - void setKernelUsesClusterLaunch(); + // Set using cuda thread block cluster launch flag and set the launch bounds. + void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> @@ -3665,9 +3630,70 @@ class __SYCL_EXPORT handler { inline void internalProfilingTagImpl() { throwIfActionIsCreated(); - setType(detail::CG::ProfilingTag); + setType(detail::CGType::ProfilingTag); } + void addAccessorReq(detail::AccessorImplPtr Accessor); + + void addLifetimeSharedPtrStorage(std::shared_ptr SPtr); + + void addArg(detail::kernel_param_kind_t ArgKind, void *Req, int AccessTarget, + int ArgIndex); + void clearArgs(); + void setArgsToAssociatedAccessors(); + + bool HasAssociatedAccessor(detail::AccessorImplHost *Req, + access::target AccessTarget) const; + + template static sycl::range<3> padRange(sycl::range Range) { + if constexpr (Dims == 3) { + return Range; + } else { + sycl::range<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Range[I]; + return Res; + } + } + + template static sycl::id<3> padId(sycl::id Id) { + if constexpr (Dims == 3) { + return Id; + } else { + sycl::id<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Id[I]; + return Res; + } + } + + template + void setNDRangeDescriptor(sycl::range N, + bool SetNumWorkGroups = false) { + return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims); + } + template + void setNDRangeDescriptor(sycl::range NumWorkItems, + sycl::id Offset) { + return setNDRangeDescriptorPadded(padRange(NumWorkItems), padId(Offset), + Dims); + } + template + void setNDRangeDescriptor(sycl::nd_range ExecutionRange) { + return setNDRangeDescriptorPadded( + padRange(ExecutionRange.get_global_range()), + padRange(ExecutionRange.get_local_range()), + padId(ExecutionRange.get_offset()), Dims); + } + + void setNDRangeDescriptorPadded(sycl::range<3> N, bool SetNumWorkGroups, + int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims); + void setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset, + int Dims); + friend class detail::HandlerAccess; protected: @@ -3688,17 +3714,3 @@ class HandlerAccess { } // namespace _V1 } // namespace sycl - -#ifdef __SYCL_BUILD_SYCL_DLL -// The following fails (somewhat expectedly) when compiled with MSVC: -// -// #include -// struct __declspec(dllexport) handler { -// std::unique_ptr Member; -// }; -// -// We do __SYCL_EXPORT sycl::handler class and it has an -// std::unique_ptr member. As such, ensure the type is -// complete if we're building the SYCL shared library. -#include -#endif diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index b1b7d93bc679..3ce31300ba0f 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -41,12 +41,6 @@ #include // for property_list #include // for range -#if __SYCL_USE_FALLBACK_ASSERT -// TODO: maybe we can move detail::submitAssertCapture into the shared library -// instead. -#include -#endif - #include // for size_t #include // for function #include // for shared_ptr, hash diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 3e969837759b..737306818767 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -45,14 +45,6 @@ #include // for dynamic_e... #include // for malloc_de... -// reduction::withAuxHandler calls handler::~handler() and that, in turn, needs -// all the dtors from std::unique_pointer handler's data members, including the -// host_task-related stuff. That's not the case for -// because handler object is only ctor/dtor'ed inside SYCL shared library but -// not in the current translation unit. It would be nice to find a better way -// than this include in future. -#include - #include // for min #include // for array #include // for assert diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index eef0fc51d3b0..c965b80236bd 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -14,7 +14,6 @@ #include // for atomic #include // for isinf, isnan, signbit #include // for array -#include // for stream_impl #include // for __SYCL_SPECIAL_CLASS, __S... #include // for __SYCL2020_DEPRECATED #include // for __SYCL_EXPORT @@ -44,6 +43,8 @@ inline namespace _V1 { namespace detail { +class stream_impl; + using FmtFlags = unsigned int; // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index fa5041ff0ece..28dc24fb9cd2 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -10,8 +10,6 @@ #include -#include - #include #include #include @@ -37,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/include/syclcompat/memory.hpp b/sycl/include/syclcompat/memory.hpp index ad33ce9a9bdf..93dace8bb60d 100644 --- a/sycl/include/syclcompat/memory.hpp +++ b/sycl/include/syclcompat/memory.hpp @@ -42,7 +42,6 @@ #include #include -#include #include #include #include diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index d3f1557871d9..aa3547a4a44a 100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -15,12 +15,12 @@ namespace sycl { inline namespace _V1 { namespace detail { device getDeviceFromHandler(handler &cgh) { - assert((cgh.MQueue || cgh.MGraph) && + assert((cgh.MQueue || getSyclObjImpl(cgh)->MGraph) && "One of MQueue or MGraph should be nonnull!"); if (cgh.MQueue) return cgh.MQueue->get_device(); - return cgh.MGraph->getDevice(); + return getSyclObjImpl(cgh)->MGraph->getDevice(); } AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, diff --git a/sycl/include/sycl/detail/cg.hpp b/sycl/source/detail/cg.hpp similarity index 76% rename from sycl/include/sycl/detail/cg.hpp rename to sycl/source/detail/cg.hpp index b6955af412b3..48d80c06394b 100644 --- a/sycl/include/sycl/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -46,41 +46,119 @@ class stream_impl; class queue_impl; class kernel_bundle_impl; -// If there's a need to add new members to CG classes without breaking ABI -// compatibility, we can bring back the extended members mechanism. See -// https://github.com/intel/llvm/pull/6759 +// The structure represents kernel argument. +class ArgDesc { +public: + ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, + int Index) + : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} + + sycl::detail::kernel_param_kind_t MType; + void *MPtr; + int MSize; + int MIndex; +}; + +// The structure represents NDRange - global, local sizes, global offset and +// number of dimensions. +class NDRDescT { + // The method initializes all sizes for dimensions greater than the passed one + // to the default values, so they will not affect execution. + void setNDRangeLeftover() { + for (int I = Dims; I < 3; ++I) { + GlobalSize[I] = 1; + LocalSize[I] = LocalSize[0] ? 1 : 0; + GlobalOffset[I] = 0; + NumWorkGroups[I] = 0; + } + } + + template + static sycl::range<3> padRange(sycl::range Range) { + if constexpr (Dims == 3) { + return Range; + } else { + sycl::range<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Range[I]; + return Res; + } + } + + template static sycl::id<3> padId(sycl::id Id) { + if constexpr (Dims == 3) { + return Id; + } else { + sycl::id<3> Res{0, 0, 0}; + for (int I = 0; I < Dims; ++I) + Res[I] = Id[I]; + return Res; + } + } + +public: + NDRDescT() = default; + NDRDescT(const NDRDescT &Desc) = default; + NDRDescT(NDRDescT &&Desc) = default; + + NDRDescT(sycl::range<3> N, bool SetNumWorkGroups, int DimsArg) + : GlobalSize{SetNumWorkGroups ? sycl::range<3>{0, 0, 0} : N}, + NumWorkGroups{SetNumWorkGroups ? N : sycl::range<3>{0, 0, 0}}, + Dims{size_t(DimsArg)} { + setNDRangeLeftover(); + } + + NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) + : GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {} + + NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize, + sycl::id<3> Offset, int DimsArg) + : GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset}, + Dims{size_t(DimsArg)} { + setNDRangeLeftover();} + + template + NDRDescT(sycl::nd_range ExecutionRange, int DimsArg) + : NDRDescT(padRange(ExecutionRange.get_global_range()), + padRange(ExecutionRange.get_local_range()), + padId(ExecutionRange.get_offset()), size_t(DimsArg)) { + setNDRangeLeftover();} + + template + NDRDescT(sycl::nd_range ExecutionRange) + : NDRDescT(ExecutionRange, Dims_) {} + + template + NDRDescT(sycl::range Range) + : NDRDescT(padRange(Range), /*SetNumWorkGroups=*/false, Dims_) {} + + void setClusterDimensions(sycl::range<3> N, int Dims) { + if (this->Dims != size_t(Dims)) { + throw std::runtime_error( + "Dimensionality of cluster, global and local ranges must be same"); + } + + for (int I = 0; I < 3; ++I) + ClusterDimensions[I] = (I < Dims) ? N[I] : 1; + } + + NDRDescT &operator=(const NDRDescT &Desc) = default; + NDRDescT &operator=(NDRDescT &&Desc) = default; + + sycl::range<3> GlobalSize{0, 0, 0}; + sycl::range<3> LocalSize{0, 0, 0}; + sycl::id<3> GlobalOffset{0, 0, 0}; + /// Number of workgroups, used to record the number of workgroups from the + /// simplest form of parallel_for_work_group. If set, all other fields must be + /// zero + sycl::range<3> NumWorkGroups{0, 0, 0}; + sycl::range<3> ClusterDimensions{1, 1, 1}; + size_t Dims = 0; +}; + /// Base class for all types of command groups. class CG { public: - /// Type of the command group. - enum CGTYPE : unsigned int { - None = 0, - Kernel = 1, - CopyAccToPtr = 2, - CopyPtrToAcc = 3, - CopyAccToAcc = 4, - Barrier = 5, - BarrierWaitlist = 6, - Fill = 7, - UpdateHost = 8, - CopyUSM = 10, - FillUSM = 11, - PrefetchUSM = 12, - CodeplayHostTask = 14, - AdviseUSM = 15, - Copy2DUSM = 16, - Fill2DUSM = 17, - Memset2DUSM = 18, - CopyToDeviceGlobal = 19, - CopyFromDeviceGlobal = 20, - ReadWriteHostPipe = 21, - ExecCommandBuffer = 22, - CopyImage = 23, - SemaphoreWait = 24, - SemaphoreSignal = 25, - ProfilingTag = 26, - }; - struct StorageInitHelper { StorageInitHelper() = default; StorageInitHelper(std::vector> ArgsStorage, @@ -110,7 +188,7 @@ class CG { std::vector MEvents; }; - CG(CGTYPE Type, StorageInitHelper D, detail::code_location loc = {}) + CG(CGType Type, StorageInitHelper D, detail::code_location loc = {}) : MType(Type), MData(std::move(D)) { // Capture the user code-location from Q.submit(), Q.parallel_for() // etc for later use; if code location information is not available, @@ -126,7 +204,7 @@ class CG { CG(CG &&CommandGroup) = default; CG(const CG &CommandGroup) = default; - CGTYPE getType() const { return MType; } + CGType getType() const { return MType; } std::vector> &getArgsStorage() { return MData.MArgsStorage; @@ -152,7 +230,7 @@ class CG { virtual ~CG() = default; private: - CGTYPE MType; + CGType MType; StorageInitHelper MData; public: @@ -187,7 +265,7 @@ class CGExecKernel : public CG { std::string KernelName, std::vector> Streams, std::vector> AuxiliaryResources, - CGTYPE Type, + CGType Type, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, bool KernelIsCooperative, bool MKernelUsesClusterLaunch, detail::code_location loc = {}) @@ -200,7 +278,7 @@ class CGExecKernel : public CG { MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), MKernelUsesClusterLaunch(MKernelUsesClusterLaunch) { - assert(getType() == Kernel && "Wrong type of exec kernel CG."); + assert(getType() == CGType::Kernel && "Wrong type of exec kernel CG."); } CGExecKernel(const CGExecKernel &CGExec) = default; @@ -232,7 +310,7 @@ class CGCopy : public CG { std::vector> MAuxiliaryResources; public: - CGCopy(CGTYPE CopyType, void *Src, void *Dst, CG::StorageInitHelper CGData, + CGCopy(CGType CopyType, void *Src, void *Dst, CG::StorageInitHelper CGData, std::vector> AuxiliaryResources, detail::code_location loc = {}) : CG(CopyType, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), @@ -255,7 +333,7 @@ class CGFill : public CG { CGFill(std::vector Pattern, void *Ptr, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Fill, std::move(CGData), std::move(loc)), + : CG(CGType::Fill, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToFill() { return MPtr; } }; @@ -267,7 +345,7 @@ class CGUpdateHost : public CG { public: CGUpdateHost(void *Ptr, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(UpdateHost, std::move(CGData), std::move(loc)), + : CG(CGType::UpdateHost, std::move(CGData), std::move(loc)), MPtr((AccessorImplHost *)Ptr) {} AccessorImplHost *getReqToUpdate() { return MPtr; } @@ -282,7 +360,7 @@ class CGCopyUSM : public CG { public: CGCopyUSM(void *Src, void *Dst, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MLength(Length) {} void *getSrc() { return MSrc; } @@ -299,7 +377,7 @@ class CGFillUSM : public CG { public: CGFillUSM(std::vector Pattern, void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(FillUSM, std::move(CGData), std::move(loc)), + : CG(CGType::FillUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } @@ -314,7 +392,7 @@ class CGPrefetchUSM : public CG { public: CGPrefetchUSM(void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(PrefetchUSM, std::move(CGData), std::move(loc)), MDst(DstPtr), + : CG(CGType::PrefetchUSM, std::move(CGData), std::move(loc)), MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } @@ -328,7 +406,7 @@ class CGAdviseUSM : public CG { public: CGAdviseUSM(void *DstPtr, size_t Length, pi_mem_advice Advice, - CG::StorageInitHelper CGData, CGTYPE Type, + CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), MDst(DstPtr), MLength(Length), MAdvice(Advice) {} @@ -342,7 +420,7 @@ class CGBarrier : public CG { std::vector MEventsWaitWithBarrier; CGBarrier(std::vector EventsWaitWithBarrier, - CG::StorageInitHelper CGData, CGTYPE Type, + CG::StorageInitHelper CGData, CGType Type, detail::code_location loc = {}) : CG(Type, std::move(CGData), std::move(loc)), MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {} @@ -351,7 +429,7 @@ class CGBarrier : public CG { class CGProfilingTag : public CG { public: CGProfilingTag(CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CG::ProfilingTag, std::move(CGData), std::move(loc)) {} + : CG(CGType::ProfilingTag, std::move(CGData), std::move(loc)) {} }; /// "Copy 2D USM" command group class. @@ -367,7 +445,7 @@ class CGCopy2DUSM : public CG { CGCopy2DUSM(void *Src, void *Dst, size_t SrcPitch, size_t DstPitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MSrcPitch(SrcPitch), MDstPitch(DstPitch), MWidth(Width), MHeight(Height) {} @@ -391,7 +469,7 @@ class CGFill2DUSM : public CG { CGFill2DUSM(std::vector Pattern, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Fill2DUSM, std::move(CGData), std::move(loc)), + : CG(CGType::Fill2DUSM, std::move(CGData), std::move(loc)), MPattern(std::move(Pattern)), MDst(DstPtr), MPitch(Pitch), MWidth(Width), MHeight(Height) {} void *getDst() const { return MDst; } @@ -413,7 +491,7 @@ class CGMemset2DUSM : public CG { CGMemset2DUSM(char Value, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(Memset2DUSM, std::move(CGData), std::move(loc)), MValue(Value), + : CG(CGType::Memset2DUSM, std::move(CGData), std::move(loc)), MValue(Value), MDst(DstPtr), MPitch(Pitch), MWidth(Width), MHeight(Height) {} void *getDst() const { return MDst; } size_t getPitch() const { return MPitch; } @@ -434,7 +512,7 @@ class CGReadWriteHostPipe : public CG { CGReadWriteHostPipe(const std::string &Name, bool Block, void *Ptr, size_t Size, bool Read, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(ReadWriteHostPipe, std::move(CGData), std::move(loc)), + : CG(CGType::ReadWriteHostPipe, std::move(CGData), std::move(loc)), PipeName(Name), Blocking(Block), HostPtr(Ptr), TypeSize(Size), IsReadOp(Read) {} @@ -458,7 +536,7 @@ class CGCopyToDeviceGlobal : public CG { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyToDeviceGlobal, std::move(CGData), std::move(loc)), MSrc(Src), + : CG(CGType::CopyToDeviceGlobal, std::move(CGData), std::move(loc)), MSrc(Src), MDeviceGlobalPtr(DeviceGlobalPtr), MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes), MOffset(Offset) {} @@ -483,7 +561,7 @@ class CGCopyFromDeviceGlobal : public CG { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyFromDeviceGlobal, std::move(CGData), std::move(loc)), + : CG(CGType::CopyFromDeviceGlobal, std::move(CGData), std::move(loc)), MDeviceGlobalPtr(DeviceGlobalPtr), MDest(Dest), MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes), MOffset(Offset) {} @@ -515,7 +593,7 @@ class CGCopyImage : public CG { sycl::detail::pi::PiImageRegion HostExtent, sycl::detail::pi::PiImageRegion CopyExtent, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), + : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MImageDesc(ImageDesc), MImageFormat(ImageFormat), MImageCopyFlags(ImageCopyFlags), MSrcOffset(SrcOffset), MDstOffset(DstOffset), MHostExtent(HostExtent), @@ -544,7 +622,7 @@ class CGSemaphoreWait : public CG { sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, std::optional WaitValue, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(SemaphoreWait, std::move(CGData), std::move(loc)), + : CG(CGType::SemaphoreWait, std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MWaitValue(WaitValue) { } @@ -564,7 +642,7 @@ class CGSemaphoreSignal : public CG { sycl::detail::pi::PiInteropSemaphoreHandle InteropSemaphoreHandle, std::optional SignalValue, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(SemaphoreSignal, std::move(CGData), std::move(loc)), + : CG(CGType::SemaphoreSignal, std::move(CGData), std::move(loc)), MInteropSemaphoreHandle(InteropSemaphoreHandle), MSignalValue(SignalValue) {} @@ -586,10 +664,29 @@ class CGExecCommandBuffer : public CG { const std::shared_ptr< sycl::ext::oneapi::experimental::detail::exec_graph_impl> &ExecGraph, CG::StorageInitHelper CGData) - : CG(CGTYPE::ExecCommandBuffer, std::move(CGData)), + : CG(CGType::ExecCommandBuffer, std::move(CGData)), MCommandBuffer(CommandBuffer), MExecGraph(ExecGraph) {} }; +class CGHostTask : public CG { +public: + std::shared_ptr MHostTask; + // queue for host-interop task + std::shared_ptr MQueue; + // context for host-interop task + std::shared_ptr MContext; + std::vector MArgs; + + CGHostTask(std::shared_ptr HostTask, + std::shared_ptr Queue, + std::shared_ptr Context, + std::vector Args, CG::StorageInitHelper CGData, + CGType Type, detail::code_location loc = {}) + : CG(Type, std::move(CGData), std::move(loc)), + MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context), + MArgs(std::move(Args)) {} +}; + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index 49bad6f2a5e3..879a26905088 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -8,8 +8,8 @@ #pragma once +#include #include -#include #include namespace sycl { diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 127e0d618925..7b6907d2298d 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -98,7 +98,7 @@ void sortTopological(std::shared_ptr NodeImpl, /// @param PartitionNum Number to propagate. void propagatePartitionUp(std::shared_ptr Node, int PartitionNum) { if (((Node->MPartitionNum != -1) && (Node->MPartitionNum <= PartitionNum)) || - (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask)) { + (Node->MCGType == sycl::detail::CGType::CodeplayHostTask)) { return; } Node->MPartitionNum = PartitionNum; @@ -117,7 +117,7 @@ void propagatePartitionUp(std::shared_ptr Node, int PartitionNum) { void propagatePartitionDown( std::shared_ptr Node, int PartitionNum, std::list> &HostTaskList) { - if (Node->MCGType == sycl::detail::CG::CGTYPE::CodeplayHostTask) { + if (Node->MCGType == sycl::detail::CGType::CodeplayHostTask) { if (Node->MPartitionNum != -1) { HostTaskList.push_front(Node); } @@ -185,7 +185,7 @@ void exec_graph_impl::makePartitions() { std::list> HostTaskList; // find all the host-tasks in the graph for (auto &Node : MNodeStorage) { - if (Node->MCGType == sycl::detail::CG::CodeplayHostTask) { + if (Node->MCGType == sycl::detail::CGType::CodeplayHostTask) { HostTaskList.push_back(Node); } } @@ -374,7 +374,7 @@ graph_impl::add(const std::shared_ptr &Impl, sycl::handler Handler{Impl}; CGF(Handler); - if (Handler.MCGType == sycl::detail::CG::Barrier) { + if (Handler.getType() == sycl::detail::CGType::Barrier) { throw sycl::exception( make_error_code(errc::invalid), "The sycl_ext_oneapi_enqueue_barrier feature is not available with " @@ -384,20 +384,21 @@ graph_impl::add(const std::shared_ptr &Impl, Handler.finalize(); node_type NodeType = - Handler.MImpl->MUserFacingNodeType != + Handler.impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty - ? Handler.MImpl->MUserFacingNodeType + ? Handler.impl->MUserFacingNodeType : ext::oneapi::experimental::detail::getNodeTypeFromCG( - Handler.MCGType); + Handler.getType()); - auto NodeImpl = this->add(NodeType, std::move(Handler.MGraphNodeCG), Dep); - NodeImpl->MNDRangeUsed = Handler.MImpl->MNDRangeUsed; + auto NodeImpl = + this->add(NodeType, std::move(Handler.impl->MGraphNodeCG), Dep); + NodeImpl->MNDRangeUsed = Handler.impl->MNDRangeUsed; // Add an event associated with this explicit node for mixed usage addEventForNode(Impl, std::make_shared(), NodeImpl); // Retrieve any dynamic parameters which have been registered in the CGF and // register the actual nodes with them. - auto &DynamicParams = Handler.MImpl->MDynamicParameters; + auto &DynamicParams = Handler.impl->MDynamicParameters; if (NodeType != node_type::kernel && DynamicParams.size() > 0) { throw sycl::exception(sycl::make_error_code(errc::invalid), @@ -721,10 +722,10 @@ void exec_graph_impl::createCommandBuffers( if (Node->isEmpty()) continue; - sycl::detail::CG::CGTYPE type = Node->MCGType; + sycl::detail::CGType type = Node->MCGType; // If the node is a kernel with no special requirements we can enqueue it // directly. - if (type == sycl::detail::CG::Kernel && + if (type == sycl::detail::CGType::Kernel && Node->MCommandGroup->getRequirements().size() + static_cast( Node->MCommandGroup.get()) @@ -936,7 +937,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, NewEvent->setEventFromSubmittedExecCommandBuffer(true); } else if ((CurrentPartition->MSchedule.size() > 0) && (CurrentPartition->MSchedule.front()->MCGType == - sycl::detail::CG::CGTYPE::CodeplayHostTask)) { + sycl::detail::CGType::CodeplayHostTask)) { auto NodeImpl = CurrentPartition->MSchedule.front(); // Schedule host task NodeImpl->MCommandGroup->getEvents().insert( @@ -957,7 +958,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // If the node has no requirements for accessors etc. then we skip the // scheduler and enqueue directly. - if (NodeImpl->MCGType == sycl::detail::CG::Kernel && + if (NodeImpl->MCGType == sycl::detail::CGType::Kernel && NodeImpl->MCommandGroup->getRequirements().size() + static_cast( NodeImpl->MCommandGroup.get()) @@ -1198,7 +1199,7 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { "of nodes being updated must have the same type"); } - if (MNodeStorage[i]->MCGType == sycl::detail::CG::Kernel) { + if (MNodeStorage[i]->MCGType == sycl::detail::CGType::Kernel) { sycl::detail::CGExecKernel *TargetCGExec = static_cast( MNodeStorage[i]->MCommandGroup.get()); @@ -1259,8 +1260,8 @@ void exec_graph_impl::update( "Node passed to update() is not part of the graph."); } - if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel || - Node->MCGType == sycl::detail::CG::Barrier)) { + if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CGType::Kernel || + Node->MCGType == sycl::detail::CGType::Barrier)) { throw sycl::exception(errc::invalid, "Unsupported node type for update. Only kernel, " "barrier and empty nodes are supported."); @@ -1318,7 +1319,7 @@ void exec_graph_impl::update( void exec_graph_impl::updateImpl(std::shared_ptr Node) { // Kernel node update is the only command type supported in UR for update. // Updating any other types of nodes, e.g. empty & barrier nodes is a no-op. - if (Node->MCGType != sycl::detail::CG::Kernel) { + if (Node->MCGType != sycl::detail::CGType::Kernel) { return; } auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index fe8fc14842d6..ac02da934340 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -13,10 +13,10 @@ #include #include -#include - #include +#include #include +#include #include #include @@ -41,34 +41,34 @@ namespace oneapi { namespace experimental { namespace detail { -inline node_type getNodeTypeFromCG(sycl::detail::CG::CGTYPE CGType) { +inline node_type getNodeTypeFromCG(sycl::detail::CGType CGType) { using sycl::detail::CG; switch (CGType) { - case CG::None: + case sycl::detail::CGType::None: return node_type::empty; - case CG::Kernel: + case sycl::detail::CGType::Kernel: return node_type::kernel; - case CG::CopyAccToPtr: - case CG::CopyPtrToAcc: - case CG::CopyAccToAcc: - case CG::CopyUSM: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: + case sycl::detail::CGType::CopyAccToAcc: + case sycl::detail::CGType::CopyUSM: return node_type::memcpy; - case CG::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: return node_type::memset; - case CG::Fill: - case CG::FillUSM: + case sycl::detail::CGType::Fill: + case sycl::detail::CGType::FillUSM: return node_type::memfill; - case CG::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: return node_type::prefetch; - case CG::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: return node_type::memadvise; - case CG::Barrier: - case CG::BarrierWaitlist: + case sycl::detail::CGType::Barrier: + case sycl::detail::CGType::BarrierWaitlist: return node_type::ext_oneapi_barrier; - case CG::CodeplayHostTask: + case sycl::detail::CGType::CodeplayHostTask: return node_type::host_task; - case CG::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: return node_type::subgraph; default: assert(false && "Invalid Graph Node Type"); @@ -90,7 +90,7 @@ class node_impl { /// Using weak_ptr here to prevent circular references between nodes. std::vector> MPredecessors; /// Type of the command-group for the node. - sycl::detail::CG::CGTYPE MCGType = sycl::detail::CG::None; + sycl::detail::CGType MCGType = sycl::detail::CGType::None; /// User facing type of the node. node_type MNodeType = node_type::empty; /// Command group object which stores all args etc needed to enqueue the node @@ -221,50 +221,50 @@ class node_impl { /// workload but only dependencies /// @return True if this is an empty node, false otherwise. bool isEmpty() const { - return ((MCGType == sycl::detail::CG::None) || - (MCGType == sycl::detail::CG::Barrier)); + return ((MCGType == sycl::detail::CGType::None) || + (MCGType == sycl::detail::CGType::Barrier)); } /// Get a deep copy of this node's command group /// @return A unique ptr to the new command group object. std::unique_ptr getCGCopy() const { switch (MCGType) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { auto CGCopy = createCGCopy(); rebuildArgStorage(CGCopy->MArgs, MCommandGroup->getArgsStorage(), CGCopy->getArgsStorage()); return std::move(CGCopy); } - case sycl::detail::CG::CopyAccToPtr: - case sycl::detail::CG::CopyPtrToAcc: - case sycl::detail::CG::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: + case sycl::detail::CGType::CopyAccToAcc: return createCGCopy(); - case sycl::detail::CG::Fill: + case sycl::detail::CGType::Fill: return createCGCopy(); - case sycl::detail::CG::UpdateHost: + case sycl::detail::CGType::UpdateHost: return createCGCopy(); - case sycl::detail::CG::CopyUSM: + case sycl::detail::CGType::CopyUSM: return createCGCopy(); - case sycl::detail::CG::FillUSM: + case sycl::detail::CGType::FillUSM: return createCGCopy(); - case sycl::detail::CG::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: return createCGCopy(); - case sycl::detail::CG::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: return createCGCopy(); - case sycl::detail::CG::Copy2DUSM: + case sycl::detail::CGType::Copy2DUSM: return createCGCopy(); - case sycl::detail::CG::Fill2DUSM: + case sycl::detail::CGType::Fill2DUSM: return createCGCopy(); - case sycl::detail::CG::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: return createCGCopy(); - case sycl::detail::CG::CodeplayHostTask: { + case sycl::detail::CGType::CodeplayHostTask: { // The unique_ptr to the `sycl::detail::HostTask` in the HostTask CG // prevents from copying the CG. // We overcome this restriction by creating a new CG with the same data. auto CommandGroupPtr = static_cast(MCommandGroup.get()); sycl::detail::HostTask HostTask = *CommandGroupPtr->MHostTask.get(); - auto HostTaskUPtr = std::make_unique(HostTask); + auto HostTaskSPtr = std::make_shared(HostTask); sycl::detail::CG::StorageInitHelper Data( CommandGroupPtr->getArgsStorage(), CommandGroupPtr->getAccStorage(), @@ -283,32 +283,32 @@ class node_impl { return std::make_unique( sycl::detail::CGHostTask( - std::move(HostTaskUPtr), CommandGroupPtr->MQueue, + std::move(HostTaskSPtr), CommandGroupPtr->MQueue, CommandGroupPtr->MContext, std::move(NewArgs), std::move(Data), CommandGroupPtr->getType(), Loc)); } - case sycl::detail::CG::Barrier: - case sycl::detail::CG::BarrierWaitlist: + case sycl::detail::CGType::Barrier: + case sycl::detail::CGType::BarrierWaitlist: // Barrier nodes are stored in the graph with only the base CG class, // since they are treated internally as empty nodes. return createCGCopy(); - case sycl::detail::CG::CopyToDeviceGlobal: + case sycl::detail::CGType::CopyToDeviceGlobal: return createCGCopy(); - case sycl::detail::CG::CopyFromDeviceGlobal: + case sycl::detail::CGType::CopyFromDeviceGlobal: return createCGCopy(); - case sycl::detail::CG::ReadWriteHostPipe: + case sycl::detail::CGType::ReadWriteHostPipe: return createCGCopy(); - case sycl::detail::CG::CopyImage: + case sycl::detail::CGType::CopyImage: return createCGCopy(); - case sycl::detail::CG::SemaphoreSignal: + case sycl::detail::CGType::SemaphoreSignal: return createCGCopy(); - case sycl::detail::CG::SemaphoreWait: + case sycl::detail::CGType::SemaphoreWait: return createCGCopy(); - case sycl::detail::CG::ProfilingTag: + case sycl::detail::CGType::ProfilingTag: return createCGCopy(); - case sycl::detail::CG::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: return createCGCopy(); - case sycl::detail::CG::None: + case sycl::detail::CGType::None: return nullptr; } return nullptr; @@ -332,14 +332,14 @@ class node_impl { return false; switch (MCGType) { - case sycl::detail::CG::CGTYPE::Kernel: { + case sycl::detail::CGType::Kernel: { sycl::detail::CGExecKernel *ExecKernelA = static_cast(MCommandGroup.get()); sycl::detail::CGExecKernel *ExecKernelB = static_cast(Node->MCommandGroup.get()); return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0; } - case sycl::detail::CG::CGTYPE::CopyUSM: { + case sycl::detail::CGType::CopyUSM: { sycl::detail::CGCopyUSM *CopyA = static_cast(MCommandGroup.get()); sycl::detail::CGCopyUSM *CopyB = @@ -348,9 +348,9 @@ class node_impl { (CopyA->getDst() == CopyB->getDst()) && (CopyA->getLength() == CopyB->getLength()); } - case sycl::detail::CG::CGTYPE::CopyAccToAcc: - case sycl::detail::CG::CGTYPE::CopyAccToPtr: - case sycl::detail::CG::CGTYPE::CopyPtrToAcc: { + case sycl::detail::CGType::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToPtr: + case sycl::detail::CGType::CopyPtrToAcc: { sycl::detail::CGCopy *CopyA = static_cast(MCommandGroup.get()); sycl::detail::CGCopy *CopyB = @@ -394,9 +394,9 @@ class node_impl { /// Test if the node contains a N-D copy /// @return true if the op is a N-D copy bool isNDCopyNode() const { - if ((MCGType != sycl::detail::CG::CGTYPE::CopyAccToAcc) && - (MCGType != sycl::detail::CG::CGTYPE::CopyAccToPtr) && - (MCGType != sycl::detail::CG::CGTYPE::CopyPtrToAcc)) { + if ((MCGType != sycl::detail::CGType::CopyAccToAcc) && + (MCGType != sycl::detail::CGType::CopyAccToPtr) && + (MCGType != sycl::detail::CGType::CopyPtrToAcc)) { return false; } @@ -477,7 +477,7 @@ class node_impl { template void updateNDRange(nd_range ExecutionRange) { - if (MCGType != sycl::detail::CG::Kernel) { + if (MCGType != sycl::detail::CGType::Kernel) { throw sycl::exception( sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); @@ -499,11 +499,11 @@ class node_impl { "the node was originall created with."); } - NDRDesc.set(ExecutionRange); + NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; } template void updateRange(range ExecutionRange) { - if (MCGType != sycl::detail::CG::Kernel) { + if (MCGType != sycl::detail::CGType::Kernel) { throw sycl::exception( sycl::errc::invalid, "Cannot update execution range of nodes which are not kernel nodes"); @@ -525,7 +525,7 @@ class node_impl { "the node was originall created with."); } - NDRDesc.set(ExecutionRange); + NDRDesc = sycl::detail::NDRDescT{ExecutionRange}; } void updateFromOtherNode(const std::shared_ptr &Other) { @@ -595,10 +595,10 @@ class node_impl { Stream << "TYPE = "; switch (MCGType) { - case sycl::detail::CG::CGTYPE::None: + case sycl::detail::CGType::None: Stream << "None \\n"; break; - case sycl::detail::CG::CGTYPE::Kernel: { + case sycl::detail::CGType::Kernel: { Stream << "CGExecKernel \\n"; sycl::detail::CGExecKernel *Kernel = static_cast(MCommandGroup.get()); @@ -645,7 +645,7 @@ class node_impl { } break; } - case sycl::detail::CG::CGTYPE::CopyAccToPtr: + case sycl::detail::CGType::CopyAccToPtr: Stream << "CGCopy Device-to-Host \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -654,7 +654,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyPtrToAcc: + case sycl::detail::CGType::CopyPtrToAcc: Stream << "CGCopy Host-to-Device \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -663,7 +663,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyAccToAcc: + case sycl::detail::CGType::CopyAccToAcc: Stream << "CGCopy Device-to-Device \\n"; if (Verbose) { sycl::detail::CGCopy *Copy = @@ -672,7 +672,7 @@ class node_impl { << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Fill: + case sycl::detail::CGType::Fill: Stream << "CGFill \\n"; if (Verbose) { sycl::detail::CGFill *Fill = @@ -680,7 +680,7 @@ class node_impl { Stream << "Ptr: " << Fill->MPtr << "\\n"; } break; - case sycl::detail::CG::CGTYPE::UpdateHost: + case sycl::detail::CGType::UpdateHost: Stream << "CGCUpdateHost \\n"; if (Verbose) { sycl::detail::CGUpdateHost *Host = @@ -688,7 +688,7 @@ class node_impl { Stream << "Ptr: " << Host->getReqToUpdate() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyUSM: + case sycl::detail::CGType::CopyUSM: Stream << "CGCopyUSM \\n"; if (Verbose) { sycl::detail::CGCopyUSM *CopyUSM = @@ -697,7 +697,7 @@ class node_impl { << " Length: " << CopyUSM->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::FillUSM: + case sycl::detail::CGType::FillUSM: Stream << "CGFillUSM \\n"; if (Verbose) { sycl::detail::CGFillUSM *FillUSM = @@ -709,7 +709,7 @@ class node_impl { Stream << "\\n"; } break; - case sycl::detail::CG::CGTYPE::PrefetchUSM: + case sycl::detail::CGType::PrefetchUSM: Stream << "CGPrefetchUSM \\n"; if (Verbose) { sycl::detail::CGPrefetchUSM *Prefetch = @@ -718,7 +718,7 @@ class node_impl { << " Length: " << Prefetch->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::AdviseUSM: + case sycl::detail::CGType::AdviseUSM: Stream << "CGAdviseUSM \\n"; if (Verbose) { sycl::detail::CGAdviseUSM *AdviseUSM = @@ -727,13 +727,13 @@ class node_impl { << " Length: " << AdviseUSM->getLength() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CodeplayHostTask: + case sycl::detail::CGType::CodeplayHostTask: Stream << "CGHostTask \\n"; break; - case sycl::detail::CG::CGTYPE::Barrier: + case sycl::detail::CGType::Barrier: Stream << "CGBarrier \\n"; break; - case sycl::detail::CG::CGTYPE::Copy2DUSM: + case sycl::detail::CGType::Copy2DUSM: Stream << "CGCopy2DUSM \\n"; if (Verbose) { sycl::detail::CGCopy2DUSM *Copy2DUSM = @@ -742,7 +742,7 @@ class node_impl { << " Dst: " << Copy2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Fill2DUSM: + case sycl::detail::CGType::Fill2DUSM: Stream << "CGFill2DUSM \\n"; if (Verbose) { sycl::detail::CGFill2DUSM *Fill2DUSM = @@ -750,7 +750,7 @@ class node_impl { Stream << "Dst: " << Fill2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::Memset2DUSM: + case sycl::detail::CGType::Memset2DUSM: Stream << "CGMemset2DUSM \\n"; if (Verbose) { sycl::detail::CGMemset2DUSM *Memset2DUSM = @@ -758,10 +758,10 @@ class node_impl { Stream << "Dst: " << Memset2DUSM->getDst() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::ReadWriteHostPipe: + case sycl::detail::CGType::ReadWriteHostPipe: Stream << "CGReadWriteHostPipe \\n"; break; - case sycl::detail::CG::CGTYPE::CopyToDeviceGlobal: + case sycl::detail::CGType::CopyToDeviceGlobal: Stream << "CGCopyToDeviceGlobal \\n"; if (Verbose) { sycl::detail::CGCopyToDeviceGlobal *CopyToDeviceGlobal = @@ -771,7 +771,7 @@ class node_impl { << " Dst: " << CopyToDeviceGlobal->getDeviceGlobalPtr() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::CopyFromDeviceGlobal: + case sycl::detail::CGType::CopyFromDeviceGlobal: Stream << "CGCopyFromDeviceGlobal \\n"; if (Verbose) { sycl::detail::CGCopyFromDeviceGlobal *CopyFromDeviceGlobal = @@ -781,7 +781,7 @@ class node_impl { << " Dst: " << CopyFromDeviceGlobal->getDest() << "\\n"; } break; - case sycl::detail::CG::CGTYPE::ExecCommandBuffer: + case sycl::detail::CGType::ExecCommandBuffer: Stream << "CGExecCommandBuffer \\n"; break; default: @@ -822,7 +822,7 @@ class partition { /// @return True if the partition contains a host task bool isHostTask() const { return (MRoots.size() && ((*MRoots.begin()).lock()->MCGType == - sycl::detail::CG::CGTYPE::CodeplayHostTask)); + sycl::detail::CGType::CodeplayHostTask)); } /// Checks if the graph is single path, i.e. each node has a single successor. diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index daea9816deac..b3cf5f1bebed 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -10,6 +10,7 @@ #include "sycl/handler.hpp" #include +#include #include #include @@ -37,6 +38,10 @@ class handler_impl { MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), MEventNeeded(EventNeeded) {}; + handler_impl( + std::shared_ptr Graph) + : MGraph{Graph} {} + handler_impl() = default; void setStateExplicitKernelBundle() { @@ -150,6 +155,45 @@ class handler_impl { // Track whether an NDRange was used when submitting a kernel (as opposed to a // range), needed for graph update bool MNDRangeUsed = false; + + /// The storage for the arguments passed. + /// We need to store a copy of values that are passed explicitly through + /// set_arg, require and so on, because we need them to be alive after + /// we exit the method they are passed in. + detail::CG::StorageInitHelper CGData; + + /// The list of arguments for the kernel. + std::vector MArgs; + + /// The list of associated accessors with this handler. + /// These accessors were created with this handler as argument or + /// have become required for this handler via require method. + std::vector MAssociatedAccesors; + + /// Struct that encodes global size, local size, ... + detail::NDRDescT MNDRDesc; + + /// Type of the command group, e.g. kernel, fill. Can also encode version. + /// Use getType and setType methods to access this variable unless + /// manipulations with version are required + detail::CGType MCGType = detail::CGType::None; + + /// The graph that is associated with this handler. + std::shared_ptr MGraph; + /// If we are submitting a graph using ext_oneapi_graph this will be the graph + /// to be executed. + std::shared_ptr + MExecGraph; + /// Storage for a node created from a subgraph submission. + std::shared_ptr MSubgraphNode; + /// Storage for the CG created when handling graph nodes added explicitly. + std::unique_ptr MGraphNodeCG; + + /// Storage for lambda/function when using HostTask + std::shared_ptr MHostTask; + /// The list of valid SYCL events that need to complete + /// before barrier command can be executed + std::vector MEventsWaitWithBarrier; }; } // namespace detail diff --git a/sycl/include/sycl/detail/host_task_impl.hpp b/sycl/source/detail/host_task.hpp similarity index 50% rename from sycl/include/sycl/detail/host_task_impl.hpp rename to sycl/source/detail/host_task.hpp index 42b49eab820c..48cb3ce7b854 100644 --- a/sycl/include/sycl/detail/host_task_impl.hpp +++ b/sycl/source/detail/host_task.hpp @@ -1,4 +1,4 @@ -//==---- host_task_impl.hpp ------------------------------------------------==// +//==---- host_task.hpp -----------------------------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,7 +12,7 @@ #pragma once -#include +#include #include #include #include @@ -51,43 +51,6 @@ class HostTask { friend class DispatchHostTask; }; -class CGHostTask : public CG { -public: - std::unique_ptr MHostTask; - // queue for host-interop task - std::shared_ptr MQueue; - // context for host-interop task - std::shared_ptr MContext; - std::vector MArgs; - - CGHostTask(std::unique_ptr HostTask, - std::shared_ptr Queue, - std::shared_ptr Context, - std::vector Args, CG::StorageInitHelper CGData, - CGTYPE Type, detail::code_location loc = {}) - : CG(Type, std::move(CGData), std::move(loc)), - MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context), - MArgs(std::move(Args)) {} -}; - } // namespace detail -template -std::enable_if_t< - detail::check_fn_signature, void()>::value || - detail::check_fn_signature, - void(interop_handle)>::value> -handler::host_task_impl(FuncT &&Func) { - throwIfActionIsCreated(); - - MNDRDesc.set(range<1>(1)); - // Need to copy these rather than move so that we can check associated - // accessors during finalize - MArgs = MAssociatedAccesors; - - MHostTask.reset(new detail::HostTask(std::move(Func))); - - setType(detail::CG::CodeplayHostTask); -} - } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 51631d0d3e4f..179f11b97b49 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -970,7 +970,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue, FusedCG.reset(new detail::CGExecKernel( NDRDesc, nullptr, nullptr, std::move(KernelBundleImplPtr), std::move(CGData), std::move(FusedArgs), FusedOrCachedKernelName, {}, {}, - CG::CGTYPE::Kernel, KernelCacheConfig, false /* KernelIsCooperative */, + CGType::Kernel, KernelCacheConfig, false /* KernelIsCooperative */, false /* KernelUsesClusterLaunch*/)); return FusedCG; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 3489dba53ffa..55d9fe3de353 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #pragma once +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ebfa91bc8a19..6bc27710e766 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -365,15 +365,15 @@ event queue_impl::submit_impl(const std::function &CGF, // Scheduler will later omit events, that are not required to execute tasks. // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. - const CG::CGTYPE Type = Handler.getType(); + const CGType Type = detail::getSyclObjImpl(Handler)->MCGType; event Event = detail::createSyclObjFromImpl( std::make_shared()); std::vector Streams; - if (Type == CG::Kernel) + if (Type == CGType::Kernel) Streams = std::move(Handler.MStreamStorage); if (PostProcess) { - bool IsKernel = Type == CG::Kernel; + bool IsKernel = Type == CGType::Kernel; bool KernelUsesAssert = false; if (IsKernel) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 699cfdce5e6b..b0c989c35fa5 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -817,7 +817,7 @@ class queue_impl { // Note that host_task events can never be discarded, so this will not // insert barriers between host_task enqueues. if (EventToBuildDeps->isDiscarded() && - Handler.getType() == CG::CodeplayHostTask) + getSyclObjImpl(Handler)->MCGType == CGType::CodeplayHostTask) EventToBuildDeps = insertHelperBarrier(Handler); if (!EventToBuildDeps->isDiscarded()) @@ -834,7 +834,7 @@ class queue_impl { EventRet = Handler.finalize(); EventToBuildDeps = getSyclObjImpl(EventRet); } else { - const CG::CGTYPE Type = Handler.getType(); + const CGType Type = getSyclObjImpl(Handler)->MCGType; std::lock_guard Lock{MMutex}; // The following code supports barrier synchronization if host task is // involved in the scenario. Native barriers cannot handle host task @@ -848,17 +848,17 @@ class queue_impl { MMissedCleanupRequests.clear(); } auto &Deps = MGraph.expired() ? MDefaultGraphDeps : MExtGraphDeps; - if (Type == CG::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { + if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); } if (Deps.LastBarrier) Handler.depends_on(Deps.LastBarrier); EventRet = Handler.finalize(); EventImplPtr EventRetImpl = getSyclObjImpl(EventRet); - if (Type == CG::CodeplayHostTask) + if (Type == CGType::CodeplayHostTask) Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); else if (!EventRetImpl->isEnqueued()) { - if (Type == CG::Barrier || Type == CG::BarrierWaitlist) { + if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) { Deps.LastBarrier = EventRetImpl; Deps.UnenqueuedCmdEvents.clear(); } else diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e8ec2d9764af..a41231c188e3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -294,7 +294,7 @@ std::vector Command::getPiEventsBlocking( bool Command::isHostTask() const { return (MType == CommandType::RUN_CG) /* host task has this type also */ && ((static_cast(this))->getCG().getType() == - CG::CGTYPE::CodeplayHostTask); + CGType::CodeplayHostTask); } bool Command::isFusable() const { @@ -302,7 +302,7 @@ bool Command::isFusable() const { return false; } const auto &CG = (static_cast(*this)).getCG(); - return (CG.getType() == CG::CGTYPE::Kernel) && + return (CG.getType() == CGType::Kernel) && (!static_cast(CG).MKernelIsCooperative) && (!static_cast(CG).MKernelUsesClusterLaunch); } @@ -382,7 +382,7 @@ class DispatchHostTask { MReqPiMem(std::move(ReqPiMem)) {} void operator()() const { - assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask); + assert(MThisCmd->getCG().getType() == CGType::CodeplayHostTask); CGHostTask &HostTask = static_cast(MThisCmd->getCG()); @@ -1603,13 +1603,13 @@ AllocaCommandBase *ExecCGCommand::getAllocaForReq(Requirement *Req) { std::vector> ExecCGCommand::getAuxiliaryResources() const { - if (MCommandGroup->getType() == CG::Kernel) + if (MCommandGroup->getType() == CGType::Kernel) return ((CGExecKernel *)MCommandGroup.get())->getAuxiliaryResources(); return {}; } void ExecCGCommand::clearAuxiliaryResources() { - if (MCommandGroup->getType() == CG::Kernel) + if (MCommandGroup->getType() == CGType::Kernel) ((CGExecKernel *)MCommandGroup.get())->clearAuxiliaryResources(); } @@ -1852,68 +1852,68 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { #endif } -static std::string_view cgTypeToString(detail::CG::CGTYPE Type) { +static std::string_view cgTypeToString(detail::CGType Type) { switch (Type) { - case detail::CG::Kernel: + case detail::CGType::Kernel: return "Kernel"; break; - case detail::CG::UpdateHost: + case detail::CGType::UpdateHost: return "update_host"; break; - case detail::CG::Fill: + case detail::CGType::Fill: return "fill"; break; - case detail::CG::CopyAccToAcc: + case detail::CGType::CopyAccToAcc: return "copy acc to acc"; break; - case detail::CG::CopyAccToPtr: + case detail::CGType::CopyAccToPtr: return "copy acc to ptr"; break; - case detail::CG::CopyPtrToAcc: + case detail::CGType::CopyPtrToAcc: return "copy ptr to acc"; break; - case detail::CG::Barrier: + case detail::CGType::Barrier: return "barrier"; - case detail::CG::BarrierWaitlist: + case detail::CGType::BarrierWaitlist: return "barrier waitlist"; - case detail::CG::CopyUSM: + case detail::CGType::CopyUSM: return "copy usm"; break; - case detail::CG::FillUSM: + case detail::CGType::FillUSM: return "fill usm"; break; - case detail::CG::PrefetchUSM: + case detail::CGType::PrefetchUSM: return "prefetch usm"; break; - case detail::CG::CodeplayHostTask: + case detail::CGType::CodeplayHostTask: return "host task"; break; - case detail::CG::Copy2DUSM: + case detail::CGType::Copy2DUSM: return "copy 2d usm"; break; - case detail::CG::Fill2DUSM: + case detail::CGType::Fill2DUSM: return "fill 2d usm"; break; - case detail::CG::AdviseUSM: + case detail::CGType::AdviseUSM: return "advise usm"; - case detail::CG::Memset2DUSM: + case detail::CGType::Memset2DUSM: return "memset 2d usm"; break; - case detail::CG::CopyToDeviceGlobal: + case detail::CGType::CopyToDeviceGlobal: return "copy to device_global"; break; - case detail::CG::CopyFromDeviceGlobal: + case detail::CGType::CopyFromDeviceGlobal: return "copy from device_global"; break; - case detail::CG::ReadWriteHostPipe: + case detail::CGType::ReadWriteHostPipe: return "read_write host pipe"; - case detail::CG::ExecCommandBuffer: + case detail::CGType::ExecCommandBuffer: return "exec command buffer"; - case detail::CG::CopyImage: + case detail::CGType::CopyImage: return "copy image"; - case detail::CG::SemaphoreWait: + case detail::CGType::SemaphoreWait: return "semaphore wait"; - case detail::CG::SemaphoreSignal: + case detail::CGType::SemaphoreSignal: return "semaphore signal"; default: return "unknown"; @@ -1928,11 +1928,11 @@ ExecCGCommand::ExecCGCommand( : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { - if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { + if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); } - if (MCommandGroup->getType() == detail::CG::ProfilingTag) + if (MCommandGroup->getType() == detail::CGType::ProfilingTag) MEvent->markAsProfilingTagEvent(); emitInstrumentationDataProxy(); @@ -2138,7 +2138,7 @@ void ExecCGCommand::emitInstrumentationData() { std::string KernelName; std::optional FromSource; switch (MCommandGroup->getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { auto KernelCG = reinterpret_cast(MCommandGroup.get()); KernelName = instrumentationGetKernelName( @@ -2160,7 +2160,7 @@ void ExecCGCommand::emitInstrumentationData() { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); - if (MCommandGroup->getType() == detail::CG::Kernel) { + if (MCommandGroup->getType() == detail::CGType::Kernel) { auto KernelCG = reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( @@ -2184,7 +2184,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "EXEC CG ON " << queueDeviceToString(MQueue.get()) << "\\n"; switch (MCommandGroup->getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { auto KernelCG = reinterpret_cast(MCommandGroup.get()); Stream << "Kernel name: "; @@ -2241,7 +2241,8 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, if (WGSize[0] == 0) { WGSize = {1, 1, 1}; } - NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); + NDR = sycl::detail::NDRDescT{nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize), + static_cast(NDR.Dims)}; } // We have the following mapping between dimensions with SPIR-V builtins: @@ -2453,31 +2454,6 @@ static pi_result SetKernelParamsAndLaunch( return Error; } -// The function initialize accessors and calls lambda. -void DispatchNativeKernel(void *Blob) { - void **CastedBlob = (void **)Blob; - - std::vector *Reqs = - static_cast *>(CastedBlob[0]); - - std::shared_ptr *HostKernel = - static_cast *>(CastedBlob[1]); - - NDRDescT *NDRDesc = static_cast(CastedBlob[2]); - - // Other value are pointer to the buffers. - void **NextArg = CastedBlob + 3; - for (detail::Requirement *Req : *Reqs) - Req->MData = *(NextArg++); - - (*HostKernel)->call(*NDRDesc, nullptr); - - // The ownership of these objects have been passed to us, need to cleanup - delete Reqs; - delete HostKernel; - delete NDRDesc; -} - pi_int32 enqueueImpCommandBufferKernel( context Ctx, DeviceImplPtr DeviceImpl, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, @@ -2783,7 +2759,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { sycl::detail::pi::PiExtSyncPoint OutSyncPoint; sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; switch (MCommandGroup->getType()) { - case CG::CGTYPE::Kernel: { + case CGType::Kernel: { CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); auto getMemAllocationFunc = [this](Requirement *Req) { @@ -2809,7 +2785,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setCommandBufferCommand(OutCommand); return result; } - case CG::CGTYPE::CopyUSM: { + case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_copy_usm_cmd_buffer( MQueue->getContextImplPtr(), Copy->getSrc(), MCommandBuffer, @@ -2817,7 +2793,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToAcc: { + case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *ReqSrc = (Requirement *)(Copy->getSrc()); Requirement *ReqDst = (Requirement *)(Copy->getDst()); @@ -2836,7 +2812,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToPtr: { + case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)Copy->getSrc(); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2851,7 +2827,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::CopyPtrToAcc: { + case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2865,7 +2841,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::Fill: { + case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Fill->getReqToFill()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2878,7 +2854,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::FillUSM: { + case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_fill_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Fill->getDst(), @@ -2887,7 +2863,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::PrefetchUSM: { + case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Prefetch->getDst(), @@ -2895,7 +2871,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { MEvent->setSyncPoint(OutSyncPoint); return PI_SUCCESS; } - case CG::CGTYPE::AdviseUSM: { + case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); MemoryManager::ext_oneapi_advise_usm_cmd_buffer( MQueue->getContextImplPtr(), MCommandBuffer, Advise->getDst(), @@ -2920,7 +2896,7 @@ pi_int32 ExecCGCommand::enqueueImp() { } pi_int32 ExecCGCommand::enqueueImpQueue() { - if (getCG().getType() != CG::CGTYPE::CodeplayHostTask) + if (getCG().getType() != CGType::CodeplayHostTask) waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); @@ -2939,12 +2915,12 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { switch (MCommandGroup->getType()) { - case CG::CGTYPE::UpdateHost: { + case CGType::UpdateHost: { throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "Update host should be handled by the Scheduler. " + codeToString(PI_ERROR_INVALID_VALUE)); } - case CG::CGTYPE::CopyAccToPtr: { + case CGType::CopyAccToPtr: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)Copy->getSrc(); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2958,7 +2934,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyPtrToAcc: { + case CGType::CopyPtrToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -2972,7 +2948,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyAccToAcc: { + case CGType::CopyAccToAcc: { CGCopy *Copy = (CGCopy *)MCommandGroup.get(); Requirement *ReqSrc = (Requirement *)(Copy->getSrc()); Requirement *ReqDst = (Requirement *)(Copy->getDst()); @@ -2990,7 +2966,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Fill: { + case CGType::Fill: { CGFill *Fill = (CGFill *)MCommandGroup.get(); Requirement *Req = (Requirement *)(Fill->getReqToFill()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); @@ -3003,7 +2979,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Kernel: { + case CGType::Kernel: { assert(MQueue && "Kernel submissions should have an associated queue"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); @@ -3039,7 +3015,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyUSM: { + case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); MemoryManager::copy_usm(Copy->getSrc(), MQueue, Copy->getLength(), Copy->getDst(), std::move(RawEvents), Event, @@ -3047,7 +3023,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::FillUSM: { + case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), Fill->getPattern(), std::move(RawEvents), Event, @@ -3055,7 +3031,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::PrefetchUSM: { + case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); MemoryManager::prefetch_usm(Prefetch->getDst(), MQueue, Prefetch->getLength(), std::move(RawEvents), @@ -3063,7 +3039,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::AdviseUSM: { + case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); MemoryManager::advise_usm(Advise->getDst(), MQueue, Advise->getLength(), Advise->getAdvice(), std::move(RawEvents), Event, @@ -3071,7 +3047,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Copy2DUSM: { + case CGType::Copy2DUSM: { CGCopy2DUSM *Copy = (CGCopy2DUSM *)MCommandGroup.get(); MemoryManager::copy_2d_usm(Copy->getSrc(), Copy->getSrcPitch(), MQueue, Copy->getDst(), Copy->getDstPitch(), @@ -3079,7 +3055,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::move(RawEvents), Event, MEvent); return PI_SUCCESS; } - case CG::CGTYPE::Fill2DUSM: { + case CGType::Fill2DUSM: { CGFill2DUSM *Fill = (CGFill2DUSM *)MCommandGroup.get(); MemoryManager::fill_2d_usm(Fill->getDst(), MQueue, Fill->getPitch(), Fill->getWidth(), Fill->getHeight(), @@ -3087,7 +3063,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { MEvent); return PI_SUCCESS; } - case CG::CGTYPE::Memset2DUSM: { + case CGType::Memset2DUSM: { CGMemset2DUSM *Memset = (CGMemset2DUSM *)MCommandGroup.get(); MemoryManager::memset_2d_usm(Memset->getDst(), MQueue, Memset->getPitch(), Memset->getWidth(), Memset->getHeight(), @@ -3095,7 +3071,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Event, MEvent); return PI_SUCCESS; } - case CG::CGTYPE::CodeplayHostTask: { + case CGType::CodeplayHostTask: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); for (ArgDesc &Arg : HostTask->MArgs) { @@ -3161,7 +3137,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::Barrier: { + case CGType::Barrier: { assert(MQueue && "Barrier submission should have an associated queue"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) @@ -3171,7 +3147,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::BarrierWaitlist: { + case CGType::BarrierWaitlist: { assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; @@ -3189,7 +3165,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::ProfilingTag: { + case CGType::ProfilingTag: { const PluginPtr &Plugin = MQueue->getPlugin(); // If the queue is not in-order, we need to insert a barrier. This barrier // does not need output events as it will implicitly enforce the following @@ -3205,7 +3181,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::CopyToDeviceGlobal: { + case CGType::CopyToDeviceGlobal: { CGCopyToDeviceGlobal *Copy = (CGCopyToDeviceGlobal *)MCommandGroup.get(); MemoryManager::copy_to_device_global( Copy->getDeviceGlobalPtr(), Copy->isDeviceImageScoped(), MQueue, @@ -3214,7 +3190,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return CL_SUCCESS; } - case CG::CGTYPE::CopyFromDeviceGlobal: { + case CGType::CopyFromDeviceGlobal: { CGCopyFromDeviceGlobal *Copy = (CGCopyFromDeviceGlobal *)MCommandGroup.get(); MemoryManager::copy_from_device_global( @@ -3224,7 +3200,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return CL_SUCCESS; } - case CG::CGTYPE::ReadWriteHostPipe: { + case CGType::ReadWriteHostPipe: { CGReadWriteHostPipe *ExecReadWriteHostPipe = (CGReadWriteHostPipe *)MCommandGroup.get(); std::string pipeName = ExecReadWriteHostPipe->getPipeName(); @@ -3239,7 +3215,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return enqueueReadWriteHostPipe(MQueue, pipeName, blocking, hostPtr, typeSize, RawEvents, EventImpl, read); } - case CG::CGTYPE::ExecCommandBuffer: { + case CGType::ExecCommandBuffer: { assert(MQueue && "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = @@ -3252,7 +3228,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], Event); } - case CG::CGTYPE::CopyImage: { + case CGType::CopyImage: { CGCopyImage *Copy = (CGCopyImage *)MCommandGroup.get(); sycl::detail::pi::PiMemImageDesc Desc = Copy->getDesc(); @@ -3264,7 +3240,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Event); return PI_SUCCESS; } - case CG::CGTYPE::SemaphoreWait: { + case CGType::SemaphoreWait: { assert(MQueue && "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); @@ -3278,7 +3254,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::SemaphoreSignal: { + case CGType::SemaphoreSignal: { assert(MQueue && "Semaphore signal submissions should have an associated queue"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); @@ -3293,7 +3269,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } - case CG::CGTYPE::None: + case CGType::None: throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), "CG type not implemented. " + codeToString(PI_ERROR_INVALID_OPERATION)); @@ -3303,17 +3279,17 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { bool ExecCGCommand::producesPiEvent() const { return !MCommandBuffer && - MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask; + MCommandGroup->getType() != CGType::CodeplayHostTask; } bool ExecCGCommand::supportsPostEnqueueCleanup() const { // Host tasks are cleaned up upon completion instead. return Command::supportsPostEnqueueCleanup() && - (MCommandGroup->getType() != CG::CGTYPE::CodeplayHostTask); + (MCommandGroup->getType() != CGType::CodeplayHostTask); } bool ExecCGCommand::readyForCleanup() const { - if (MCommandGroup->getType() == CG::CGTYPE::CodeplayHostTask) + if (MCommandGroup->getType() == CGType::CodeplayHostTask) return MLeafCounter == 0 && MEvent->isCompleted(); return Command::readyForCleanup(); } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 924ee7dff412..8944444c44ed 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -18,10 +18,10 @@ #include #include +#include #include #include #include -#include namespace sycl { inline namespace _V1 { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6ac1f8b34828..863752506cd0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -906,7 +906,7 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( } static bool isInteropHostTask(ExecCGCommand *Cmd) { - if (Cmd->getCG().getType() != CG::CGTYPE::CodeplayHostTask) + if (Cmd->getCG().getType() != CGType::CodeplayHostTask) return false; const detail::CGHostTask &HT = @@ -1017,7 +1017,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( } else { std::string s; std::stringstream ss(s); - if (NewCmd->getCG().getType() == CG::CGTYPE::Kernel) { + if (NewCmd->getCG().getType() == CGType::Kernel) { ss << "Not fusing kernel with 'use_root_sync' property. Can only fuse " "non-cooperative device kernels."; } else { @@ -1333,7 +1333,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( ExecCGCommand *ConnectCmd = nullptr; try { - std::unique_ptr HT(new detail::HostTask); + std::shared_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {}, /* Args = */ {}, @@ -1341,7 +1341,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}), - CG::CodeplayHostTask, + CGType::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr, /*EventNeeded=*/true); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 258ab3f6d6a5..5fd4b17f746d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -97,7 +97,7 @@ EventImplPtr Scheduler::addCG( bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; - const CG::CGTYPE Type = CommandGroup->getType(); + const CGType Type = CommandGroup->getType(); std::vector AuxiliaryCmds; std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); @@ -109,12 +109,12 @@ EventImplPtr Scheduler::addCG( Command *NewCmd = nullptr; switch (Type) { - case CG::UpdateHost: + case CGType::UpdateHost: NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; - case CG::CodeplayHostTask: { + case CGType::CodeplayHostTask: { auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr, AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index e9a8f4d56628..0cdf4ae9ec25 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -8,10 +8,10 @@ #pragma once +#include #include #include #include -#include #include #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index cf67dcfde8d7..3beb84cc5764 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -88,31 +89,31 @@ handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool CallerNeedsEvent) - : MImpl(std::make_shared(std::move(PrimaryQueue), + : impl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue), CallerNeedsEvent)), MQueue(std::move(Queue)) {} handler::handler( std::shared_ptr Graph) - : MImpl(std::make_shared()), MGraph(Graph) {} + : impl(std::make_shared(Graph)) {} // Sets the submission state to indicate that an explicit kernel bundle has been // set. Throws a sycl::exception with errc::invalid if the current state // indicates that a specialization constant has been set. void handler::setStateExplicitKernelBundle() { - MImpl->setStateExplicitKernelBundle(); + impl->setStateExplicitKernelBundle(); } // Sets the submission state to indicate that a specialization constant has been // set. Throws a sycl::exception with errc::invalid if the current state // indicates that an explicit kernel bundle has been set. -void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); } +void handler::setStateSpecConstSet() { impl->setStateSpecConstSet(); } // Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and // false otherwise. bool handler::isStateExplicitKernelBundle() const { - return MImpl->isStateExplicitKernelBundle(); + return impl->isStateExplicitKernelBundle(); } // Returns a shared_ptr to the kernel_bundle. @@ -121,19 +122,19 @@ bool handler::isStateExplicitKernelBundle() const { // returns shared_ptr(nullptr) if Insert is false std::shared_ptr handler::getOrInsertHandlerKernelBundle(bool Insert) const { - if (!MImpl->MKernelBundle && Insert) { - auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context(); - auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device(); - MImpl->MKernelBundle = detail::getSyclObjImpl( + if (!impl->MKernelBundle && Insert) { + auto Ctx = impl->MGraph ? impl->MGraph->getContext() : MQueue->get_context(); + auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); + impl->MKernelBundle = detail::getSyclObjImpl( get_kernel_bundle(Ctx, {Dev}, {})); } - return MImpl->MKernelBundle; + return impl->MKernelBundle; } // Sets kernel bundle to the provided one. void handler::setHandlerKernelBundle( const std::shared_ptr &NewKernelBundleImpPtr) { - MImpl->MKernelBundle = NewKernelBundleImpPtr; + impl->MKernelBundle = NewKernelBundleImpPtr; } void handler::setHandlerKernelBundle(kernel Kernel) { @@ -156,23 +157,23 @@ event handler::finalize() { // to a command without being bound to a command group, an exception should // be thrown. { - for (const auto &arg : MArgs) { + for (const auto &arg : impl->MArgs) { if (arg.MType != detail::kernel_param_kind_t::kind_accessor) continue; detail::Requirement *AccImpl = static_cast(arg.MPtr); if (AccImpl->MIsPlaceH) { - auto It = std::find(CGData.MRequirements.begin(), - CGData.MRequirements.end(), AccImpl); - if (It == CGData.MRequirements.end()) + auto It = std::find(impl->CGData.MRequirements.begin(), + impl->CGData.MRequirements.end(), AccImpl); + if (It == impl->CGData.MRequirements.end()) throw sycl::exception(make_error_code(errc::kernel_argument), "placeholder accessor must be bound by calling " "handler::require() before it can be used."); // Check associated accessors bool AccFound = false; - for (detail::ArgDesc &Acc : MAssociatedAccesors) { + for (detail::ArgDesc &Acc : impl->MAssociatedAccesors) { if (Acc.MType == detail::kernel_param_kind_t::kind_accessor && static_cast(Acc.MPtr) == AccImpl) { AccFound = true; @@ -190,15 +191,15 @@ event handler::finalize() { } const auto &type = getType(); - if (type == detail::CG::Kernel) { + if (type == detail::CGType::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle std::shared_ptr KernelBundleImpPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false); if (KernelBundleImpPtr) { // Make sure implicit non-interop kernel bundles have the kernel if (!KernelBundleImpPtr->isInterop() && - !MImpl->isStateExplicitKernelBundle()) { - auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device(); + !impl->isStateExplicitKernelBundle()) { + auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( MKernelName.c_str()); @@ -243,13 +244,13 @@ event handler::finalize() { } } - if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() && - !MQueue->is_in_fusion_mode() && !CGData.MRequirements.size() && - !MStreamStorage.size() && - (!CGData.MEvents.size() || + if (MQueue && !impl->MGraph && !impl->MSubgraphNode && + !MQueue->getCommandGraph() && !MQueue->is_in_fusion_mode() && + !impl->CGData.MRequirements.size() && !MStreamStorage.size() && + (!impl->CGData.MEvents.size() || (MQueue->isInOrder() && detail::Scheduler::areEventsSafeForSchedulerBypass( - CGData.MEvents, MQueue->getContextImplPtr())))) { + impl->CGData.MEvents, MQueue->getContextImplPtr())))) { // if user does not add a new dependency to the dependency graph, i.e. // the graph is not changed, and the queue is not in fusion mode, then // this faster path is used to submit kernel bypassing scheduler and @@ -262,8 +263,8 @@ event handler::finalize() { // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent, int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( - StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, MNDRDesc, - KernelBundleImpPtr, MArgs); + StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, + impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent, InstanceID = InstanceID]() { #else @@ -273,10 +274,11 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch); + enqueueImpKernel(MQueue, impl->MNDRDesc, impl->MArgs, + KernelBundleImpPtr, MKernel, MKernelName.c_str(), + RawEvents, NewEvent, nullptr, impl->MKernelCacheConfig, + impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch); #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { @@ -289,7 +291,7 @@ event handler::finalize() { #endif }; - bool DiscardEvent = (MQueue->MDiscardEvents || !MImpl->MEventNeeded) && + bool DiscardEvent = (MQueue->MDiscardEvents || !impl->MEventNeeded) && MQueue->supportsDiscardingPiEvents(); if (DiscardEvent) { // Kernel only uses assert if it's non interop one @@ -325,118 +327,118 @@ event handler::finalize() { std::unique_ptr CommandGroup; switch (type) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { // Copy kernel name here instead of move so that it's available after // running of this method by reductions implementation. This allows for // assert feature to check if kernel uses assertions CommandGroup.reset(new detail::CGExecKernel( - std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), - std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs), - MKernelName.c_str(), std::move(MStreamStorage), - std::move(MImpl->MAuxiliaryResources), MCGType, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch, MCodeLoc)); + std::move(impl->MNDRDesc), std::move(MHostKernel), std::move(MKernel), + std::move(impl->MKernelBundle), std::move(impl->CGData), + std::move(impl->MArgs), MKernelName.c_str(), std::move(MStreamStorage), + std::move(impl->MAuxiliaryResources), getType(), + impl->MKernelCacheConfig, impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch, MCodeLoc)); break; } - case detail::CG::CopyAccToPtr: - case detail::CG::CopyPtrToAcc: - case detail::CG::CopyAccToAcc: + case detail::CGType::CopyAccToPtr: + case detail::CGType::CopyPtrToAcc: + case detail::CGType::CopyAccToAcc: CommandGroup.reset( - new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData), - std::move(MImpl->MAuxiliaryResources), MCodeLoc)); + new detail::CGCopy(getType(), MSrcPtr, MDstPtr, std::move(impl->CGData), + std::move(impl->MAuxiliaryResources), MCodeLoc)); break; - case detail::CG::Fill: + case detail::CGType::Fill: CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::UpdateHost: + case detail::CGType::UpdateHost: CommandGroup.reset( - new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc)); + new detail::CGUpdateHost(MDstPtr, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::CopyUSM: + case detail::CGType::CopyUSM: CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::FillUSM: + case detail::CGType::FillUSM: CommandGroup.reset(new detail::CGFillUSM( - std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc)); + std::move(MPattern), MDstPtr, MLength, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::PrefetchUSM: + case detail::CGType::PrefetchUSM: CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength, - std::move(CGData), MCodeLoc)); + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::AdviseUSM: - CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice, - std::move(CGData), MCGType, + case detail::CGType::AdviseUSM: + CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, impl->MAdvice, + std::move(impl->CGData), getType(), MCodeLoc)); break; - case detail::CG::Copy2DUSM: + case detail::CGType::Copy2DUSM: CommandGroup.reset(new detail::CGCopy2DUSM( - MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth, - MImpl->MHeight, std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MSrcPitch, impl->MDstPitch, impl->MWidth, + impl->MHeight, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::Fill2DUSM: + case detail::CGType::Fill2DUSM: CommandGroup.reset(new detail::CGFill2DUSM( - std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth, - MImpl->MHeight, std::move(CGData), MCodeLoc)); + std::move(MPattern), MDstPtr, impl->MDstPitch, impl->MWidth, + impl->MHeight, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::Memset2DUSM: + case detail::CGType::Memset2DUSM: CommandGroup.reset(new detail::CGMemset2DUSM( - MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight, - std::move(CGData), MCodeLoc)); + MPattern[0], MDstPtr, impl->MDstPitch, impl->MWidth, impl->MHeight, + std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::CodeplayHostTask: { - auto context = MGraph ? detail::getSyclObjImpl(MGraph->getContext()) + case detail::CGType::CodeplayHostTask: { + auto context = impl->MGraph ? detail::getSyclObjImpl(impl->MGraph->getContext()) : MQueue->getContextImplPtr(); CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue, context, std::move(MArgs), - std::move(CGData), MCGType, MCodeLoc)); + std::move(impl->MHostTask), MQueue, context, std::move(impl->MArgs), + std::move(impl->CGData), getType(), MCodeLoc)); break; } - case detail::CG::Barrier: - case detail::CG::BarrierWaitlist: { + case detail::CGType::Barrier: + case detail::CGType::BarrierWaitlist: { if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) { - CGData.MEvents.insert(std::end(CGData.MEvents), - std::begin(MEventsWaitWithBarrier), - std::end(MEventsWaitWithBarrier)); + impl->CGData.MEvents.insert(std::end(impl->CGData.MEvents), + std::begin(impl->MEventsWaitWithBarrier), + std::end(impl->MEventsWaitWithBarrier)); // Barrier node is implemented as an empty node in Graph // but keep the barrier type to help managing dependencies - MCGType = detail::CG::Barrier; + setType(detail::CGType::Barrier); CommandGroup.reset( - new detail::CG(detail::CG::Barrier, std::move(CGData), MCodeLoc)); + new detail::CG(detail::CGType::Barrier, std::move(impl->CGData), MCodeLoc)); } else { CommandGroup.reset( - new detail::CGBarrier(std::move(MEventsWaitWithBarrier), - std::move(CGData), MCGType, MCodeLoc)); + new detail::CGBarrier(std::move(impl->MEventsWaitWithBarrier), + std::move(impl->CGData), getType(), MCodeLoc)); } break; } - case detail::CG::ProfilingTag: { - CommandGroup.reset(new detail::CGProfilingTag(std::move(CGData), MCodeLoc)); + case detail::CGType::ProfilingTag: { + CommandGroup.reset(new detail::CGProfilingTag(std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::CopyToDeviceGlobal: { + case detail::CGType::CopyToDeviceGlobal: { CommandGroup.reset(new detail::CGCopyToDeviceGlobal( - MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset, - std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset, + std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::CopyFromDeviceGlobal: { + case detail::CGType::CopyFromDeviceGlobal: { CommandGroup.reset(new detail::CGCopyFromDeviceGlobal( - MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset, - std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MIsDeviceImageScoped, MLength, impl->MOffset, + std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::ReadWriteHostPipe: { + case detail::CGType::ReadWriteHostPipe: { CommandGroup.reset(new detail::CGReadWriteHostPipe( - MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr, - MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData), + impl->HostPipeName, impl->HostPipeBlocking, impl->HostPipePtr, + impl->HostPipeTypeSize, impl->HostPipeRead, std::move(impl->CGData), MCodeLoc)); break; } - case detail::CG::ExecCommandBuffer: { + case detail::CGType::ExecCommandBuffer: { std::shared_ptr ParentGraph = - MQueue ? MQueue->getCommandGraph() : MGraph; + MQueue ? MQueue->getCommandGraph() : impl->MGraph; // If a parent graph is set that means we are adding or recording a subgraph // and we don't want to actually execute this command graph submission. @@ -446,37 +448,37 @@ event handler::finalize() { ParentLock = ext::oneapi::experimental::detail::graph_impl::WriteLock( ParentGraph->MMutex); } - CGData.MRequirements = MExecGraph->getRequirements(); + impl->CGData.MRequirements = impl->MExecGraph->getRequirements(); // Here we are using the CommandGroup without passing a CommandBuffer to // pass the exec_graph_impl and event dependencies. Since this subgraph CG // will not be executed this is fine. CommandGroup.reset(new sycl::detail::CGExecCommandBuffer( - nullptr, MExecGraph, std::move(CGData))); + nullptr, impl->MExecGraph, std::move(impl->CGData))); } else { event GraphCompletionEvent = - MExecGraph->enqueue(MQueue, std::move(CGData)); + impl->MExecGraph->enqueue(MQueue, std::move(impl->CGData)); MLastEvent = GraphCompletionEvent; return MLastEvent; } } break; - case detail::CG::CopyImage: + case detail::CGType::CopyImage: CommandGroup.reset(new detail::CGCopyImage( - MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat, - MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset, - MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc)); + MSrcPtr, MDstPtr, impl->MImageDesc, impl->MImageFormat, + impl->MImageCopyFlags, impl->MSrcOffset, impl->MDestOffset, + impl->MHostExtent, impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::SemaphoreWait: + case detail::CGType::SemaphoreWait: CommandGroup.reset(new detail::CGSemaphoreWait( - MImpl->MInteropSemaphoreHandle, MImpl->MWaitValue, std::move(CGData), + impl->MInteropSemaphoreHandle, impl->MWaitValue, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::SemaphoreSignal: + case detail::CGType::SemaphoreSignal: CommandGroup.reset(new detail::CGSemaphoreSignal( - MImpl->MInteropSemaphoreHandle, MImpl->MSignalValue, std::move(CGData), + impl->MInteropSemaphoreHandle, impl->MSignalValue, std::move(impl->CGData), MCodeLoc)); break; - case detail::CG::None: + case detail::CGType::None: if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) { std::cout << "WARNING: An empty command group is submitted." << std::endl; } @@ -484,9 +486,9 @@ event handler::finalize() { // Empty nodes are handled by Graph like standard nodes // For Standard mode (non-graph), // empty nodes are not sent to the scheduler to save time - if (MGraph || (MQueue && MQueue->getCommandGraph())) { + if (impl->MGraph || (MQueue && MQueue->getCommandGraph())) { CommandGroup.reset( - new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc)); + new detail::CG(detail::CGType::None, std::move(impl->CGData), MCodeLoc)); } else { detail::EventImplPtr Event = std::make_shared(); MLastEvent = detail::createSyclObjFromImpl(Event); @@ -502,8 +504,8 @@ event handler::finalize() { // If there is a graph associated with the handler we are in the explicit // graph mode, so we store the CG instead of submitting it to the scheduler, // so it can be retrieved by the graph later. - if (MGraph) { - MGraphNodeCG = std::move(CommandGroup); + if (impl->MGraph) { + impl->MGraphNodeCG = std::move(CommandGroup); return detail::createSyclObjFromImpl( std::make_shared()); } @@ -522,10 +524,10 @@ event handler::finalize() { GraphImpl->MMutex); ext::oneapi::experimental::node_type NodeType = - MImpl->MUserFacingNodeType != + impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty - ? MImpl->MUserFacingNodeType - : ext::oneapi::experimental::detail::getNodeTypeFromCG(MCGType); + ? impl->MUserFacingNodeType + : ext::oneapi::experimental::detail::getNodeTypeFromCG(getType()); // Create a new node in the graph representing this command-group if (MQueue->isInOrder()) { @@ -552,7 +554,7 @@ event handler::finalize() { NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); } - if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { + if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { GraphImpl->setBarrierDep(MQueue, NodeImpl); } } @@ -560,20 +562,20 @@ event handler::finalize() { // Associate an event with this new node and return the event. GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl); - NodeImpl->MNDRangeUsed = MImpl->MNDRangeUsed; + NodeImpl->MNDRangeUsed = impl->MNDRangeUsed; return detail::createSyclObjFromImpl(EventImpl); } detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), std::move(MQueue), MImpl->MEventNeeded); + std::move(CommandGroup), std::move(MQueue), impl->MEventNeeded); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } void handler::addReduction(const std::shared_ptr &ReduObj) { - MImpl->MAuxiliaryResources.push_back(ReduObj); + impl->MAuxiliaryResources.push_back(ReduObj); } void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl, @@ -592,13 +594,13 @@ void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl, } // Add accessor to the list of requirements. if (Req->MAccessRange.size() != 0) - CGData.MRequirements.push_back(Req); + impl->CGData.MRequirements.push_back(Req); // Store copy of the accessor. - CGData.MAccStorage.push_back(std::move(AccImpl)); + impl->CGData.MAccStorage.push_back(std::move(AccImpl)); // Add an accessor to the handler list of associated accessors. // For associated accessors index does not means nothing. - MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor, - Req, AccTarget, /*index*/ 0); + impl->MAssociatedAccesors.emplace_back( + detail::kernel_param_kind_t::kind_accessor, Req, AccTarget, /*index*/ 0); } void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, @@ -662,7 +664,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, switch (Kind) { case kernel_param_kind_t::kind_std_layout: case kernel_param_kind_t::kind_pointer: { - MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift); + addArg(Kind, Ptr, Size, Index + IndexShift); break; } case kernel_param_kind_t::kind_stream: { @@ -675,7 +677,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::Requirement *GBufReq = GBufImpl.get(); addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); @@ -683,14 +685,14 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, detail::Requirement *GOffsetReq = GOfssetImpl.get(); addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase); detail::Requirement *GFlushReq = GFlushImpl.get(); - size_t GlobalSize = MNDRDesc.GlobalSize.size(); + size_t GlobalSize = impl->MNDRDesc.GlobalSize.size(); // If work group size wasn't set explicitly then it must be recieved // from kernel attribute or set to default values. // For now we can't get this attribute here. @@ -698,15 +700,14 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // TODO adjust MNDRDesc when device image contains kernel's attribute if (GlobalSize == 0) { // Suppose that work group size is 1 for every dimension - GlobalSize = MNDRDesc.NumWorkGroups.size(); + GlobalSize = impl->MNDRDesc.NumWorkGroups.size(); } addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, GlobalSize, MArgs, + IsKernelCreatedFromSource, GlobalSize, impl->MArgs, IsESIMD); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, - &S->FlushBufferSize, sizeof(S->FlushBufferSize), - Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &S->FlushBufferSize, + sizeof(S->FlushBufferSize), Index + IndexShift); break; } @@ -719,9 +720,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, case access::target::device: case access::target::constant_buffer: { detail::Requirement *AccImpl = static_cast(Ptr); - addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size, - IsKernelCreatedFromSource, - MNDRDesc.GlobalSize.size(), MArgs, IsESIMD); + addArgsForGlobalAccessor( + AccImpl, Index, IndexShift, Size, IsKernelCreatedFromSource, + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); break; } case access::target::local: { @@ -736,7 +737,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // Some backends do not accept zero-sized local memory arguments, so we // make it a minimum allocation of 1 byte. SizeInBytes = std::max(SizeInBytes, 1); - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, + impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, SizeInBytes, Index + IndexShift); // TODO ESIMD currently does not suport MSize field passing yet // accessor::init for ESIMD-mode accessor has a single field, translated @@ -744,21 +745,21 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, if (!IsESIMD && !IsKernelCreatedFromSource) { ++IndexShift; const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]); - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); ++IndexShift; - MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size, - SizeAccField, Index + IndexShift); + addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, + Index + IndexShift); } break; } case access::target::image: case access::target::image_array: { detail::Requirement *AccImpl = static_cast(Ptr); - MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift); + addArg(Kind, AccImpl, Size, Index + IndexShift); if (!IsKernelCreatedFromSource) { // TODO Handle additional kernel arguments for image class // if the compiler front-end adds them. @@ -776,14 +777,13 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_sampler: { - MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), - Index + IndexShift); + addArg(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler), + Index + IndexShift); break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - MArgs.emplace_back( - kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, - Index + IndexShift); + addArg(kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, + Index + IndexShift); break; } case kernel_param_kind_t::kind_invalid: @@ -805,8 +805,8 @@ inline constexpr size_t MaxNumAdditionalArgs = 13; void handler::extractArgsAndReqs() { assert(MKernel && "MKernel is not initialized"); - std::vector UnPreparedArgs = std::move(MArgs); - MArgs.clear(); + std::vector UnPreparedArgs = std::move(impl->MArgs); + clearArgs(); std::sort( UnPreparedArgs.begin(), UnPreparedArgs.end(), @@ -815,7 +815,7 @@ void handler::extractArgsAndReqs() { }); const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource(); - MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); + impl->MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size()); size_t IndexShift = 0; for (size_t I = 0; I < UnPreparedArgs.size(); ++I) { @@ -833,7 +833,7 @@ void handler::extractArgsAndReqsFromLambda( const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) { const bool IsKernelCreatedFromSource = false; size_t IndexShift = 0; - MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum); + impl->MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum); for (size_t I = 0; I < KernelArgsNum; ++I) { void *Ptr = LambdaPtr + KernelArgs[I].offset; @@ -876,12 +876,12 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { return; // Implicit kernel bundles are populated late so we ignore them - if (!MImpl->isStateExplicitKernelBundle()) + if (!impl->isStateExplicitKernelBundle()) return; kernel_id KernelID = detail::get_kernel_id_impl(KernelName); device Dev = - MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this); + impl->MGraph ? impl->MGraph->getDevice() : detail::getDeviceFromHandler(*this); if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev)) throw sycl::exception( make_error_code(errc::kernel_not_supported), @@ -890,8 +890,8 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { void handler::ext_oneapi_barrier(const std::vector &WaitList) { throwIfActionIsCreated(); - MCGType = detail::CG::BarrierWaitlist; - MEventsWaitWithBarrier.reserve(WaitList.size()); + setType(detail::CGType::BarrierWaitlist); + impl->MEventsWaitWithBarrier.reserve(WaitList.size()); for (auto &Event : WaitList) { auto EventImpl = detail::getSyclObjImpl(Event); // We could not wait for host task events in backend. @@ -899,7 +899,7 @@ void handler::ext_oneapi_barrier(const std::vector &WaitList) { if (EventImpl->isHost()) { depends_on(EventImpl); } - MEventsWaitWithBarrier.push_back(EventImpl); + impl->MEventsWaitWithBarrier.push_back(EventImpl); } } @@ -923,7 +923,7 @@ void handler::memcpy(void *Dest, const void *Src, size_t Count) { MSrcPtr = const_cast(Src); MDstPtr = Dest; MLength = Count; - setType(detail::CG::CopyUSM); + setType(detail::CGType::CopyUSM); } void handler::memset(void *Dest, int Value, size_t Count) { @@ -932,22 +932,22 @@ void handler::memset(void *Dest, int Value, size_t Count) { MPattern.push_back(static_cast(Value)); MLength = Count; setUserFacingNodeType(ext::oneapi::experimental::node_type::memset); - setType(detail::CG::FillUSM); + setType(detail::CGType::FillUSM); } void handler::prefetch(const void *Ptr, size_t Count) { throwIfActionIsCreated(); MDstPtr = const_cast(Ptr); MLength = Count; - setType(detail::CG::PrefetchUSM); + setType(detail::CGType::PrefetchUSM); } void handler::mem_advise(const void *Ptr, size_t Count, int Advice) { throwIfActionIsCreated(); MDstPtr = const_cast(Ptr); MLength = Count; - MImpl->MAdvice = static_cast(Advice); - setType(detail::CG::AdviseUSM); + impl->MAdvice = static_cast(Advice); + setType(detail::CGType::AdviseUSM); } void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, @@ -956,7 +956,7 @@ void handler::fill_impl(void *Dest, const void *Value, size_t ValueSize, MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); MLength = Count * ValueSize; - setType(detail::CG::FillUSM); + setType(detail::CGType::FillUSM); } void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, @@ -965,11 +965,11 @@ void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, // Checks done in callers. MSrcPtr = const_cast(Src); MDstPtr = Dest; - MImpl->MSrcPitch = SrcPitch; - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Copy2DUSM); + impl->MSrcPitch = SrcPitch; + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Copy2DUSM); } void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch, @@ -979,10 +979,10 @@ void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch, MDstPtr = Dest; MPattern.resize(ValueSize); std::memcpy(MPattern.data(), Value, ValueSize); - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Fill2DUSM); + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Fill2DUSM); } void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, @@ -990,10 +990,10 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, // Checks done in callers. MDstPtr = Dest; MPattern.push_back(static_cast(Value)); - MImpl->MDstPitch = DestPitch; - MImpl->MWidth = Width; - MImpl->MHeight = Height; - setType(detail::CG::Memset2DUSM); + impl->MDstPitch = DestPitch; + impl->MWidth = Width; + impl->MHeight = Height; + setType(detail::CGType::Memset2DUSM); } void handler::ext_oneapi_copy( @@ -1037,15 +1037,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1091,15 +1091,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DestImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1143,15 +1143,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1195,15 +1195,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(ImageDesc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - MImpl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; + impl->MHostExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1249,15 +1249,15 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(SrcImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST; - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1301,17 +1301,17 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(Desc.num_channels)); - MImpl->MSrcOffset = {0, 0, 0}; - MImpl->MDestOffset = {0, 0, 0}; - MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageDesc.image_row_pitch = Pitch; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcOffset = {0, 0, 0}; + impl->MDestOffset = {0, 0, 0}; + impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MHostExtent = {Desc.width, Desc.height, Desc.depth}; + impl->MImageDesc = PiDesc; + impl->MImageDesc.image_row_pitch = Pitch; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = detail::getPiImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_copy( @@ -1358,17 +1358,17 @@ void handler::ext_oneapi_copy( sycl::_V1::ext::oneapi::experimental::detail:: get_image_default_channel_order(DeviceImgDesc.num_channels)); - MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]}; - MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - MImpl->MImageDesc = PiDesc; - MImpl->MImageDesc.image_row_pitch = DeviceRowPitch; - MImpl->MImageFormat = PiFormat; - MImpl->MImageCopyFlags = detail::getPiImageCopyFlags( + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]}; + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + impl->MImageDesc = PiDesc; + impl->MImageDesc.image_row_pitch = DeviceRowPitch; + impl->MImageFormat = PiFormat; + impl->MImageCopyFlags = detail::getPiImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); - setType(detail::CG::CopyImage); + setType(detail::CGType::CopyImage); } void handler::ext_oneapi_wait_external_semaphore( @@ -1387,10 +1387,10 @@ void handler::ext_oneapi_wait_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore used needs a user passed wait value."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MWaitValue = {}; - setType(detail::CG::SemaphoreWait); + impl->MWaitValue = {}; + setType(detail::CGType::SemaphoreWait); } void handler::ext_oneapi_wait_external_semaphore( @@ -1407,10 +1407,10 @@ void handler::ext_oneapi_wait_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore does not support user passed wait values."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MWaitValue = WaitValue; - setType(detail::CG::SemaphoreWait); + impl->MWaitValue = WaitValue; + setType(detail::CGType::SemaphoreWait); } void handler::ext_oneapi_signal_external_semaphore( @@ -1429,10 +1429,10 @@ void handler::ext_oneapi_signal_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore used needs a user passed signal value."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MSignalValue = {}; - setType(detail::CG::SemaphoreSignal); + impl->MSignalValue = {}; + setType(detail::CGType::SemaphoreSignal); } void handler::ext_oneapi_signal_external_semaphore( @@ -1449,25 +1449,25 @@ void handler::ext_oneapi_signal_external_semaphore( "Invalid type of semaphore for this operation. The " "type of semaphore does not support user passed signal values."); } - MImpl->MInteropSemaphoreHandle = + impl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; - MImpl->MSignalValue = SignalValue; - setType(detail::CG::SemaphoreSignal); + impl->MSignalValue = SignalValue; + setType(detail::CGType::SemaphoreSignal); } void handler::use_kernel_bundle( const kernel_bundle &ExecBundle) { std::shared_ptr PrimaryQueue = - MImpl->MSubmissionPrimaryQueue; - if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) || - (MGraph && (MGraph->getContext() != ExecBundle.get_context()))) + impl->MSubmissionPrimaryQueue; + if ((!impl->MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) || + (impl->MGraph && (impl->MGraph->getContext() != ExecBundle.get_context()))) throw sycl::exception( make_error_code(errc::invalid), "Context associated with the primary queue is different from the " "context associated with the kernel bundle"); std::shared_ptr SecondaryQueue = - MImpl->MSubmissionSecondaryQueue; + impl->MSubmissionSecondaryQueue; if (SecondaryQueue && SecondaryQueue->get_context() != ExecBundle.get_context()) throw sycl::exception( @@ -1510,7 +1510,7 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { "Graph nodes cannot depend on events from another graph."); } } - CGData.MEvents.push_back(EventImpl); + impl->CGData.MEvents.push_back(EventImpl); } void handler::depends_on(const std::vector &Events) { @@ -1579,7 +1579,7 @@ void handler::verifyDeviceHasProgressGuarantee( bool handler::supportsUSMMemcpy2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT)) @@ -1590,7 +1590,7 @@ bool handler::supportsUSMMemcpy2D() { bool handler::supportsUSMFill2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT)) @@ -1601,7 +1601,7 @@ bool handler::supportsUSMFill2D() { bool handler::supportsUSMMemset2D() { for (const std::shared_ptr &QueueImpl : - {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) { + {impl->MSubmissionPrimaryQueue, impl->MSubmissionSecondaryQueue}) { if (QueueImpl && !checkContextSupports(QueueImpl->getContextImplPtr(), PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT)) @@ -1618,30 +1618,30 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { } backend handler::getDeviceBackend() const { - if (MGraph) - return MGraph->getDevice().get_backend(); + if (impl->MGraph) + return impl->MGraph->getDevice().get_backend(); else return MQueue->getDeviceImplPtr()->getBackend(); } void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { - MImpl->HostPipeName = Name.data(); - MImpl->HostPipePtr = Ptr; - MImpl->HostPipeTypeSize = Size; - MImpl->HostPipeBlocking = Block; - MImpl->HostPipeRead = 1; - setType(detail::CG::ReadWriteHostPipe); + impl->HostPipeName = Name.data(); + impl->HostPipePtr = Ptr; + impl->HostPipeTypeSize = Size; + impl->HostPipeBlocking = Block; + impl->HostPipeRead = 1; + setType(detail::CGType::ReadWriteHostPipe); } void handler::ext_intel_write_host_pipe(detail::string_view Name, void *Ptr, size_t Size, bool Block) { - MImpl->HostPipeName = Name.data(); - MImpl->HostPipePtr = Ptr; - MImpl->HostPipeTypeSize = Size; - MImpl->HostPipeBlocking = Block; - MImpl->HostPipeRead = 0; - setType(detail::CG::ReadWriteHostPipe); + impl->HostPipeName = Name.data(); + impl->HostPipePtr = Ptr; + impl->HostPipeTypeSize = Size; + impl->HostPipeBlocking = Block; + impl->HostPipeRead = 0; + setType(detail::CGType::ReadWriteHostPipe); } void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src, @@ -1650,10 +1650,10 @@ void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src, throwIfActionIsCreated(); MSrcPtr = const_cast(Src); MDstPtr = const_cast(DeviceGlobalPtr); - MImpl->MIsDeviceImageScoped = IsDeviceImageScoped; + impl->MIsDeviceImageScoped = IsDeviceImageScoped; MLength = NumBytes; - MImpl->MOffset = Offset; - setType(detail::CG::CopyToDeviceGlobal); + impl->MOffset = Offset; + setType(detail::CGType::CopyToDeviceGlobal); } void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, @@ -1662,10 +1662,10 @@ void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, throwIfActionIsCreated(); MSrcPtr = const_cast(DeviceGlobalPtr); MDstPtr = Dest; - MImpl->MIsDeviceImageScoped = IsDeviceImageScoped; + impl->MIsDeviceImageScoped = IsDeviceImageScoped; MLength = NumBytes; - MImpl->MOffset = Offset; - setType(detail::CG::CopyFromDeviceGlobal); + impl->MOffset = Offset; + setType(detail::CGType::CopyFromDeviceGlobal); } void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr, @@ -1715,38 +1715,39 @@ handler::getContextImplPtr() const { void handler::setKernelCacheConfig( sycl::detail::pi::PiKernelCacheConfig Config) { - MImpl->MKernelCacheConfig = Config; + impl->MKernelCacheConfig = Config; } void handler::setKernelIsCooperative(bool KernelIsCooperative) { - MImpl->MKernelIsCooperative = KernelIsCooperative; + impl->MKernelIsCooperative = KernelIsCooperative; } -void handler::setKernelUsesClusterLaunch() { +void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - MImpl->MKernelUsesClusterLaunch = true; + impl->MKernelUsesClusterLaunch = true; + impl->MNDRDesc.setClusterDimensions(ClusterSize, Dims); } void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable> Graph) { - MCGType = detail::CG::ExecCommandBuffer; - MExecGraph = detail::getSyclObjImpl(Graph); + setType(detail::CGType::ExecCommandBuffer); + impl->MExecGraph = detail::getSyclObjImpl(Graph); } std::shared_ptr handler::getCommandGraph() const { - if (MGraph) { - return MGraph; + if (impl->MGraph) { + return impl->MGraph; } return MQueue->getCommandGraph(); } void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) { - MImpl->MUserFacingNodeType = Type; + impl->MUserFacingNodeType = Type; } std::optional> handler::getMaxWorkGroups() { @@ -1770,7 +1771,7 @@ std::tuple, bool> handler::getMaxWorkGroups_v2() { return {std::array{0, 0, 0}, false}; } -void handler::setNDRangeUsed(bool Value) { MImpl->MNDRangeUsed = Value; } +void handler::setNDRangeUsed(bool Value) { impl->MNDRangeUsed = Value; } void handler::registerDynamicParameter( ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, @@ -1780,22 +1781,94 @@ void handler::registerDynamicParameter( make_error_code(errc::invalid), "Dynamic Parameters cannot be used with Graph Queue recording."); } - if (!MGraph) { + if (!impl->MGraph) { throw sycl::exception( make_error_code(errc::invalid), "Dynamic Parameters cannot be used with normal SYCL submissions"); } - auto ParamImpl = detail::getSyclObjImpl(DynamicParamBase); - if (ParamImpl->MGraph != this->MGraph) { + auto Paraimpl = detail::getSyclObjImpl(DynamicParamBase); + if (Paraimpl->MGraph != this->impl->MGraph) { throw sycl::exception( make_error_code(errc::invalid), "Cannot use a Dynamic Parameter with a node associated with a graph " "other than the one it was created with."); } - MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); + impl->MDynamicParameters.emplace_back(Paraimpl.get(), ArgIndex); +} + +bool handler::eventNeeded() const { return impl->MEventNeeded; } + +void *handler::storeRawArg(const void *Ptr, size_t Size) { + impl->CGData.MArgsStorage.emplace_back(Size); + void *Storage = static_cast(impl->CGData.MArgsStorage.back().data()); + std::memcpy(Storage, Ptr, Size); + return Storage; +} + +void handler::SetHostTask(std::function &&Func) { + setNDRangeDescriptor(range<1>(1)); + impl->MHostTask.reset(new detail::HostTask(std::move(Func))); + setType(detail::CGType::CodeplayHostTask); +} + +void handler::SetHostTask(std::function &&Func) { + setNDRangeDescriptor(range<1>(1)); + impl->MHostTask.reset(new detail::HostTask(std::move(Func))); + setType(detail::CGType::CodeplayHostTask); +} + +void handler::addAccessorReq(detail::AccessorImplPtr Accessor) { + // Add accessor to the list of requirements. + impl->CGData.MRequirements.push_back(Accessor.get()); + // Store copy of the accessor. + impl->CGData.MAccStorage.push_back(std::move(Accessor)); +} + +void handler::addLifetimeSharedPtrStorage(std::shared_ptr SPtr) { + impl->CGData.MSharedPtrStorage.push_back(std::move(SPtr)); +} + +void handler::addArg(detail::kernel_param_kind_t ArgKind, void *Req, + int AccessTarget, int ArgIndex) { + impl->MArgs.emplace_back(ArgKind, Req, AccessTarget, ArgIndex); +} + +void handler::clearArgs() { + impl->MArgs.clear(); +} + +void handler::setArgsToAssociatedAccessors() { + impl->MArgs = impl->MAssociatedAccesors; +} + +bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, + access::target AccessTarget) const { + return std::find_if( + impl->MAssociatedAccesors.cbegin(), + impl->MAssociatedAccesors.cend(), [&](const detail::ArgDesc &AD) { + return AD.MType == detail::kernel_param_kind_t::kind_accessor && + AD.MPtr == Req && + AD.MSize == static_cast(AccessTarget); + }) == impl->MAssociatedAccesors.end(); +} + +void handler::setType(sycl::detail::CGType Type) { impl->MCGType = Type; } +sycl::detail::CGType handler::getType() const { return impl->MCGType; } + +void handler::setNDRangeDescriptorPadded(sycl::range<3> N, + bool SetNumWorkGroups, int Dims) { + impl->MNDRDesc = NDRDescT{N, SetNumWorkGroups, Dims}; +} +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::id<3> Offset, int Dims) { + impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims}; +} +void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, + sycl::range<3> LocalSize, sycl::id<3> Offset, + int Dims) { + impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; } -bool handler::eventNeeded() const { return MImpl->MEventNeeded; } } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index b54ed7b1e8f9..7e6a333e305d 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -2,7 +2,6 @@ // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt #include -#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp b/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp index dc384c049e83..1a8300eae488 100644 --- a/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/Inputs/host_task_accessor.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include #include -#include #if !defined(accessor_new_api_test) && !defined(buffer_new_api_test) && \ !defined(accessor_placeholder_new_api_test) && \ diff --git a/sycl/test-e2e/Basic/accessor/accessor.cpp b/sycl/test-e2e/Basic/accessor/accessor.cpp index 0846a4fdc6a7..de944ba12754 100644 --- a/sycl/test-e2e/Basic/accessor/accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/accessor.cpp @@ -11,7 +11,6 @@ #include #include #include -#include struct IdxID1 { int x; diff --git a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp index ed282aa84cf6..b8e91547b83c 100644 --- a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp @@ -1,7 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include -#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/event.cpp b/sycl/test-e2e/Basic/event.cpp index d5cba1063f07..513e1aa8d630 100644 --- a/sycl/test-e2e/Basic/event.cpp +++ b/sycl/test-e2e/Basic/event.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include #include -#include int main() { { diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 7f4f31320f1e..8185eeac6658 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -8,7 +8,6 @@ #define SYCL2020_DISABLE_DEPRECATION_WARNINGS #include -#include #include #include diff --git a/sycl/test-e2e/Basic/host_task_depends.cpp b/sycl/test-e2e/Basic/host_task_depends.cpp index 746062ffce31..e59acd8a701d 100644 --- a/sycl/test-e2e/Basic/host_task_depends.cpp +++ b/sycl/test-e2e/Basic/host_task_depends.cpp @@ -3,7 +3,6 @@ // RUN: %{run} %t.out #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/Basic/out_of_order_queue_status.cpp b/sycl/test-e2e/Basic/out_of_order_queue_status.cpp index a7f6319892bc..67929ac0fc66 100644 --- a/sycl/test-e2e/Basic/out_of_order_queue_status.cpp +++ b/sycl/test-e2e/Basic/out_of_order_queue_status.cpp @@ -5,7 +5,6 @@ // queue. #include -#include #include #include diff --git a/sycl/test-e2e/Basic/profile_host_task.cpp b/sycl/test-e2e/Basic/profile_host_task.cpp index 61fa74539739..d3512fc13d55 100644 --- a/sycl/test-e2e/Basic/profile_host_task.cpp +++ b/sycl/test-e2e/Basic/profile_host_task.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include int main() { diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp index 039d1a008b45..bb87ffeb443a 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_linear.cpp @@ -7,8 +7,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp index fefd50ca52cd..d69f0db910bf 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_sampled_image_read_nearest.cpp @@ -7,8 +7,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp index c2121346a50b..99774f29cc83 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_read.cpp @@ -6,8 +6,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp index 14bbca6d0411..2fd1f31026e1 100644 --- a/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp +++ b/sycl/test-e2e/Basic/sycl_2020_images/host_task_unsampled_image_write.cpp @@ -6,8 +6,6 @@ #include "common.hpp" -#include - constexpr size_t IMAGE_WIDTH = 5; constexpr size_t IMAGE_HEIGHT = 4; constexpr size_t IMAGE_DEPTH = 2; diff --git a/sycl/test-e2e/Graph/Inputs/host_task.cpp b/sycl/test-e2e/Graph/Inputs/host_task.cpp index 8d1c976b1acb..8ec19d770dbc 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task2.cpp b/sycl/test-e2e/Graph/Inputs/host_task2.cpp index e49d97f36c85..6fddddf489b3 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task2.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task2.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp b/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp index 369b51cbdb9c..c4d03e3be9e4 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task2_multiple_roots.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_last.cpp b/sycl/test-e2e/Graph/Inputs/host_task_last.cpp index 572087cb5412..07cc7f1dc61a 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_last.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_last.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp b/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp index 8b899cb3bb31..efe084645053 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_multiple_deps.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp b/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp index c72fe620f73c..62dc577106e3 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_multiple_roots.cpp @@ -3,8 +3,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_single.cpp b/sycl/test-e2e/Graph/Inputs/host_task_single.cpp index 28c566f23991..d87d6d1f76e5 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_single.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_single.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp b/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp index fbe94be1be0e..1312ea1074da 100644 --- a/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp +++ b/sycl/test-e2e/Graph/Inputs/host_task_successive.cpp @@ -2,8 +2,6 @@ #include "../graph_common.hpp" -#include - int main() { queue Queue{}; diff --git a/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp index 3c22462ac485..56ce712b8d41 100644 --- a/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Graph/Inputs/interop-level-zero-get-native-mem.cpp @@ -9,7 +9,7 @@ // SYCL #include -#include +#include bool is_discrete(const device &Device) { auto ZeDevice = get_native(Device); diff --git a/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp b/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp index e35d175e5205..ac5184124b16 100644 --- a/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/host_task_in_order.cpp @@ -12,7 +12,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp index f2260314aa80..17d66d8b2d1d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp index ee7b8c215f39..cd19c85add1f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memcpy.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp index 1fc8264b018c..ad375fbac43d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/in_order_queue_with_host_managed_dependencies_memset.cpp @@ -8,7 +8,6 @@ #include "../graph_common.hpp" -#include #include int main() { diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp index c6752de3923d..d1a7f5c419e3 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp @@ -6,7 +6,6 @@ #include #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp index 3b4cdf1b53c3..16b308cfc544 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp index a991182ece57..bb315329195b 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp @@ -3,7 +3,6 @@ // RUN: %{run} %t.out #include -#include sycl::event submit(sycl::queue &Q, sycl::buffer &B) { return Q.submit([&](sycl::handler &CGH) { diff --git a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp index 7828362fab56..eef7cf70b21b 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp @@ -5,7 +5,6 @@ // UNSUPPORTED: ze_debug && windows #include -#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp b/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp index 741b3dea3dd3..7f384fbc0972 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-two-queues.cpp @@ -6,7 +6,6 @@ #include #include -#include #include namespace S = sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task.cpp b/sycl/test-e2e/HostInteropTask/host-task.cpp index 8dda4a1daaeb..3d6423aa6e0f 100644 --- a/sycl/test-e2e/HostInteropTask/host-task.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp index fee893b9f752..3f73fcb420b2 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp index 503fa4a51067..ad4c973019fc 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include diff --git a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp index e7b34adf2ccb..05d4edf1d2d1 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-hip.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #define __HIP_PLATFORM_AMD__ diff --git a/sycl/test-e2e/HostInteropTask/interop-task.cpp b/sycl/test-e2e/HostInteropTask/interop-task.cpp index aa6606c1cc2f..668affc96a7c 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp index c1dbbed7b134..7a33cbf7f33c 100644 --- a/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp +++ b/sycl/test-e2e/InOrderEventsExt/get_last_event.cpp @@ -11,7 +11,6 @@ // call to ext_oneapi_set_external_event. #include -#include #include #include diff --git a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp index 997a8f582452..0ff035914946 100644 --- a/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp b/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp index 92ccbe5b019e..e8d07332a3c5 100644 --- a/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp +++ b/sycl/test-e2e/InorderQueue/in_order_usm_host_dependency.cpp @@ -5,7 +5,6 @@ // queue in the case when usm commands and host tasks are interleaved. #include -#include #include #include diff --git a/sycl/test-e2e/KernelFusion/sync_host_task.cpp b/sycl/test-e2e/KernelFusion/sync_host_task.cpp index 40754d4e7a8d..640bd7264146 100644 --- a/sycl/test-e2e/KernelFusion/sync_host_task.cpp +++ b/sycl/test-e2e/KernelFusion/sync_host_task.cpp @@ -8,7 +8,6 @@ // complete_fusion. #include -#include #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp index e6257353d3bd..1f9bfd5b5f23 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp @@ -13,8 +13,8 @@ // SYCL #include "interop-level-zero-buffer-helpers.hpp" #include -#include #include +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp index ccf99824f324..cb4b317b629b 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-image-get-native-mem.cpp @@ -24,9 +24,9 @@ #include #include #include -#include #include #include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-opencl.cpp b/sycl/test-e2e/Plugin/interop-opencl.cpp index faa5ae348191..137c2b8a7455 100644 --- a/sycl/test-e2e/Plugin/interop-opencl.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp index c2d262d106c5..22e742e0d6c2 100644 --- a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp +++ b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp @@ -21,7 +21,6 @@ // CHECK-NEXT: : 3 #include -#include int main(int argc, const char **argv) { diff --git a/sycl/test-e2e/USM/host_task.cpp b/sycl/test-e2e/USM/host_task.cpp index 6c30c07b049d..fc6551844e9e 100644 --- a/sycl/test-e2e/USM/host_task.cpp +++ b/sycl/test-e2e/USM/host_task.cpp @@ -9,7 +9,6 @@ // REQUIRES: aspect-usm_shared_allocations #include -#include #include int main() { diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index dc1e3f82edda..0a571110240f 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -14,7 +14,7 @@ void foo() { // The order of field declarations and their types are important. // CHECK: 0 | class sycl::handler -// CHECK-NEXT: 0 | class std::shared_ptr MImpl +// CHECK-NEXT: 0 | class std::shared_ptr impl // CHECK-NEXT: 0 | class std::__shared_ptr (base) // CHECK-NEXT: 0 | class std::__shared_ptr_access (base) (empty) // CHECK-NEXT: 0 | element_type * _M_ptr @@ -26,166 +26,58 @@ void foo() { // CHECK-NEXT: 16 | element_type * _M_ptr // CHECK-NEXT: 24 | class std::__shared_count<> _M_refcount // CHECK-NEXT: 24 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 32 | struct sycl::detail::CG::StorageInitHelper CGData -// CHECK-NEXT: 32 | class std::vector > MArgsStorage -// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 32 | class std::allocator > (base) (empty) -// CHECK: 32 | pointer _M_start -// CHECK-NEXT: 40 | pointer _M_finish -// CHECK-NEXT: 48 | pointer _M_end_of_storage -// CHECK-NEXT: 56 | class std::vector > MAccStorage -// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 56 | class std::allocator > (base) (empty) -// CHECK: 56 | pointer _M_start -// CHECK-NEXT: 64 | pointer _M_finish -// CHECK-NEXT: 72 | pointer _M_end_of_storage -// CHECK-NEXT: 80 | class std::vector > MSharedPtrStorage -// CHECK-NEXT: 80 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 80 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 80 | class std::allocator > (base) (empty) -// CHECK: 80 | pointer _M_start -// CHECK-NEXT: 88 | pointer _M_finish -// CHECK-NEXT: 96 | pointer _M_end_of_storage -// CHECK-NEXT: 104 | class std::vector MRequirements -// CHECK-NEXT: 104 | struct std::_Vector_base > (base) -// CHECK-NEXT: 104 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 104 | class std::allocator (base) (empty) -// CHECK: 104 | pointer _M_start -// CHECK-NEXT: 112 | pointer _M_finish -// CHECK-NEXT: 120 | pointer _M_end_of_storage -// CHECK-NEXT: 128 | class std::vector > MEvents -// CHECK-NEXT: 128 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 128 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 128 | class std::allocator > (base) (empty) -// CHECK: 128 | pointer _M_start -// CHECK-NEXT: 136 | pointer _M_finish -// CHECK-NEXT: 144 | pointer _M_end_of_storage -// CHECK-NEXT: 152 | class std::vector > MLocalAccStorage -// CHECK-NEXT: 152 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 152 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 152 | class std::allocator > (base) (empty) -// CHECK: 152 | pointer _M_start -// CHECK-NEXT: 160 | pointer _M_finish -// CHECK-NEXT: 168 | pointer _M_end_of_storage -// CHECK-NEXT: 176 | class std::vector > MStreamStorage -// CHECK-NEXT: 176 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 176 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 176 | class std::allocator > (base) (empty) -// CHECK: 176 | pointer _M_start -// CHECK-NEXT: 184 | pointer _M_finish -// CHECK-NEXT: 192 | pointer _M_end_of_storage -// CHECK-NEXT: 200 | class std::vector MArgs -// CHECK-NEXT: 200 | struct std::_Vector_base > (base) -// CHECK-NEXT: 200 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 200 | class std::allocator (base) (empty) -// CHECK: 200 | pointer _M_start -// CHECK-NEXT: 208 | pointer _M_finish -// CHECK-NEXT: 216 | pointer _M_end_of_storage -// CHECK-NEXT: 224 | class std::vector MAssociatedAccesors -// CHECK-NEXT: 224 | struct std::_Vector_base > (base) -// CHECK-NEXT: 224 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 224 | class std::allocator (base) (empty) -// CHECK: 224 | pointer _M_start -// CHECK-NEXT: 232 | pointer _M_finish -// CHECK-NEXT: 240 | pointer _M_end_of_storage -// CHECK-NEXT: 248 | class sycl::detail::NDRDescT MNDRDesc -// CHECK-NEXT: 248 | class sycl::range<3> GlobalSize -// CHECK-NEXT: 248 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 248 | size_t[3] common_array -// CHECK-NEXT: 272 | class sycl::range<3> LocalSize -// CHECK-NEXT: 272 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 272 | size_t[3] common_array -// CHECK-NEXT: 296 | class sycl::id<3> GlobalOffset -// CHECK-NEXT: 296 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 296 | size_t[3] common_array -// CHECK-NEXT: 320 | class sycl::range<3> NumWorkGroups -// CHECK-NEXT: 320 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 320 | size_t[3] common_array -// CHECK-NEXT: 344 | class sycl::range<3> ClusterDimensions -// CHECK-NEXT: 344 | class sycl::detail::array<3> (base) -// CHECK-NEXT: 344 | size_t[3] common_array -// CHECK-NEXT: 368 | size_t Dims -// CHECK-NEXT: 376 | class sycl::detail::string MKernelName -// CHECK-NEXT: 376 | char * str -// CHECK-NEXT: 384 | class std::shared_ptr MKernel -// CHECK-NEXT: 384 | class std::__shared_ptr (base) -// CHECK-NEXT: 384 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 384 | element_type * _M_ptr -// CHECK-NEXT: 392 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 392 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 400 | detail::class CG::CGTYPE MCGType -// CHECK-NEXT: 408 | void * MSrcPtr -// CHECK-NEXT: 416 | void * MDstPtr -// CHECK-NEXT: 424 | size_t MLength -// CHECK-NEXT: 432 | class std::vector MPattern -// CHECK-NEXT: 432 | struct std::_Vector_base > (base) -// CHECK-NEXT: 432 | struct std::_Vector_base >::_Vector_impl _M_impl -// CHECK-NEXT: 432 | class std::allocator (base) (empty) -// CHECK: 432 | pointer _M_start -// CHECK-NEXT: 440 | pointer _M_finish -// CHECK-NEXT: 448 | pointer _M_end_of_storage -// CHECK-NEXT: 456 | class std::unique_ptr MHostKernel -// CHECK: 456 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 456 | class std::tuple > _M_t -// CHECK-NEXT: 456 | struct std::_Tuple_impl<0, class sycl::detail::HostKernelBase *, struct std::default_delete > (base) -// CHECK-NEXT: 456 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 456 | struct std::_Head_base<0, class sycl::detail::HostKernelBase *> (base) -// CHECK-NEXT: 456 | class sycl::detail::HostKernelBase * _M_head_impl -// CHECK-NEXT: 464 | class std::unique_ptr MHostTask -// CHECK: 464 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 464 | class std::tuple > _M_t -// CHECK-NEXT: 464 | struct std::_Tuple_impl<0, class sycl::detail::HostTask *, struct std::default_delete > (base) -// CHECK-NEXT: 464 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 464 | struct std::_Head_base<0, class sycl::detail::HostTask *> (base) -// CHECK-NEXT: 464 | class sycl::detail::HostTask * _M_head_impl -// CHECK-NEXT: 472 | class std::vector > MEventsWaitWithBarrier -// CHECK-NEXT: 472 | struct std::_Vector_base, class std::allocator > > (base) -// CHECK-NEXT: 472 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl -// CHECK-NEXT: 472 | class std::allocator > (base) (empty) -// CHECK: 472 | pointer _M_start -// CHECK-NEXT: 480 | pointer _M_finish -// CHECK-NEXT: 488 | pointer _M_end_of_storage -// CHECK-NEXT: 496 | class std::shared_ptr MGraph -// CHECK-NEXT: 496 | class std::__shared_ptr (base) -// CHECK-NEXT: 496 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 496 | element_type * _M_ptr -// CHECK-NEXT: 504 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 504 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 512 | class std::shared_ptr MExecGraph -// CHECK-NEXT: 512 | class std::__shared_ptr (base) -// CHECK-NEXT: 512 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 512 | element_type * _M_ptr -// CHECK-NEXT: 520 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 520 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 528 | class std::shared_ptr MSubgraphNode -// CHECK-NEXT: 528 | class std::__shared_ptr (base) -// CHECK-NEXT: 528 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 528 | element_type * _M_ptr -// CHECK-NEXT: 536 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 536 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: 544 | class std::unique_ptr MGraphNodeCG -// CHECK: 544 | class std::__uniq_ptr_impl > -// CHECK-NEXT: 544 | class std::tuple > _M_t -// CHECK-NEXT: 544 | struct std::_Tuple_impl<0, class sycl::detail::CG *, struct std::default_delete > (base) -// CHECK-NEXT: 544 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) -// CHECK: 544 | struct std::_Head_base<0, class sycl::detail::CG *> (base) -// CHECK-NEXT: 544 | class sycl::detail::CG * _M_head_impl -// CHECK-NEXT: 552 | struct sycl::detail::code_location MCodeLoc -// CHECK-NEXT: 552 | const char * MFileName -// CHECK-NEXT: 560 | const char * MFunctionName -// CHECK-NEXT: 568 | unsigned long MLineNo -// CHECK-NEXT: 576 | unsigned long MColumnNo -// CHECK-NEXT: 584 | _Bool MIsFinalized -// CHECK-NEXT: 592 | class sycl::event MLastEvent -// CHECK-NEXT: 592 | class sycl::detail::OwnerLessBase (base) (empty) -// CHECK-NEXT: 592 | class std::shared_ptr impl -// CHECK-NEXT: 592 | class std::__shared_ptr (base) -// CHECK-NEXT: 592 | class std::__shared_ptr_access (base) (empty) -// CHECK-NEXT: 592 | element_type * _M_ptr -// CHECK-NEXT: 600 | class std::__shared_count<> _M_refcount -// CHECK-NEXT: 600 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi -// CHECK-NEXT: | [sizeof=608, dsize=608, align=8, -// CHECK-NEXT: | nvsize=608, nvalign=8] - +// CHECK-NEXT: 32 | class std::vector > MLocalAccStorage +// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 32 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 32 | class std::allocator > (base) (empty) +// CHECK: 32 | pointer _M_start +// CHECK-NEXT: 40 | pointer _M_finish +// CHECK-NEXT: 48 | pointer _M_end_of_storage +// CHECK-NEXT: 56 | class std::vector > MStreamStorage +// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > > (base) +// CHECK-NEXT: 56 | struct std::_Vector_base, class std::allocator > >::_Vector_impl _M_impl +// CHECK-NEXT: 56 | class std::allocator > (base) (empty) +// CHECK: 56 | pointer _M_start +// CHECK-NEXT: 64 | pointer _M_finish +// CHECK-NEXT: 72 | pointer _M_end_of_storage +// CHECK-NEXT: 80 | class sycl::detail::string MKernelName +// CHECK-NEXT: 80 | char * str +// CHECK-NEXT: 88 | class std::shared_ptr MKernel +// CHECK-NEXT: 88 | class std::__shared_ptr (base) +// CHECK-NEXT: 88 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 88 | element_type * _M_ptr +// CHECK-NEXT: 96 | class std::__shared_count<> _M_refcount +// CHECK-NEXT: 96 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi +// CHECK-NEXT: 104 | void * MSrcPtr +// CHECK-NEXT: 112 | void * MDstPtr +// CHECK-NEXT: 120 | size_t MLength +// CHECK-NEXT: 128 | class std::vector MPattern +// CHECK-NEXT: 128 | struct std::_Vector_base > (base) +// CHECK-NEXT: 128 | struct std::_Vector_base >::_Vector_impl _M_impl +// CHECK-NEXT: 128 | class std::allocator (base) (empty) +// CHECK: 128 | pointer _M_start +// CHECK-NEXT: 136 | pointer _M_finish +// CHECK-NEXT: 144 | pointer _M_end_of_storage +// CHECK-NEXT: 152 | class std::unique_ptr MHostKernel +// CHECK: 152 | class std::__uniq_ptr_impl > (base) +// CHECK-NEXT: 152 | class std::tuple > _M_t +// CHECK-NEXT: 152 | struct std::_Tuple_impl<0, class sycl::detail::HostKernelBase *, struct std::default_delete > (base) +// CHECK-NEXT: 152 | struct std::_Tuple_impl<1, struct std::default_delete > (base) (empty) +// CHECK: 152 | struct std::_Head_base<0, class sycl::detail::HostKernelBase *> (base) +// CHECK-NEXT: 152 | class sycl::detail::HostKernelBase * _M_head_impl +// CHECK-NEXT: 160 | struct sycl::detail::code_location MCodeLoc +// CHECK-NEXT: 160 | const char * MFileName +// CHECK-NEXT: 168 | const char * MFunctionName +// CHECK-NEXT: 176 | unsigned long MLineNo +// CHECK-NEXT: 184 | unsigned long MColumnNo +// CHECK-NEXT: 192 | _Bool MIsFinalized +// CHECK-NEXT: 200 | class sycl::event MLastEvent +// CHECK-NEXT: 200 | class sycl::detail::OwnerLessBase (base) (empty) +// CHECK-NEXT: 200 | class std::shared_ptr impl +// CHECK-NEXT: 200 | class std::__shared_ptr (base) +// CHECK-NEXT: 200 | class std::__shared_ptr_access (base) (empty) +// CHECK-NEXT: 200 | element_type * _M_ptr +// CHECK-NEXT: 208 | class std::__shared_count<> _M_refcount +// CHECK-NEXT: 208 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi +// CHECK-NEXT: | [sizeof=216, dsize=216, align=8, +// CHECK-NEXT: | nvsize=216, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4b442721a108..d50aa035912c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3482,8 +3482,12 @@ _ZN4sycl3_V17handler10depends_onERKSt6vectorINS0_5eventESaIS3_EE _ZN4sycl3_V17handler10depends_onERKSt6vectorISt10shared_ptrINS0_6detail10event_implEESaIS6_EE _ZN4sycl3_V17handler10mem_adviseEPKvmi _ZN4sycl3_V17handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb +_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvNS0_14interop_handleEEE +_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvvEE +_ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE _ZN4sycl3_V17handler13getKernelNameEv +_ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE _ZN4sycl3_V17handler14setNDRangeUsedEb _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorEPvS7_S7_S7_ _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleEPvRKNS4_16image_descriptorE @@ -3521,12 +3525,17 @@ _ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi +_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3EEEi _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi -_ZN4sycl3_V17handler26setKernelUsesClusterLaunchEv +_ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE +_ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi3EEEi _ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm _ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb _ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm _ZN4sycl3_V17handler28setStateExplicitKernelBundleEv +_ZN4sycl3_V17handler28setArgsToAssociatedAccessorsEv _ZN4sycl3_V17handler30memcpyFromHostOnlyDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler30verifyUsedKernelBundleInternalENS0_6detail11string_viewE _ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ @@ -3534,10 +3543,13 @@ _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experi _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleEm _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleEm +_ZN4sycl3_V17handler6addArgENS0_6detail19kernel_param_kind_tEPvii _ZN4sycl3_V17handler6memcpyEPvPKvm _ZN4sycl3_V17handler6memsetEPvim +_ZN4sycl3_V17handler7setTypeENS0_6detail6CGTypeE _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm +_ZN4sycl3_V17handler9clearArgsEv _ZN4sycl3_V17handler9fill_implEPvPKvmm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b @@ -3960,8 +3972,10 @@ _ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv +_ZNK4sycl3_V17handler21HasAssociatedAccessorEPNS0_6detail16AccessorImplHostENS0_6access6targetE _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb +_ZNK4sycl3_V17handler7getTypeEv _ZNK4sycl3_V17sampler11getPropListEv _ZNK4sycl3_V17sampler18get_filtering_modeEv _ZNK4sycl3_V17sampler19get_addressing_modeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d79c06baf7d8..b7c33e3d5943 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -640,6 +640,7 @@ ??_Fqueue@_V1@sycl@@QEAAXXZ ?AccessTargetMask@handler@_V1@sycl@@0HB ?Clear@exception_list@_V1@sycl@@AEAAXXZ +?clearArgs@handler@_V1@sycl@@AEAAXXZ ?DirSep@OSUtil@detail@_V1@sycl@@2QEBDEB ?DisableRangeRounding@handler@_V1@sycl@@AEAA_NXZ ?GDBMethodsAnchor@SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ @@ -3695,6 +3696,9 @@ ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z ?addReduction@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z ?addStream@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@@Z +?addArg@handler@_V1@sycl@@AEAAXW4kernel_param_kind_t@detail@23@PEAXHH@Z +?addLifetimeSharedPtrStorage@handler@_V1@sycl@@AEAAXV?$shared_ptr@$$CBX@std@@@Z +?addAccessorReq@handler@_V1@sycl@@AEAAXV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ?alignedAlloc@OSUtil@detail@_V1@sycl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@_V1@sycl@@SAXPEAX@Z ?aligned_alloc@_V1@sycl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z @@ -3983,7 +3987,7 @@ ?getSize@buffer_plain@detail@_V1@sycl@@IEBA_KXZ ?getSlicePitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ +?getType@handler@_V1@sycl@@AEBA?AW4CGType@detail@23@XZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z ?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ @@ -4069,6 +4073,7 @@ ?get_width@stream@_V1@sycl@@QEBA_KXZ ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z +?HasAssociatedAccessor@handler@_V1@sycl@@AEBA_NPEAVAccessorImplHost@detail@23@W4target@access@23@@Z ?handleRelease@buffer_plain@detail@_V1@sycl@@IEBAXXZ ?has@device@_V1@sycl@@QEBA_NW4aspect@23@@Z ?has@platform@_V1@sycl@@QEBA_NW4aspect@23@@Z @@ -4224,13 +4229,19 @@ ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4_pi_kernel_cache_config@@@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z -?setKernelUsesClusterLaunch@handler@_V1@sycl@@AEAAXXZ ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ -?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z +?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z +?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ +?setType@handler@_V1@sycl@@AEAAXW4CGType@detail@23@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z +?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXVinterop_handle@_V1@sycl@@@Z@std@@@Z +?SetHostTask@handler@_V1@sycl@@AEAAX$$QEAV?$function@$$A6AXXZ@std@@@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z ?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_arg@handler@_V1@sycl@@QEAAXH$$QEAVraw_kernel_arg@experimental@oneapi@ext@23@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4254,6 +4265,7 @@ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?storeRawArg@handler@_V1@sycl@@AEAAPEAXAEBVraw_kernel_arg@experimental@oneapi@ext@23@@Z +?storeRawArg@handler@_V1@sycl@@AEAAPEAXPEBX_K@Z ?stringifyErrorCode@detail@_V1@sycl@@YAPEBDH@Z ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index db310b629192..03ab7022045c 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -52,9 +52,9 @@ int main() { check(); check(); #ifdef _MSC_VER - check(); + check(); #else - check(); + check(); #endif check, 16, 8>(); check(); diff --git a/sycl/test/abi/vtable.cpp b/sycl/test/abi/vtable.cpp index 8f1766d0960d..f1ff7e02f190 100644 --- a/sycl/test/abi/vtable.cpp +++ b/sycl/test/abi/vtable.cpp @@ -8,30 +8,6 @@ // Changing vtable breaks ABI. If this test fails, please, refer to ABI Policy // Guide for further instructions. -void foo(sycl::detail::HostKernelBase &HKB) { - sycl::detail::NDRDescT Desc; - sycl::detail::HostProfilingInfo HPInfo; - HKB.call(Desc, &HPInfo); -} - -// CHECK: Vtable for 'sycl::detail::HostKernelBase' (6 entries). -// CHECK-NEXT: 0 | offset_to_top (0) -// CHECK-NEXT: 1 | sycl::detail::HostKernelBase RTTI -// CHECK-NEXT: -- (sycl::detail::HostKernelBase, 0) vtable address -- -// CHECK-NEXT: 2 | void sycl::detail::HostKernelBase::call(const NDRDescT &, HostProfilingInfo *) [pure] -// CHECK-NEXT: 3 | char *sycl::detail::HostKernelBase::getPtr() [pure] -// CHECK-NEXT: 4 | sycl::detail::HostKernelBase::~HostKernelBase() [complete] -// CHECK-NEXT: 5 | sycl::detail::HostKernelBase::~HostKernelBase() [deleting] - -void foo(sycl::detail::CG *CG) { delete CG; } -// CHECK: Vtable for 'sycl::detail::CG' (6 entries). -// CHECK-NEXT: 0 | offset_to_top (0) -// CHECK-NEXT: 1 | sycl::detail::CG RTTI -// CHECK-NEXT: -- (sycl::detail::CG, 0) vtable address -- -// CHECK-NEXT: 2 | std::vector> sycl::detail::CG::getAuxiliaryResources() const -// CHECK-NEXT: 3 | void sycl::detail::CG::clearAuxiliaryResources() -// CHECK-NEXT: 4 | sycl::detail::CG::~CG() [complete] - void foo(sycl::detail::PropertyWithDataBase *Prop) { delete Prop; } // CHECK: Vtable for 'sycl::detail::PropertyWithDataBase' (4 entries). // CHECK-NEXT: 0 | offset_to_top (0) diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 027bf700f548..711073ad7ff5 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -145,9 +145,6 @@ // CHECK-NEXT: ext/oneapi/properties/properties.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cg.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: kernel_bundle.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp @@ -158,5 +155,7 @@ // CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp +// CHECK-NEXT: kernel.hpp +// CHECK-NEXT: kernel_bundle.hpp // CHECK-NEXT: feature_test.hpp // CHECK-EMPTY: diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 790956d5aeff..4d3d8532efb4 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -43,7 +43,7 @@ TEST_F(CommandGraphTest, EnqueueBarrier) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto BarrierNode = Node->MSuccessors.front().lock(); - ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); + ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CGType::Barrier); ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); @@ -151,7 +151,7 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitList) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); @@ -208,7 +208,7 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); @@ -271,14 +271,14 @@ TEST_F(CommandGraphTest, EnqueueMultipleBarrier) { auto Node = Root.lock(); ASSERT_EQ(Node->MSuccessors.size(), 1lu); auto SuccNode = Node->MSuccessors.front().lock(); - if (SuccNode->MCGType == sycl::detail::CG::Barrier) { + if (SuccNode->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), sycl::detail::getSyclObjImpl(Barrier1)); ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); ASSERT_EQ(SuccNode->MSuccessors.size(), 3lu); for (auto Succ1 : SuccNode->MSuccessors) { auto SuccBarrier1 = Succ1.lock(); - if (SuccBarrier1->MCGType == sycl::detail::CG::Barrier) { + if (SuccBarrier1->MCGType == sycl::detail::CGType::Barrier) { ASSERT_EQ(GraphImpl->getEventForNode(SuccBarrier1), sycl::detail::getSyclObjImpl(Barrier2)); ASSERT_EQ(SuccBarrier1->MPredecessors.size(), 3lu); @@ -338,7 +338,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousCommand) { for (auto Root : GraphImpl->MRoots) { auto RootNode = Root.lock(); ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); - ASSERT_TRUE(RootNode->MCGType == sycl::detail::CG::Barrier); + ASSERT_TRUE(RootNode->MCGType == sycl::detail::CGType::Barrier); } } @@ -378,7 +378,7 @@ TEST_F(CommandGraphTest, InOrderQueuesWithBarrier) { ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_TRUE(SuccNode->MCGType == sycl::detail::CG::Barrier); + ASSERT_TRUE(SuccNode->MCGType == sycl::detail::CGType::Barrier); ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 20b95f99a2d1..655bbf0d0587 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -167,7 +167,7 @@ TEST_F(CommandGraphTest, GetCGCopy) { auto Node2Imp = sycl::detail::getSyclObjImpl(Node2); auto Node2CGCopy = Node2Imp->getCGCopy(); ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCGType); - ASSERT_EQ(Node2CGCopy->getType(), sycl::detail::CG::Kernel); + ASSERT_EQ(Node2CGCopy->getType(), sycl::detail::CGType::Kernel); ASSERT_EQ(Node2CGCopy->getType(), Node2Imp->MCommandGroup->getType()); ASSERT_EQ(Node2CGCopy->getAccStorage(), Node2Imp->MCommandGroup->getAccStorage()); diff --git a/sycl/unittests/event/EventDestruction.cpp b/sycl/unittests/event/EventDestruction.cpp index a9381af5dcda..20427b7623cf 100644 --- a/sycl/unittests/event/EventDestruction.cpp +++ b/sycl/unittests/event/EventDestruction.cpp @@ -8,7 +8,6 @@ #include #include -#include #include diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index c2d53e3d8e6d..ae5bca9c4a8b 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -110,6 +110,8 @@ inline pi_result redefinedProgramCreateEAM(pi_context, const void *, size_t, class MockHandler : public sycl::handler { public: + using sycl::handler::impl; + MockHandler(std::shared_ptr Queue) : sycl::handler(Queue, /*CallerNeedsEvent*/ true) {} @@ -117,14 +119,14 @@ class MockHandler : public sycl::handler { auto CGH = static_cast(this); std::unique_ptr CommandGroup; switch (getType()) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { CommandGroup.reset(new sycl::detail::CGExecKernel( - std::move(CGH->MNDRDesc), std::move(CGH->MHostKernel), - std::move(CGH->MKernel), std::move(MImpl->MKernelBundle), - std::move(CGH->CGData), std::move(CGH->MArgs), + std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), + std::move(CGH->MKernel), std::move(impl->MKernelBundle), + std::move(impl->CGData), std::move(impl->MArgs), CGH->MKernelName.c_str(), std::move(CGH->MStreamStorage), - std::move(MImpl->MAuxiliaryResources), CGH->MCGType, {}, - MImpl->MKernelIsCooperative, MImpl->MKernelUsesClusterLaunch, + std::move(impl->MAuxiliaryResources), impl->MCGType, {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, CGH->MCodeLoc)); break; } @@ -135,6 +137,7 @@ class MockHandler : public sycl::handler { return CommandGroup; } + }; const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index bd0df10d1309..2e686a94f67b 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -80,7 +80,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { for (auto &Arg : InputEventWaitLists) { std::unique_ptr CommandGroup(new detail::CGBarrier( std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), - detail::CG::CGTYPE::BarrierWaitlist, {})); + detail::CGType::BarrierWaitlist, {})); MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, /*EventNeeded=*/true); } diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 3f97ffb003ad..66c40572440d 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -9,6 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" #include +#include #include #include #include @@ -31,7 +32,7 @@ class MockQueueImpl : public sycl::detail::queue_impl { // Define type with the only methods called by finalizeHandler class LimitedHandler { public: - LimitedHandler(sycl::detail::CG::CGTYPE CGType, + LimitedHandler(sycl::detail::CGType CGType, std::shared_ptr Queue) : MCGType(CGType), MQueue(Queue) {} @@ -46,17 +47,18 @@ class LimitedHandler { return sycl::detail::createSyclObjFromImpl(NewEvent); } - sycl::detail::CG::CGTYPE getType() { return MCGType; } + sycl::detail::CGType getType() { return MCGType; } - sycl::detail::CG::CGTYPE MCGType; + sycl::detail::CGType MCGType; std::shared_ptr MQueue; + std::shared_ptr impl; }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst // event as dependency to the new CG class LimitedHandlerSimulation : public LimitedHandler { public: - LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType, + LimitedHandlerSimulation(sycl::detail::CGType CGType, std::shared_ptr Queue) : LimitedHandler(CGType, Queue) {} @@ -80,14 +82,14 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index b98aff0e040d..cc4bfe123e21 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -230,12 +230,12 @@ class MockHandler : public sycl::handler { : sycl::handler(Queue, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; + using sycl::handler::impl; using sycl::handler::getType; - using sycl::handler::MImpl; + using sycl::handler::setNDRangeDescriptor; - sycl::detail::NDRDescT &getNDRDesc() { return MNDRDesc; } + sycl::detail::NDRDescT &getNDRDesc() { return impl->MNDRDesc; } sycl::detail::code_location &getCodeLoc() { return MCodeLoc; } - sycl::detail::CG::CGTYPE &getCGType() { return MCGType; } std::vector> &getStreamStorage() { return MStreamStorage; } @@ -243,28 +243,30 @@ class MockHandler : public sycl::handler { return MHostKernel; } std::vector> &getArgsStorage() { - return CGData.MArgsStorage; + return impl->CGData.MArgsStorage; } std::vector &getAccStorage() { - return CGData.MAccStorage; + return impl->CGData.MAccStorage; } std::vector> &getSharedPtrStorage() { - return CGData.MSharedPtrStorage; + return impl->CGData.MSharedPtrStorage; } std::vector &getRequirements() { - return CGData.MRequirements; + return impl->CGData.MRequirements; } std::vector &getEvents() { - return CGData.MEvents; + return impl->CGData.MEvents; } - std::vector &getArgs() { return MArgs; } + std::vector &getArgs() { return impl->MArgs; } std::string getKernelName() { return MKernelName.c_str(); } std::shared_ptr &getKernel() { return MKernel; } - std::unique_ptr &getHostTask() { return MHostTask; } + std::shared_ptr &getHostTask() { + return impl->MHostTask; + } std::shared_ptr &getQueue() { return MQueue; } - void setType(sycl::detail::CG::CGTYPE Type) { - static_cast(this)->MCGType = Type; + void setType(sycl::detail::CGType Type) { + impl->MCGType = Type; } template void setNDRangeDesc(sycl::nd_range Range) { - static_cast(this)->MNDRDesc.set(std::move(Range)); + setNDRangeDescriptor(std::move(Range)); } void addStream(const sycl::detail::StreamImplPtr &Stream) { @@ -302,19 +304,19 @@ class MockHandlerCustomFinalize : public MockHandler { getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()); switch (getType()) { - case sycl::detail::CG::Kernel: { + case sycl::detail::CGType::Kernel: { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), - std::move(MImpl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), getStreamStorage(), MImpl->MAuxiliaryResources, - getCGType(), {}, MImpl->MKernelIsCooperative, - MImpl->MKernelUsesClusterLaunch, getCodeLoc())); + std::move(impl->MKernelBundle), std::move(CGData), getArgs(), + getKernelName(), getStreamStorage(), impl->MAuxiliaryResources, + getType(), {}, impl->MKernelIsCooperative, + impl->MKernelUsesClusterLaunch, getCodeLoc())); break; } - case sycl::detail::CG::CodeplayHostTask: { + case sycl::detail::CGType::CodeplayHostTask: { CommandGroup.reset(new sycl::detail::CGHostTask( std::move(getHostTask()), getQueue(), getQueue()->getContextImplPtr(), - getArgs(), std::move(CGData), getCGType(), getCodeLoc())); + getArgs(), std::move(CGData), getType(), getCodeLoc())); break; } default: diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index c8aac819e1c3..a45a1cb6842c 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -27,16 +27,16 @@ class MockHandlerStreamInit : public MockHandler { std::unique_ptr finalize() { std::unique_ptr CommandGroup; switch (getType()) { - case detail::CG::Kernel: { + case detail::CGType::Kernel: { CommandGroup.reset(new detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), - std::move(MImpl->MKernelBundle), + std::move(impl->MKernelBundle), detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), getArgs(), getKernelName(), getStreamStorage(), - std::move(MImpl->MAuxiliaryResources), getCGType(), {}, - MImpl->MKernelIsCooperative, MImpl->MKernelUsesClusterLaunch, + std::move(impl->MAuxiliaryResources), getType(), {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, getCodeLoc())); break; }