diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index cbc13528a1e41..0e21fc46a02e1 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -15,10 +15,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -797,7 +797,7 @@ class accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), AccessMode, detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { - CommandGroupHandler.associateWithHandler(*this); + detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); } #endif @@ -839,7 +839,7 @@ class accessor : detail::convertToArrayOfN<3, 1>(BufferRef.get_range()), AccessMode, detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { - CommandGroupHandler.associateWithHandler(*this); + detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); } #endif @@ -883,7 +883,7 @@ class accessor : AccessMode, detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer) { - CommandGroupHandler.associateWithHandler(*this); + detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); } #endif @@ -1186,7 +1186,10 @@ class accessor( Image, CommandGroupHandler, (detail::getSyclObjImpl(Image))->getElementSize()) { - CommandGroupHandler.associateWithHandler(*this); +#ifndef __SYCL_DEVICE_ONLY__ + detail::associateWithHandler(CommandGroupHandler, this, + access::target::image); +#endif } #ifdef __SYCL_DEVICE_ONLY__ private: @@ -1254,7 +1257,10 @@ class accessor( Image, CommandGroupHandler, (detail::getSyclObjImpl(Image))->getElementSize()) { - CommandGroupHandler.associateWithHandler(*this); +#ifndef __SYCL_DEVICE_ONLY__ + detail::associateWithHandler(CommandGroupHandler, this, + access::target::image_array); +#endif } detail::__image_array_slice__ diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index 523ae0f783b1b..09783b9d3713f 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -32,7 +31,6 @@ template class buffer; -class handler; using buffer_allocator = detail::sycl_memory_object_allocator; diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 2ef0ff5170b74..685d41bfae2aa 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -8,8 +8,10 @@ #pragma once +#include #include #include +#include #include #include #include @@ -18,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -36,354 +39,10 @@ namespace detail { class queue_impl; } // namespace detail -// Interoperability handler -// -class interop_handler { - // Make accessor class friend to access the detail mem objects - template - friend class accessor; - -public: - using QueueImplPtr = std::shared_ptr; - using ReqToMem = std::pair; - - interop_handler(std::vector MemObjs, QueueImplPtr Queue) - : MQueue(std::move(Queue)), MMemObjs(std::move(MemObjs)) {} - - template - auto get_queue() const -> typename interop::type { - return reinterpret_cast::type>( - GetNativeQueue()); - } - - template - auto get_mem(accessor - Acc) const -> - typename interop>::type { - detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; - return getMemImpl( - detail::getSyclObjImpl(*AccBase).get()); - } - -private: - QueueImplPtr MQueue; - std::vector MMemObjs; - - template - auto getMemImpl(detail::Requirement *Req) const -> typename interop< - BackendName, - accessor>::type { - return (typename interop>::type)GetNativeMem(Req); - } - - __SYCL_EXPORT pi_native_handle GetNativeMem(detail::Requirement *Req) const; - __SYCL_EXPORT pi_native_handle GetNativeQueue() const; -}; - namespace detail { using namespace cl; -// The structure represents kernel argument. -class ArgDesc { -public: - ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, - int Index) - : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} - - sycl::detail::kernel_param_kind_t MType; - void *MPtr; - int MSize; - int MIndex; -}; - -// The structure represents NDRange - global, local sizes, global offset and -// number of dimensions. -class NDRDescT { - // The method initializes all sizes for dimensions greater than the passed one - // to the default values, so they will not affect execution. - void setNDRangeLeftover(int Dims_) { - for (int I = Dims_; I < 3; ++I) { - GlobalSize[I] = 1; - LocalSize[I] = LocalSize[0] ? 1 : 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - } - -public: - NDRDescT() - : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0} {} - - template void set(sycl::range NumWorkItems) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - // Initializes this ND range descriptor with given range of work items and - // offset. - template - void set(sycl::range NumWorkItems, sycl::id Offset) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = NumWorkItems[I]; - LocalSize[I] = 0; - GlobalOffset[I] = Offset[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void set(sycl::nd_range ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - void set(int Dims_, sycl::nd_range<3> ExecutionRange) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = ExecutionRange.get_global_range()[I]; - LocalSize[I] = ExecutionRange.get_local_range()[I]; - GlobalOffset[I] = ExecutionRange.get_offset()[I]; - NumWorkGroups[I] = 0; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - template void setNumWorkGroups(sycl::range N) { - for (int I = 0; I < Dims_; ++I) { - GlobalSize[I] = 0; - // '0' is a mark to adjust before kernel launch when there is enough info: - LocalSize[I] = 0; - GlobalOffset[I] = 0; - NumWorkGroups[I] = N[I]; - } - setNDRangeLeftover(Dims_); - Dims = Dims_; - } - - sycl::range<3> GlobalSize; - sycl::range<3> LocalSize; - sycl::id<3> GlobalOffset; - /// Number of workgroups, used to record the number of workgroups from the - /// simplest form of parallel_for_work_group. If set, all other fields must be - /// zero - sycl::range<3> NumWorkGroups; - size_t Dims; -}; - -// The pure virtual class aimed to store lambda/functors of any type. -class HostKernelBase { -public: - // The method executes lambda stored using NDRange passed. - virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0; - // Return pointer to the lambda object. - // Used to extract captured variables. - virtual char *getPtr() = 0; - virtual ~HostKernelBase() = default; -}; - -class InteropTask { - std::function MFunc; - -public: - InteropTask(function_class Func) - : MFunc(Func) {} - void call(cl::sycl::interop_handler &h) { MFunc(h); } -}; - -class HostTask { - std::function MHostTask; - -public: - HostTask() : MHostTask([]() {}) {} - HostTask(std::function &&Func) : MHostTask(Func) {} - - void call() { MHostTask(); } -}; - -// Class which stores specific lambda object. -template -class HostKernel : public HostKernelBase { - using IDBuilder = sycl::detail::Builder; - KernelType MKernel; - -public: - HostKernel(KernelType Kernel) : MKernel(Kernel) {} - void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { - // adjust ND range for serial host: - NDRDescT AdjustedRange = NDRDesc; - - if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) { - // This is a special case - NDRange information is not complete, only the - // desired number of work groups is set by the user. Choose work group - // size (LocalSize), calculate the missing NDRange characteristics - // needed to invoke the kernel and adjust the NDRange descriptor - // accordingly. For some devices the work group size selection requires - // access to the device's properties, hence such late "adjustment". - range<3> WGsize{1, 1, 1}; // no better alternative for serial host? - AdjustedRange.set(NDRDesc.Dims, - nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize)); - } - // If local size for host is not set explicitly, let's adjust it to 1, - // so nd_range_error for zero local size is not thrown. - if (AdjustedRange.LocalSize[0] == 0) - for (size_t I = 0; I < AdjustedRange.Dims; ++I) - AdjustedRange.LocalSize[I] = 1; - if (HPI) - HPI->start(); - runOnHost(AdjustedRange); - if (HPI) - HPI->end(); - } - - char *getPtr() override { return reinterpret_cast(&MKernel); } - - template - typename std::enable_if::value>::type - runOnHost(const NDRDescT &) { - MKernel(); - } - - template - typename std::enable_if>::value>::type - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) - Range[I] = NDRDesc.GlobalSize[I]; - - detail::NDLoop::iterate( - Range, [&](const sycl::id &ID) { MKernel(ID); }); - } - - template - typename std::enable_if< - std::is_same>::value>::type - runOnHost(const NDRDescT &NDRDesc) { - sycl::id ID; - sycl::range Range(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) - Range[I] = NDRDesc.GlobalSize[I]; - - detail::NDLoop::iterate(Range, [&](const sycl::id ID) { - sycl::item Item = - IDBuilder::createItem(Range, ID); - MKernel(Item); - }); - } - - template - typename std::enable_if< - std::is_same>::value>::type - runOnHost(const NDRDescT &NDRDesc) { - sycl::range Range(InitializedVal::template get<0>()); - sycl::id Offset; - for (int I = 0; I < Dims; ++I) { - Range[I] = NDRDesc.GlobalSize[I]; - Offset[I] = NDRDesc.GlobalOffset[I]; - } - - detail::NDLoop::iterate(Range, [&](const sycl::id &ID) { - sycl::id OffsetID = ID + Offset; - sycl::item Item = - IDBuilder::createItem(Range, OffsetID, Offset); - MKernel(Item); - }); - } - - template - typename std::enable_if>::value>::type - runOnHost(const NDRDescT &NDRDesc) { - sycl::range GroupSize(InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::nd_range_error("Invalid local size for global size", - PI_INVALID_WORK_GROUP_SIZE); - GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - sycl::id GlobalOffset; - for (int I = 0; I < Dims; ++I) { - GlobalOffset[I] = NDRDesc.GlobalOffset[I]; - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - - detail::NDLoop::iterate(GroupSize, [&](const id &GroupID) { - sycl::group Group = IDBuilder::createGroup( - GlobalSize, LocalSize, GroupSize, GroupID); - - detail::NDLoop::iterate(LocalSize, [&](const id &LocalID) { - id GlobalID = GroupID * LocalSize + LocalID + GlobalOffset; - const sycl::item GlobalItem = - IDBuilder::createItem(GlobalSize, GlobalID, - GlobalOffset); - const sycl::item LocalItem = - IDBuilder::createItem(LocalSize, LocalID); - const sycl::nd_item NDItem = - IDBuilder::createNDItem(GlobalItem, LocalItem, Group); - MKernel(NDItem); - }); - }); - } - - template - enable_if_t>::value> - runOnHost(const NDRDescT &NDRDesc) { - sycl::range NGroups(InitializedVal::template get<0>()); - - for (int I = 0; I < Dims; ++I) { - if (NDRDesc.LocalSize[I] == 0 || - NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::nd_range_error("Invalid local size for global size", - PI_INVALID_WORK_GROUP_SIZE); - NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; - } - - sycl::range LocalSize(InitializedVal::template get<0>()); - sycl::range GlobalSize( - InitializedVal::template get<0>()); - for (int I = 0; I < Dims; ++I) { - LocalSize[I] = NDRDesc.LocalSize[I]; - GlobalSize[I] = NDRDesc.GlobalSize[I]; - } - detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { - sycl::group Group = - IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - MKernel(Group); - }); - } - - ~HostKernel() = default; -}; - class stream_impl; /// Base class for all types of command groups. class CG { diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp new file mode 100644 index 0000000000000..f4df3eeb783e6 --- /dev/null +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -0,0 +1,315 @@ +//==---- cg_types.hpp - Auxiliary types required by command group class ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +// The structure represents kernel argument. +class ArgDesc { +public: + ArgDesc(cl::sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, + int Index) + : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {} + + cl::sycl::detail::kernel_param_kind_t MType; + void *MPtr; + int MSize; + int MIndex; +}; + +// The structure represents NDRange - global, local sizes, global offset and +// number of dimensions. +class NDRDescT { + // The method initializes all sizes for dimensions greater than the passed one + // to the default values, so they will not affect execution. + void setNDRangeLeftover(int Dims_) { + for (int I = Dims_; I < 3; ++I) { + GlobalSize[I] = 1; + LocalSize[I] = LocalSize[0] ? 1 : 0; + GlobalOffset[I] = 0; + NumWorkGroups[I] = 0; + } + } + +public: + NDRDescT() + : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0} {} + + template void set(sycl::range NumWorkItems) { + for (int I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + LocalSize[I] = 0; + GlobalOffset[I] = 0; + NumWorkGroups[I] = 0; + } + setNDRangeLeftover(Dims_); + Dims = Dims_; + } + + // Initializes this ND range descriptor with given range of work items and + // offset. + template + void set(sycl::range NumWorkItems, sycl::id Offset) { + for (int I = 0; I < Dims_; ++I) { + GlobalSize[I] = NumWorkItems[I]; + LocalSize[I] = 0; + GlobalOffset[I] = Offset[I]; + NumWorkGroups[I] = 0; + } + setNDRangeLeftover(Dims_); + Dims = Dims_; + } + + template void set(sycl::nd_range ExecutionRange) { + for (int I = 0; I < Dims_; ++I) { + GlobalSize[I] = ExecutionRange.get_global_range()[I]; + LocalSize[I] = ExecutionRange.get_local_range()[I]; + GlobalOffset[I] = ExecutionRange.get_offset()[I]; + NumWorkGroups[I] = 0; + } + setNDRangeLeftover(Dims_); + Dims = Dims_; + } + + void set(int Dims_, sycl::nd_range<3> ExecutionRange) { + for (int I = 0; I < Dims_; ++I) { + GlobalSize[I] = ExecutionRange.get_global_range()[I]; + LocalSize[I] = ExecutionRange.get_local_range()[I]; + GlobalOffset[I] = ExecutionRange.get_offset()[I]; + NumWorkGroups[I] = 0; + } + setNDRangeLeftover(Dims_); + Dims = Dims_; + } + + template void setNumWorkGroups(sycl::range N) { + for (int I = 0; I < Dims_; ++I) { + GlobalSize[I] = 0; + // '0' is a mark to adjust before kernel launch when there is enough info: + LocalSize[I] = 0; + GlobalOffset[I] = 0; + NumWorkGroups[I] = N[I]; + } + setNDRangeLeftover(Dims_); + Dims = Dims_; + } + + sycl::range<3> GlobalSize; + sycl::range<3> LocalSize; + sycl::id<3> GlobalOffset; + /// Number of workgroups, used to record the number of workgroups from the + /// simplest form of parallel_for_work_group. If set, all other fields must be + /// zero + sycl::range<3> NumWorkGroups; + size_t Dims; +}; + +// The pure virtual class aimed to store lambda/functors of any type. +class HostKernelBase { +public: + // The method executes lambda stored using NDRange passed. + virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0; + // Return pointer to the lambda object. + // Used to extract captured variables. + virtual char *getPtr() = 0; + virtual ~HostKernelBase() = default; +}; + +class InteropTask { + std::function MFunc; + +public: + InteropTask(function_class Func) + : MFunc(Func) {} + void call(cl::sycl::interop_handler &h) { MFunc(h); } +}; + +class HostTask { + std::function MHostTask; + +public: + HostTask() : MHostTask([]() {}) {} + HostTask(std::function &&Func) : MHostTask(Func) {} + + void call() { MHostTask(); } +}; + +// Class which stores specific lambda object. +template +class HostKernel : public HostKernelBase { + using IDBuilder = sycl::detail::Builder; + KernelType MKernel; + +public: + HostKernel(KernelType Kernel) : MKernel(Kernel) {} + void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { + // adjust ND range for serial host: + NDRDescT AdjustedRange = NDRDesc; + + if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) { + // This is a special case - NDRange information is not complete, only the + // desired number of work groups is set by the user. Choose work group + // size (LocalSize), calculate the missing NDRange characteristics + // needed to invoke the kernel and adjust the NDRange descriptor + // accordingly. For some devices the work group size selection requires + // access to the device's properties, hence such late "adjustment". + range<3> WGsize{1, 1, 1}; // no better alternative for serial host? + AdjustedRange.set(NDRDesc.Dims, + nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize)); + } + // If local size for host is not set explicitly, let's adjust it to 1, + // so nd_range_error for zero local size is not thrown. + if (AdjustedRange.LocalSize[0] == 0) + for (size_t I = 0; I < AdjustedRange.Dims; ++I) + AdjustedRange.LocalSize[I] = 1; + if (HPI) + HPI->start(); + runOnHost(AdjustedRange); + if (HPI) + HPI->end(); + } + + char *getPtr() override { return reinterpret_cast(&MKernel); } + + template + typename std::enable_if::value>::type + runOnHost(const NDRDescT &) { + MKernel(); + } + + template + typename std::enable_if>::value>::type + runOnHost(const NDRDescT &NDRDesc) { + sycl::range Range(InitializedVal::template get<0>()); + for (int I = 0; I < Dims; ++I) + Range[I] = NDRDesc.GlobalSize[I]; + + detail::NDLoop::iterate( + Range, [&](const sycl::id &ID) { MKernel(ID); }); + } + + template + typename std::enable_if< + std::is_same>::value>::type + runOnHost(const NDRDescT &NDRDesc) { + sycl::id ID; + sycl::range Range(InitializedVal::template get<0>()); + for (int I = 0; I < Dims; ++I) + Range[I] = NDRDesc.GlobalSize[I]; + + detail::NDLoop::iterate(Range, [&](const sycl::id ID) { + sycl::item Item = + IDBuilder::createItem(Range, ID); + MKernel(Item); + }); + } + + template + typename std::enable_if< + std::is_same>::value>::type + runOnHost(const NDRDescT &NDRDesc) { + sycl::range Range(InitializedVal::template get<0>()); + sycl::id Offset; + for (int I = 0; I < Dims; ++I) { + Range[I] = NDRDesc.GlobalSize[I]; + Offset[I] = NDRDesc.GlobalOffset[I]; + } + + detail::NDLoop::iterate(Range, [&](const sycl::id &ID) { + sycl::id OffsetID = ID + Offset; + sycl::item Item = + IDBuilder::createItem(Range, OffsetID, Offset); + MKernel(Item); + }); + } + + template + typename std::enable_if>::value>::type + runOnHost(const NDRDescT &NDRDesc) { + sycl::range GroupSize(InitializedVal::template get<0>()); + for (int I = 0; I < Dims; ++I) { + if (NDRDesc.LocalSize[I] == 0 || + NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) + throw sycl::nd_range_error("Invalid local size for global size", + PI_INVALID_WORK_GROUP_SIZE); + GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; + } + + sycl::range LocalSize(InitializedVal::template get<0>()); + sycl::range GlobalSize( + InitializedVal::template get<0>()); + sycl::id GlobalOffset; + for (int I = 0; I < Dims; ++I) { + GlobalOffset[I] = NDRDesc.GlobalOffset[I]; + LocalSize[I] = NDRDesc.LocalSize[I]; + GlobalSize[I] = NDRDesc.GlobalSize[I]; + } + + detail::NDLoop::iterate(GroupSize, [&](const id &GroupID) { + sycl::group Group = IDBuilder::createGroup( + GlobalSize, LocalSize, GroupSize, GroupID); + + detail::NDLoop::iterate(LocalSize, [&](const id &LocalID) { + id GlobalID = GroupID * LocalSize + LocalID + GlobalOffset; + const sycl::item GlobalItem = + IDBuilder::createItem(GlobalSize, GlobalID, + GlobalOffset); + const sycl::item LocalItem = + IDBuilder::createItem(LocalSize, LocalID); + const sycl::nd_item NDItem = + IDBuilder::createNDItem(GlobalItem, LocalItem, Group); + MKernel(NDItem); + }); + }); + } + + template + enable_if_t>::value> + runOnHost(const NDRDescT &NDRDesc) { + sycl::range NGroups(InitializedVal::template get<0>()); + + for (int I = 0; I < Dims; ++I) { + if (NDRDesc.LocalSize[I] == 0 || + NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) + throw sycl::nd_range_error("Invalid local size for global size", + PI_INVALID_WORK_GROUP_SIZE); + NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; + } + + sycl::range LocalSize(InitializedVal::template get<0>()); + sycl::range GlobalSize( + InitializedVal::template get<0>()); + for (int I = 0; I < Dims; ++I) { + LocalSize[I] = NDRDesc.LocalSize[I]; + GlobalSize[I] = NDRDesc.GlobalSize[I]; + } + detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { + sycl::group Group = + IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); + MKernel(Group); + }); + } + + ~HostKernel() = default; +}; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/handler_proxy.hpp b/sycl/include/CL/sycl/detail/handler_proxy.hpp new file mode 100644 index 0000000000000..8ed01eb7a7b4d --- /dev/null +++ b/sycl/include/CL/sycl/detail/handler_proxy.hpp @@ -0,0 +1,27 @@ +//==--------- handler_proxy.hpp - Proxy methods to call in handler ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +class handler; + +namespace detail { + +class AccessorBaseHost; + +__SYCL_EXPORT void associateWithHandler(handler &, AccessorBaseHost *, + access::target); +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index c65eea75bb7ba..1b3daeef659c8 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -9,10 +9,13 @@ #pragma once #include +#include #include #include #include +#include #include +#include #include #include #include @@ -287,10 +290,8 @@ class __SYCL_EXPORT handler { bool is_host() { return MIsHost; } - template - void associateWithHandler(accessor Acc) { - detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; + void associateWithHandler(detail::AccessorBaseHost *AccBase, + access::target AccTarget) { detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); detail::Requirement *Req = AccImpl.get(); // Add accessor to the list of requirements. @@ -711,7 +712,11 @@ class __SYCL_EXPORT handler { void require(accessor Acc) { - associateWithHandler(Acc); +#ifndef __SYCL_DEVICE_ONLY__ + associateWithHandler(&Acc, AccTarget); +#else + (void)Acc; +#endif } /// Registers event dependencies on this command group. @@ -935,8 +940,10 @@ class __SYCL_EXPORT handler { // Copy from RWAcc to some temp memory. handler CopyHandler(QueueCopy, MIsHost); CopyHandler.saveCodeLoc(MCodeLoc); - CopyHandler.associateWithHandler(RWAcc); +#ifndef __SYCL_DEVICE_ONLY__ + CopyHandler.associateWithHandler(&RWAcc, access::target::global_buffer); Redu.associateWithHandler(CopyHandler); +#endif CopyHandler.copy(RWAcc, Redu.getUserAccessor()); MLastEvent = CopyHandler.finalize(); } @@ -1686,6 +1693,10 @@ class __SYCL_EXPORT handler { template friend class intel::detail::reduction_impl; + + friend void detail::associateWithHandler(handler &, + detail::AccessorBaseHost *, + access::target); }; } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/reduction.hpp b/sycl/include/CL/sycl/intel/reduction.hpp index 19fdac7e1b410..baa83fe618615 100644 --- a/sycl/include/CL/sycl/intel/reduction.hpp +++ b/sycl/include/CL/sycl/intel/reduction.hpp @@ -475,9 +475,13 @@ class reduction_impl { /// Associates reduction accessor with the given handler and saves reduction /// buffer so that it is alive until the command group finishes the work. void associateWithHandler(handler &CGH) { +#ifndef __SYCL_DEVICE_ONLY__ if (MUSMBufPtr != nullptr) CGH.addReduction(MUSMBufPtr); - CGH.associateWithHandler(MAcc); + CGH.associateWithHandler(&MAcc, access::target::global_buffer); +#else + (void)CGH; +#endif } accessor + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +// Interoperability handler +// +class interop_handler { + // Make accessor class friend to access the detail mem objects + template + friend class accessor; + +public: + using QueueImplPtr = std::shared_ptr; + using ReqToMem = std::pair; + + interop_handler(std::vector MemObjs, QueueImplPtr Queue) + : MQueue(std::move(Queue)), MMemObjs(std::move(MemObjs)) {} + + template + auto get_queue() const -> typename interop::type { + return reinterpret_cast::type>( + GetNativeQueue()); + } + + template + auto get_mem(accessor + Acc) const -> + typename interop>::type { + detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; + return getMemImpl( + detail::getSyclObjImpl(*AccBase).get()); + } + +private: + QueueImplPtr MQueue; + std::vector MMemObjs; + + template + auto getMemImpl(detail::Requirement *Req) const -> typename interop< + BackendName, + accessor>::type { + return (typename interop>::type)GetNativeMem(Req); + } + + __SYCL_EXPORT pi_native_handle GetNativeMem(detail::Requirement *Req) const; + __SYCL_EXPORT pi_native_handle GetNativeQueue() const; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/types.hpp b/sycl/include/CL/sycl/types.hpp index 6765e73ca1251..4541d07b0d170 100644 --- a/sycl/include/CL/sycl/types.hpp +++ b/sycl/include/CL/sycl/types.hpp @@ -10,6 +10,8 @@ #pragma once +#include + // Define __NO_EXT_VECTOR_TYPE_ON_HOST__ to avoid using ext_vector_type // extension even if the host compiler supports it. The same can be // accomplished by -D__NO_EXT_VECTOR_TYPE_ON_HOST__ command line option. diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 17ff24db14b00..1a511fcaf3436 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -101,7 +101,6 @@ set(SYCL_SOURCES "detail/builtins_integer.cpp" "detail/builtins_math.cpp" "detail/builtins_relational.cpp" - "detail/cg.cpp" "detail/pi.cpp" "detail/common.cpp" "detail/config.cpp" @@ -112,6 +111,7 @@ set(SYCL_SOURCES "detail/event_impl.cpp" "detail/force_device.cpp" "detail/helpers.cpp" + "detail/handler_proxy.cpp" "detail/image_accessor_util.cpp" "detail/image_impl.cpp" "detail/kernel_impl.cpp" @@ -143,6 +143,7 @@ set(SYCL_SOURCES "function_pointer.cpp" "half_type.cpp" "handler.cpp" + "interop_handler.cpp" "kernel.cpp" "platform.cpp" "program.cpp" diff --git a/sycl/source/detail/handler_proxy.cpp b/sycl/source/detail/handler_proxy.cpp new file mode 100644 index 0000000000000..c294634763b76 --- /dev/null +++ b/sycl/source/detail/handler_proxy.cpp @@ -0,0 +1,24 @@ +//==--------- handler_proxy.cpp - Proxy methods to call in handler ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +void associateWithHandler(handler &CGH, AccessorBaseHost *Acc, + access::target Target) { + CGH.associateWithHandler(Acc, Target); +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/cg.cpp b/sycl/source/interop_handler.cpp similarity index 77% rename from sycl/source/detail/cg.cpp rename to sycl/source/interop_handler.cpp index a9a226dee277f..f6fb55ad28120 100644 --- a/sycl/source/detail/cg.cpp +++ b/sycl/source/interop_handler.cpp @@ -1,4 +1,4 @@ -//==-------------- cg.cpp --------------------------------------------------==// +//==------- interop_handler.cpp - Argument for codeplay_introp_task --------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,18 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include -#include #include +#include #include -#include -#include - -#include -#include -#include -#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -41,5 +33,5 @@ pi_native_handle interop_handler::GetNativeMem(detail::Requirement *Req) const { return Handle; } -} // sycl +} // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 8aedee48dd8ec..28c142ac53365 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3234,6 +3234,7 @@ _ZN2cl4sycl6detail18stringifyErrorCodeEi _ZN2cl4sycl6detail19convertChannelOrderE23_pi_image_channel_order _ZN2cl4sycl6detail19convertChannelOrderENS0_19image_channel_orderE _ZN2cl4sycl6detail19getImageElementSizeEhNS0_18image_channel_typeE +_ZN2cl4sycl6detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostENS0_6access6targetE _ZN2cl4sycl6detail20getDeviceFromHandlerERNS0_7handlerE _ZN2cl4sycl6detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE _ZN2cl4sycl6detail22getImageNumberChannelsENS0_19image_channel_orderE