diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index 74670fa0fb557..f8cff7f39590e 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -24,6 +24,8 @@ extern "C" const __constant size_t_vec __spirv_BuiltInGlobalOffset; template <> size_t get##POSTFIX<1>() { return __spirv_BuiltIn##POSTFIX.y; } \ template <> size_t get##POSTFIX<2>() { return __spirv_BuiltIn##POSTFIX.z; } +namespace __spirv { + DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalSize); DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalInvocationId) DEFINE_INT_ID_TO_XYZ_CONVERTER(WorkgroupSize) @@ -31,6 +33,8 @@ DEFINE_INT_ID_TO_XYZ_CONVERTER(LocalInvocationId) DEFINE_INT_ID_TO_XYZ_CONVERTER(WorkgroupId) DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalOffset) +} // namespace __spirv + #undef DEFINE_INT_ID_TO_XYZ_CONVERTER extern "C" const __constant uint32_t __spirv_BuiltInSubgroupSize; @@ -40,4 +44,45 @@ extern "C" const __constant uint32_t __spirv_BuiltInNumEnqueuedSubgroups; extern "C" const __constant uint32_t __spirv_BuiltInSubgroupId; extern "C" const __constant uint32_t __spirv_BuiltInSubgroupLocalInvocationId; +#define DEFINE_INIT_SIZES(POSTFIX) \ + \ + template struct InitSizesST##POSTFIX; \ + \ + template struct InitSizesST##POSTFIX<1, DstT> { \ + static void initSize(DstT &Dst) { \ + Dst[0] = get##POSTFIX<0>(); \ + } \ + }; \ + \ + template struct InitSizesST##POSTFIX<2, DstT> { \ + static void initSize(DstT &Dst) { \ + Dst[1] = get##POSTFIX<1>(); \ + InitSizesST##POSTFIX<1, DstT>::initSize(Dst); \ + } \ + }; \ + \ + template struct InitSizesST##POSTFIX<3, DstT> { \ + static void initSize(DstT &Dst) { \ + Dst[2] = get##POSTFIX<2>(); \ + InitSizesST##POSTFIX<2, DstT>::initSize(Dst); \ + } \ + }; \ + \ + template static void init##POSTFIX(DstT &Dst) { \ + InitSizesST##POSTFIX::initSize(Dst); \ + } + +namespace __spirv { + +DEFINE_INIT_SIZES(GlobalSize); +DEFINE_INIT_SIZES(GlobalInvocationId) +DEFINE_INIT_SIZES(WorkgroupSize) +DEFINE_INIT_SIZES(LocalInvocationId) +DEFINE_INIT_SIZES(WorkgroupId) +DEFINE_INIT_SIZES(GlobalOffset) + +} // namespace __spirv + +#undef DEFINE_INIT_SIZES + #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 10f71aa8c72d6..62ad16e71cd35 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -67,46 +67,9 @@ namespace csd = cl::sycl::detail; template class buffer; namespace detail { -#ifdef __SYCL_DEVICE_ONLY__ - -#define DEFINE_INIT_SIZES(POSTFIX) \ - \ - template struct InitSizesST##POSTFIX; \ - \ - template struct InitSizesST##POSTFIX<1, DstT> { \ - static void initSize(DstT &Dst) { \ - Dst[0] = get##POSTFIX<0>(); \ - } \ - }; \ - \ - template struct InitSizesST##POSTFIX<2, DstT> { \ - static void initSize(DstT &Dst) { \ - Dst[1] = get##POSTFIX<1>(); \ - InitSizesST##POSTFIX<1, DstT>::initSize(Dst); \ - } \ - }; \ - \ - template struct InitSizesST##POSTFIX<3, DstT> { \ - static void initSize(DstT &Dst) { \ - Dst[2] = get##POSTFIX<2>(); \ - InitSizesST##POSTFIX<2, DstT>::initSize(Dst); \ - } \ - }; \ - \ - template static void init##POSTFIX(DstT &Dst) { \ - InitSizesST##POSTFIX::initSize(Dst); \ - } - -DEFINE_INIT_SIZES(GlobalSize); -DEFINE_INIT_SIZES(GlobalInvocationId) -DEFINE_INIT_SIZES(WorkgroupSize) -DEFINE_INIT_SIZES(LocalInvocationId) -DEFINE_INIT_SIZES(WorkgroupId) -DEFINE_INIT_SIZES(GlobalOffset) - -#undef DEFINE_INIT_SIZES - -#endif //__SYCL_DEVICE_ONLY__ +/// This class is the default KernelName template parameter type for kernel +/// invocation APIs such as single_task. +class auto_name {}; class queue_impl; class stream_impl; @@ -129,6 +92,19 @@ decltype(member_ptr_helper(&F::operator())) argument_helper(F); template using lambda_arg_type = decltype(argument_helper(std::declval())); + +/// Helper struct to get a kernel name type based on given \c Name and \c Type +/// types: if \c Name is undefined (is a \c auto_name) then \c Type becomes +/// the \c Name. +template struct get_kernel_name_t { + using name = Name; +}; + +/// Specialization for the case when \c Name is undefined. +template struct get_kernel_name_t { + using name = Type; +}; + } // namespace detail // Objects of the handler class collect information about command group, such as @@ -548,7 +524,7 @@ class handler { KernelType>::type KernelFunc) { id global_id; - detail::initGlobalInvocationId(global_id); + __spirv::initGlobalInvocationId(global_id); KernelFunc(global_id); } @@ -562,8 +538,8 @@ class handler { id global_id; range global_size; - detail::initGlobalInvocationId(global_id); - detail::initGlobalSize(global_size); + __spirv::initGlobalInvocationId(global_id); + __spirv::initGlobalSize(global_size); item Item = detail::Builder::createItem(global_size, global_id); @@ -583,12 +559,12 @@ class handler { id local_id; id global_offset; - detail::initGlobalSize(global_size); - detail::initWorkgroupSize(local_size); - detail::initWorkgroupId(group_id); - detail::initGlobalInvocationId(global_id); - detail::initLocalInvocationId(local_id); - detail::initGlobalOffset(global_offset); + __spirv::initGlobalSize(global_size); + __spirv::initWorkgroupSize(local_size); + __spirv::initWorkgroupId(group_id); + __spirv::initGlobalInvocationId(global_id); + __spirv::initLocalInvocationId(local_id); + __spirv::initGlobalOffset(global_offset); group Group = detail::Builder::createGroup( global_size, local_size, group_id); @@ -631,83 +607,62 @@ class handler { } // single_task version with a kernel represented as a lambda. - template + template void single_task(KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_single_task(KernelFunc); + kernel_single_task(KernelFunc); #else MNDRDesc.set(range<1>{1}); - StoreLambda(KernelFunc); + StoreLambda(KernelFunc); MCGType = detail::CG::KERNEL; #endif } - // single_task version with a kernel represented as a functor. Simply redirect - // to the lambda-based form of invocation, setting kernel name type to the - // functor type. - template - void single_task(KernelFunctorType KernelFunctor) { - single_task(KernelFunctor); - } - // parallel_for version with a kernel represented as a lambda + range that // specifies global size only. - template + template void parallel_for(range NumWorkItems, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(NumWorkItems)); - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); MCGType = detail::CG::KERNEL; #endif } - // parallel_for version with a kernel represented as a functor + range that - // specifies global size only. Simply redirect to the lambda-based form of - // invocation, setting kernel name type to the functor type. - template - void parallel_for(range NumWorkItems, KernelType KernelFunc) { - parallel_for(NumWorkItems, KernelFunc); - } - // parallel_for version with a kernel represented as a lambda + range and // offset that specify global size and global offset correspondingly. - template + template void parallel_for(range NumWorkItems, id WorkItemOffset, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); MCGType = detail::CG::KERNEL; #endif } // parallel_for version with a kernel represented as a lambda + nd_range that // specifies global, local sizes and offset. - template + template void parallel_for(nd_range ExecutionRange, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(ExecutionRange)); - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); MCGType = detail::CG::KERNEL; #endif } - // parallel_for version with a kernel represented as a functor + nd_range that - // specifies global, local sizes and offset. Simply redirect to the - // lambda-based form of invocation, setting kernel name type to the functor - // type. - template - void parallel_for(nd_range ExecutionRange, KernelType KernelFunc) { - parallel_for(ExecutionRange, KernelFunc); - } - // template // void parallel_for_work_group(range numWorkGroups, @@ -773,111 +728,82 @@ class handler { // single_task version which takes two "kernels". One is a lambda which is // used if device, queue is bound to, is host device. Second is a sycl::kernel // which is used otherwise. - template + template void single_task(kernel SyclKernel, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_single_task(KernelFunc); + kernel_single_task(KernelFunc); #else MNDRDesc.set(range<1>{1}); MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) extractArgsAndReqs(); else - StoreLambda( - std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); #endif } - // single_task version which takes two "kernels". One is a functor which is - // used if device, queue is bound to, is host device. Second is a sycl::kernel - // which is used otherwise. Simply redirect to the lambda-based form of - // invocation, setting kernel name type to the functor type. - template - void single_task(kernel SyclKernel, KernelType KernelFunc) { - single_task(SyclKernel, KernelFunc); - } - // parallel_for version which takes two "kernels". One is a lambda which is // used if device, queue is bound to, is host device. Second is a sycl::kernel // which is used otherwise. range argument specifies global size. - template - void parallel_for(range NumWorkItems, kernel SyclKernel, + template + void parallel_for(kernel SyclKernel, range NumWorkItems, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(NumWorkItems)); MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) extractArgsAndReqs(); else - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); #endif } - // parallel_for version which takes two "kernels". One is a functor which is - // used if device, queue is bound to, is host device. Second is a sycl::kernel - // which is used otherwise. range argument specifies global size. Simply - // redirect to the lambda-based form of invocation, setting kernel name type - // to the functor type. - template - void parallel_for(range NumWorkItems, kernel SyclKernel, - KernelType KernelFunc) { - parallel_for(NumWorkItems, SyclKernel, - KernelFunc); - } - // parallel_for version which takes two "kernels". One is a lambda which is // used if device, queue is bound to, is host device. Second is a sycl::kernel // which is used otherwise. range and id specify global size and offset. - template - void parallel_for(range NumWorkItems, id WorkItemOffset, - kernel SyclKernel, KernelType KernelFunc) { + template + void parallel_for(kernel SyclKernel, range NumWorkItems, + id WorkItemOffset, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) extractArgsAndReqs(); else - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); #endif } // parallel_for version which takes two "kernels". One is a lambda which is // used if device, queue is bound to, is host device. Second is a sycl::kernel // which is used otherwise. nd_range specifies global, local size and offset. - template - void parallel_for(nd_range NDRange, kernel SyclKernel, + template + void parallel_for(kernel SyclKernel, nd_range NDRange, KernelType KernelFunc) { + using NameT = typename csd::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + kernel_parallel_for(KernelFunc); #else MNDRDesc.set(std::move(NDRange)); MSyclKernel = detail::getSyclObjImpl(std::move(SyclKernel)); MCGType = detail::CG::KERNEL; - if (!MIsHost && !lambdaAndKernelHaveEqualName()) + if (!MIsHost && !lambdaAndKernelHaveEqualName()) extractArgsAndReqs(); else - StoreLambda(std::move(KernelFunc)); + StoreLambda(std::move(KernelFunc)); #endif } - // parallel_for version which takes two "kernels". One is a functor which is - // used if device, queue is bound to, is host device. Second is a sycl::kernel - // which is used otherwise. nd_range specifies global, local size and offset. - // Simply redirects to the lambda-based form of invocation, setting kernel - // name type to the functor type. - template - void parallel_for(nd_range NDRange, kernel SyclKernel, - KernelType KernelFunc) { - parallel_for(NDRange, SyclKernel, KernelFunc); - } - // template // void parallel_for_work_group(range num_work_groups, kernel diff --git a/sycl/test/kernel-and-program/kernel-and-program.cpp b/sycl/test/kernel-and-program/kernel-and-program.cpp index e2deb4f95f23d..f2dd18c18866e 100644 --- a/sycl/test/kernel-and-program/kernel-and-program.cpp +++ b/sycl/test/kernel-and-program/kernel-and-program.cpp @@ -149,7 +149,7 @@ int main() { q.submit([&](cl::sycl::handler &cgh) { auto acc = buf.get_access(cgh); cgh.parallel_for( - numOfItems, krn, + krn, numOfItems, [=](cl::sycl::id<1> wiID) { acc[wiID] = acc[wiID] + 1; }); }); } @@ -233,7 +233,7 @@ int main() { localAcc(localRange, cgh); cgh.parallel_for( - cl::sycl::nd_range<1>(numOfItems, localRange), krn, + krn, cl::sycl::nd_range<1>(numOfItems, localRange), [=](cl::sycl::nd_item<1> item) { size_t idx = item.get_global_linear_id(); int pos = idx & 1;