diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc index 06eb099f475c..3b4e51b462bc 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc @@ -146,13 +146,13 @@ struct device_has_key { template struct property_value...> { using key_t = work_group_size_key; - constexpr size_t operator[](int dim); + constexpr size_t operator[](int dim) const; }; template struct property_value...> { using key_t = work_group_size_hint_key; - constexpr size_t operator[](int dim); + constexpr size_t operator[](int dim) const; }; template @@ -342,6 +342,19 @@ q.parallel_for(range<2>{16, 16}, properties, [=](id<2> i) { }).wait(); ``` +NOTE: It is currently not possible to use the same kernel function in two +commands with different properties. For example, the following will result in an +error at compile-time: + +```c++ + auto kernelFunc = [=](){}; + q.single_task(kernelFunc); + q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size<8>}, + kernelFunc); +``` + == Embedding Properties into a Kernel In other situations it may be useful to embed a kernel's properties directly diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp new file mode 100644 index 000000000000..6712c91d3ec5 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -0,0 +1,195 @@ +//==------- properties.hpp - SYCL properties associated with kernels -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace ext { +namespace oneapi { +namespace experimental { +namespace detail { +// Trait for checking that all size_t values are non-zero. +template struct AllNonZero { + static inline constexpr bool value = true; +}; +template struct AllNonZero { + static inline constexpr bool value = X > 0 && AllNonZero::value; +}; + +// Simple helpers for containing primitive types as template arguments. +template struct SizeList {}; +template struct CharList {}; + +// Helper for converting characters to a constexpr string. +template struct CharsToStr { + static inline constexpr const char value[] = {Chars..., '\0'}; +}; + +// Helper for converting a list of size_t values to a comma-separated string +// representation. This is done by extracting the digit one-by-one and when +// finishing a value, the parsed result is added to a separate list of +// "parsed" characters with the delimiter. +template +struct SizeListToStrHelper; +template +struct SizeListToStrHelper, CharList, + Chars...> + : SizeListToStrHelper, + CharList, '0' + (Value % 10), + Chars...> {}; +template +struct SizeListToStrHelper, CharList, + Chars...> + : SizeListToStrHelper, + CharList> {}; +template +struct SizeListToStrHelper, CharList, Chars...> + : CharsToStr {}; + +// Converts size_t values to a comma-separated string representation. +template +struct SizeListToStr : SizeListToStrHelper, CharList<>> {}; +} // namespace detail + +struct properties_tag {}; + +struct work_group_size_key { + template + using value_t = property_value...>; +}; + +struct work_group_size_hint_key { + template + using value_t = property_value...>; +}; + +struct sub_group_size_key { + template + using value_t = property_value>; +}; + +template +struct property_value, + std::integral_constant...> { + static_assert( + sizeof...(Dims) + 1 <= 3, + "work_group_size property currently only supports up to three values."); + static_assert(detail::AllNonZero::value, + "work_group_size property must only contain non-zero values."); + + using key_t = work_group_size_key; + + constexpr size_t operator[](int Dim) const { + return std::array{Dim0, Dims...}[Dim]; + } +}; + +template +struct property_value, + std::integral_constant...> { + static_assert(sizeof...(Dims) + 1 <= 3, + "work_group_size_hint property currently " + "only supports up to three values."); + static_assert( + detail::AllNonZero::value, + "work_group_size_hint property must only contain non-zero values."); + + using key_t = work_group_size_hint_key; + + constexpr size_t operator[](int Dim) const { + return std::array{Dim0, Dims...}[Dim]; + } +}; + +template +struct property_value> { + static_assert(Size != 0, + "sub_group_size_key property must contain a non-zero value."); + + using key_t = sub_group_size_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + +template +inline constexpr work_group_size_key::value_t work_group_size; + +template +inline constexpr work_group_size_hint_key::value_t + work_group_size_hint; + +template +inline constexpr sub_group_size_key::value_t sub_group_size; + +template <> struct is_property_key : std::true_type {}; +template <> +struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; + +namespace detail { +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::WorkGroupSize; +}; +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::WorkGroupSizeHint; +}; +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::SubGroupSize; +}; + +template <> +struct IsCompileTimeProperty : std::true_type {}; +template <> +struct IsCompileTimeProperty : std::true_type {}; +template <> +struct IsCompileTimeProperty : std::true_type {}; + +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-work-group-size"; + static constexpr const char *value = SizeListToStr::value; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-work-group-size-hint"; + static constexpr const char *value = SizeListToStr::value; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-sub-group-size"; + static constexpr uint32_t value = Size; +}; + +template +struct HasKernelPropertiesGetMethod : std::false_type {}; + +template +struct HasKernelPropertiesGetMethod< + T, sycl::detail::void_t().get( + std::declval()))>> : std::true_type { + using properties_t = + decltype(std::declval().get(std::declval())); +}; + +} // namespace detail +} // namespace experimental +} // namespace oneapi +} // namespace ext +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/properties.hpp b/sycl/include/sycl/ext/oneapi/properties/properties.hpp index e979545d4145..7e3eccce5377 100644 --- a/sycl/include/sycl/ext/oneapi/properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/properties.hpp @@ -209,6 +209,20 @@ using empty_properties_t = properties>; // PropertyValueTs is sorted and contains only valid properties. template using properties_t = properties>; + +// Helper for merging two property lists; +template +struct merged_properties; +template +struct merged_properties, + properties_t> { + using type = properties, std::tuple>::type>; +}; +template +using merged_properties_t = + typename merged_properties::type; + } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 28fa8278c94c..03716ad8c012 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -169,7 +169,11 @@ enum PropKind : uint32_t { ImplementInCSR = 3, LatencyAnchorID = 4, LatencyConstraint = 5, - PropKindSize = 6, + WorkGroupSize = 6, + WorkGroupSizeHint = 7, + SubGroupSize = 8, + // PropKindSize must always be the last value. + PropKindSize = 9, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp index 27d6a8a287b7..a9022d78733e 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp @@ -219,6 +219,59 @@ struct SortedAllUnique> SortedAllUnique>, std::false_type> {}; +//****************************************************************************** +// Property merging +//****************************************************************************** + +// Merges two sets of properties, failing if two properties are the same but +// with different values. +// NOTE: This assumes that the properties are in sorted order. +template struct MergeProperties; + +template <> struct MergeProperties, std::tuple<>> { + using type = std::tuple<>; +}; + +template +struct MergeProperties, std::tuple<>> { + using type = std::tuple; +}; + +template +struct MergeProperties, std::tuple> { + using type = std::tuple; +}; + +// Identical properties are allowed, but only one will carry over. +template +struct MergeProperties, + std::tuple> { + using merge_tails = + typename MergeProperties, + std::tuple>::type; + using type = typename PrependTuple::type; +}; + +template +struct MergeProperties, + std::tuple> { + using l_head = GetFirstType; + using r_head = GetFirstType; + static_assert( + PropertyID::value != PropertyID::value, + "Failed to merge property lists due to conflicting properties."); + static constexpr bool left_has_min = + PropertyID::value < PropertyID::value; + using l_split = HeadSplit, left_has_min>; + using r_split = HeadSplit, !left_has_min>; + using min = typename SelectNonVoid::type; + using merge_tails = typename MergeProperties::type; + using type = typename PrependTuple::type; +}; + } // namespace detail } // namespace experimental } // namespace oneapi diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 335daae9904d..6018e98ee5f2 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -41,6 +44,15 @@ #define __SYCL_NONCONST_FUNCTOR__ #endif +// replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc +// or const KernelType &KernelFunc +#ifdef __SYCL_NONCONST_FUNCTOR__ +#define _KERNELFUNCPARAMTYPE KernelType +#else +#define _KERNELFUNCPARAMTYPE const KernelType & +#endif +#define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a + template @@ -134,6 +146,28 @@ template struct NotIntMsg> { }; #endif +// Helper for merging properties with ones defined in an optional kernel functor +// getter. +template +struct GetMergedKernelProperties { + using type = PropertiesT; +}; +template +struct GetMergedKernelProperties< + KernelType, PropertiesT, + std::enable_if_t::value>> { + using get_method_properties = + typename ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< + KernelType>::properties_t; + static_assert( + ext::oneapi::experimental::is_property_list::value, + "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel " + "functor class must return a valid property list."); + using type = ext::oneapi::experimental::detail::merged_properties_t< + PropertiesT, get_method_properties>; +}; + #if __SYCL_ID_QUERIES_FIT_IN_INT__ template typename detail::enable_if_t::value || @@ -245,26 +279,32 @@ using sycl::detail::queue_impl; /// If we are given sycl::range and not sycl::nd_range we have more freedom in /// how to split the iteration space. -template +template bool reduCGFuncForRange(handler &CGH, KernelType KernelFunc, const range &Range, size_t MaxWGSize, - uint32_t NumConcurrentWorkGroups, Reduction &Redu); + uint32_t NumConcurrentWorkGroups, + PropertiesT Properties, Reduction &Redu); -template +template void reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc, - const nd_range &Range, Reduction &Redu); + const nd_range &Range, PropertiesT Properties, + Reduction &Redu); -template +template void reduCGFunc(handler &CGH, KernelType KernelFunc, - const nd_range &Range, Reduction &Redu); + const nd_range &Range, PropertiesT Properties, + Reduction &Redu); // Kernels with multiple reductions // sycl::nd_range version template + typename PropertiesT, typename... Reductions, size_t... Is> void reduCGFuncMulti(handler &CGH, KernelType KernelFunc, - const nd_range &Range, + const nd_range &Range, PropertiesT Properties, std::tuple &ReduTuple, std::index_sequence); @@ -309,6 +349,7 @@ template std::tuple...> tuple_select_elements(TupleT Tuple, std::index_sequence); +template struct IsReduction; template struct AreAllButLastReductions; template @@ -944,7 +985,9 @@ class __SYCL_EXPORT handler { /// /// \param NumWorkItems is a range defining indexing space. /// \param KernelFunc is a SYCL kernel function. - template + template void parallel_for_lambda_impl(range NumWorkItems, KernelType KernelFunc) { throwIfActionIsCreated(); @@ -957,6 +1000,8 @@ class __SYCL_EXPORT handler { std::is_integral::value && Dims == 1, item, typename TransformUserItemType::type>::type; + // TODO: Properties may change the kernel function, so in order to avoid + // conflicts they should be included in the name. using NameT = typename detail::get_kernel_name_t::name; @@ -1026,7 +1071,8 @@ class __SYCL_EXPORT handler { range AdjustedRange = NumWorkItems; AdjustedRange.set_range_dim0(NewValX); - kernel_parallel_for_wrapper(Wrapper); + kernel_parallel_for_wrapper(Wrapper); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(AdjustedRange); MNDRDesc.set(std::move(AdjustedRange)); @@ -1040,7 +1086,8 @@ class __SYCL_EXPORT handler { // SYCL_LANGUAGE_VERSION >= 202001 { (void)NumWorkItems; - kernel_parallel_for_wrapper(KernelFunc); + kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ detail::checkValueRange(NumWorkItems); MNDRDesc.set(std::move(NumWorkItems)); @@ -1051,6 +1098,47 @@ class __SYCL_EXPORT handler { } } + /// Defines and invokes a SYCL kernel function for the specified nd_range. + /// + /// The SYCL kernel function is defined as a lambda function or a named + /// function object type and given an id or item for indexing in the indexing + /// space defined by range. + /// If it is a named function object and the function object type is + /// globally visible, there is no need for the developer to provide + /// a kernel name for it. + /// + /// \param ExecutionRange is a ND-range defining global and local sizes as + /// well as offset. + /// \param Properties is the properties. + /// \param KernelFunc is a SYCL kernel function. + template + void parallel_for_impl(nd_range ExecutionRange, PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc)) { + throwIfActionIsCreated(); + // TODO: Properties may change the kernel function, so in order to avoid + // conflicts they should be included in the name. + using NameT = + typename detail::get_kernel_name_t::name; + verifyUsedKernelBundle(detail::KernelInfo::getName()); + using LambdaArgType = + sycl::detail::lambda_arg_type>; + // If user type is convertible from sycl::item/sycl::nd_item, use + // sycl::item/sycl::nd_item to transport item information + using TransformedArgType = + typename TransformUserItemType::type; + (void)ExecutionRange; + kernel_parallel_for_wrapper(KernelFunc); +#ifndef __SYCL_DEVICE_ONLY__ + detail::checkValueRange(ExecutionRange); + MNDRDesc.set(std::move(ExecutionRange)); + StoreLambda( + std::move(KernelFunc)); + setType(detail::CG::Kernel); +#endif + } + /// Defines and invokes a SYCL kernel function for the specified range. /// /// The SYCL kernel function is defined as SYCL kernel object. The kernel @@ -1069,20 +1157,96 @@ class __SYCL_EXPORT handler { MKernelName = getKernelName(); } + /// Hierarchical kernel invocation method of a kernel defined as a lambda + /// encoding the body of each work-group to launch. + /// + /// Lambda may contain multiple calls to parallel_for_work_item(...) methods + /// representing the execution on each work-item. Launches NumWorkGroups + /// work-groups of runtime-defined size. + /// + /// \param NumWorkGroups is a range describing the number of work-groups in + /// each dimension. + /// \param KernelFunc is a lambda representing kernel. + template + void parallel_for_work_group_lambda_impl(range NumWorkGroups, + _KERNELFUNCPARAM(KernelFunc)) { + throwIfActionIsCreated(); + // TODO: Properties may change the kernel function, so in order to avoid + // conflicts they should be included in the name. + using NameT = + typename detail::get_kernel_name_t::name; + verifyUsedKernelBundle(detail::KernelInfo::getName()); + using LambdaArgType = + sycl::detail::lambda_arg_type>; + (void)NumWorkGroups; + kernel_parallel_for_work_group_wrapper(KernelFunc); +#ifndef __SYCL_DEVICE_ONLY__ + detail::checkValueRange(NumWorkGroups); + MNDRDesc.setNumWorkGroups(NumWorkGroups); + StoreLambda(std::move(KernelFunc)); + setType(detail::CG::Kernel); +#endif // __SYCL_DEVICE_ONLY__ + } + + /// Hierarchical kernel invocation method of a kernel defined as a lambda + /// encoding the body of each work-group to launch. + /// + /// Lambda may contain multiple calls to parallel_for_work_item(...) methods + /// representing the execution on each work-item. Launches NumWorkGroups + /// work-groups of WorkGroupSize size. + /// + /// \param NumWorkGroups is a range describing the number of work-groups in + /// each dimension. + /// \param WorkGroupSize is a range describing the size of work-groups in + /// each dimension. + /// \param KernelFunc is a lambda representing kernel. + template + void parallel_for_work_group_lambda_impl(range NumWorkGroups, + range WorkGroupSize, + _KERNELFUNCPARAM(KernelFunc)) { + throwIfActionIsCreated(); + // TODO: Properties may change the kernel function, so in order to avoid + // conflicts they should be included in the name. + using NameT = + typename detail::get_kernel_name_t::name; + verifyUsedKernelBundle(detail::KernelInfo::getName()); + using LambdaArgType = + sycl::detail::lambda_arg_type>; + (void)NumWorkGroups; + (void)WorkGroupSize; + kernel_parallel_for_work_group_wrapper(KernelFunc); +#ifndef __SYCL_DEVICE_ONLY__ + nd_range ExecRange = + nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); + detail::checkValueRange(ExecRange); + MNDRDesc.set(std::move(ExecRange)); + StoreLambda(std::move(KernelFunc)); + setType(detail::CG::Kernel); +#endif // __SYCL_DEVICE_ONLY__ + } + #ifdef SYCL_LANGUAGE_VERSION #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]] #else #define __SYCL_KERNEL_ATTR__ #endif + // NOTE: the name of this function - "kernel_single_task" - is used by the // Front End to determine kernel invocation kind. - template - __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_single_task(KernelType KernelFunc) { -#else - kernel_single_task(const KernelType &KernelFunc) { + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] #endif + __SYCL_KERNEL_ATTR__ void + kernel_single_task(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(); #else @@ -1092,13 +1256,14 @@ class __SYCL_EXPORT handler { // NOTE: the name of this function - "kernel_single_task" - is used by the // Front End to determine kernel invocation kind. - template - __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_single_task(KernelType KernelFunc, kernel_handler KH) { -#else - kernel_single_task(const KernelType &KernelFunc, kernel_handler KH) { + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] #endif + __SYCL_KERNEL_ATTR__ void + kernel_single_task(_KERNELFUNCPARAM(KernelFunc), kernel_handler KH) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(KH); #else @@ -1109,13 +1274,15 @@ class __SYCL_EXPORT handler { // NOTE: the name of these functions - "kernel_parallel_for" - are used by the // Front End to determine kernel invocation kind. - template - __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for(KernelType KernelFunc) { -#else - kernel_parallel_for(const KernelType &KernelFunc) { + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] #endif + __SYCL_KERNEL_ATTR__ void + kernel_parallel_for(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(detail::Builder::getElement(detail::declptr())); #else @@ -1125,13 +1292,15 @@ class __SYCL_EXPORT handler { // NOTE: the name of these functions - "kernel_parallel_for" - are used by the // Front End to determine kernel invocation kind. - template - __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for(KernelType KernelFunc, kernel_handler KH) { -#else - kernel_parallel_for(const KernelType &KernelFunc, kernel_handler KH) { + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] #endif + __SYCL_KERNEL_ATTR__ void + kernel_parallel_for(_KERNELFUNCPARAM(KernelFunc), kernel_handler KH) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(detail::Builder::getElement(detail::declptr()), KH); #else @@ -1142,13 +1311,15 @@ class __SYCL_EXPORT handler { // NOTE: the name of this function - "kernel_parallel_for_work_group" - is // used by the Front End to determine kernel invocation kind. - template - __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_work_group(KernelType KernelFunc) { -#else - kernel_parallel_for_work_group(const KernelType &KernelFunc) { + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] #endif + __SYCL_KERNEL_ATTR__ void + kernel_parallel_for_work_group(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(detail::Builder::getElement(detail::declptr())); #else @@ -1158,14 +1329,16 @@ class __SYCL_EXPORT handler { // NOTE: the name of this function - "kernel_parallel_for_work_group" - is // used by the Front End to determine kernel invocation kind. - template + template +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::add_ir_attributes_function( + ext::oneapi::experimental::detail::PropertyMetaInfo::name..., + ext::oneapi::experimental::detail::PropertyMetaInfo::value...)]] +#endif __SYCL_KERNEL_ATTR__ void -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_work_group(KernelType KernelFunc, kernel_handler KH) { -#else - kernel_parallel_for_work_group(const KernelType &KernelFunc, + kernel_parallel_for_work_group(_KERNELFUNCPARAM(KernelFunc), kernel_handler KH) { -#endif #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(detail::Builder::getElement(detail::declptr()), KH); #else @@ -1174,6 +1347,90 @@ class __SYCL_EXPORT handler { #endif } + template struct KernelPropertiesUnpacker { + template + static void kernel_single_task_unpack(handler *, _KERNELFUNCPARAMTYPE) {} + + template + static void kernel_single_task_unpack(handler *, _KERNELFUNCPARAMTYPE, + kernel_handler) {} + + template + static void kernel_parallel_for_unpack(handler *, _KERNELFUNCPARAMTYPE) {} + + template + static void kernel_parallel_for_unpack(handler *, _KERNELFUNCPARAMTYPE, + kernel_handler) {} + + template + static void + kernel_parallel_for_work_group_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc)) {} + + template + static void kernel_parallel_for_work_group_unpack(handler *, + _KERNELFUNCPARAMTYPE, + kernel_handler) {} + + // This should always fail but must be dependent to avoid always failing. + // It is defined after the shell members to avoid that they are stripped + // from the class. + static_assert( + ext::oneapi::experimental::is_property_list::value, + "Template type is not a property list."); + }; + + template + struct KernelPropertiesUnpacker< + ext::oneapi::experimental::detail::properties_t> { + template + static void kernel_single_task_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc)) { + Caller->kernel_single_task(KernelFunc); + } + + template + static void kernel_single_task_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc), + kernel_handler KH) { + Caller->kernel_single_task(KernelFunc, + KH); + } + + template + static void kernel_parallel_for_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc)) { + Caller + ->kernel_parallel_for( + KernelFunc); + } + + template + static void kernel_parallel_for_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc), + kernel_handler KH) { + Caller + ->kernel_parallel_for( + KernelFunc, KH); + } + + template + static void + kernel_parallel_for_work_group_unpack(handler *Caller, + _KERNELFUNCPARAM(KernelFunc)) { + Caller->kernel_parallel_for_work_group(KernelFunc); + } + + template + static void kernel_parallel_for_work_group_unpack( + handler *Caller, _KERNELFUNCPARAM(KernelFunc), kernel_handler KH) { + Caller->kernel_parallel_for_work_group(KernelFunc, + KH); + } + }; + // Wrappers for kernel_*** functions above with and without support of // additional kernel_handler argument. @@ -1182,93 +1439,137 @@ class __SYCL_EXPORT handler { // Wrappers for kernel_single_task(...) - template + template std::enable_if_t::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_single_task_wrapper(KernelType KernelFunc) { -#else - kernel_single_task_wrapper(const KernelType &KernelFunc) { -#endif + kernel_single_task_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ kernel_handler KH; - kernel_single_task(KernelFunc, KH); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_single_task_unpack(this, KernelFunc, KH); } - template + template std::enable_if_t::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_single_task_wrapper(KernelType KernelFunc) { -#else - kernel_single_task_wrapper(const KernelType &KernelFunc) { -#endif + kernel_single_task_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ - kernel_single_task(KernelFunc); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_single_task_unpack(this, KernelFunc); } // Wrappers for kernel_parallel_for(...) - template + template std::enable_if_t< detail::KernelLambdaHasKernelHandlerArgT::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_wrapper(KernelType KernelFunc) { -#else - kernel_parallel_for_wrapper(const KernelType &KernelFunc) { -#endif + kernel_parallel_for_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_parallel_for_unpack( + this, KernelFunc, KH); } - template + template std::enable_if_t< !detail::KernelLambdaHasKernelHandlerArgT::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_wrapper(KernelType KernelFunc) { -#else - kernel_parallel_for_wrapper(const KernelType &KernelFunc) { -#endif + kernel_parallel_for_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ - kernel_parallel_for(KernelFunc); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_parallel_for_unpack( + this, KernelFunc); } // Wrappers for kernel_parallel_for_work_group(...) - template + template std::enable_if_t< detail::KernelLambdaHasKernelHandlerArgT::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) { -#else - kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) { -#endif + kernel_parallel_for_work_group_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ kernel_handler KH; - kernel_parallel_for_work_group(KernelFunc, KH); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_parallel_for_work_group_unpack( + this, KernelFunc, KH); } - template + template std::enable_if_t< !detail::KernelLambdaHasKernelHandlerArgT::value> -#ifdef __SYCL_NONCONST_FUNCTOR__ - kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) { -#else - kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) { -#endif + kernel_parallel_for_work_group_wrapper(_KERNELFUNCPARAM(KernelFunc)) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); #endif // __SYCL_DEVICE_ONLY__ - kernel_parallel_for_work_group(KernelFunc); + using MergedPropertiesT = + typename detail::GetMergedKernelProperties::type; + KernelPropertiesUnpacker:: + template kernel_parallel_for_work_group_unpack( + this, KernelFunc); + } + + /// Defines and invokes a SYCL kernel function as a function object type. + /// + /// If it is a named function object and the function object type is + /// globally visible, there is no need for the developer to provide + /// a kernel name for it. + /// + /// \param KernelFunc is a SYCL kernel function. + template + void single_task_lambda_impl(_KERNELFUNCPARAM(KernelFunc)) { + throwIfActionIsCreated(); + // TODO: Properties may change the kernel function, so in order to avoid + // conflicts they should be included in the name. + using NameT = + typename detail::get_kernel_name_t::name; + verifyUsedKernelBundle(detail::KernelInfo::getName()); + kernel_single_task_wrapper(KernelFunc); +#ifndef __SYCL_DEVICE_ONLY__ + // No need to check if range is out of INT_MAX limits as it's compile-time + // known constant. + MNDRDesc.set(range<1>{1}); + + StoreLambda(KernelFunc); + setType(detail::CG::Kernel); +#endif } void setStateExplicitKernelBundle(); @@ -1429,50 +1730,22 @@ class __SYCL_EXPORT handler { /// /// \param KernelFunc is a SYCL kernel function. template -#ifdef __SYCL_NONCONST_FUNCTOR__ - void single_task(KernelType KernelFunc) { -#else - void single_task(const KernelType &KernelFunc) { -#endif - throwIfActionIsCreated(); - using NameT = - typename detail::get_kernel_name_t::name; - verifyUsedKernelBundle(detail::KernelInfo::getName()); - kernel_single_task_wrapper(KernelFunc); -#ifndef __SYCL_DEVICE_ONLY__ - // No need to check if range is out of INT_MAX limits as it's compile-time - // known constant. - MNDRDesc.set(range<1>{1}); - - StoreLambda(KernelFunc); - setType(detail::CG::Kernel); -#endif + void single_task(_KERNELFUNCPARAM(KernelFunc)) { + single_task_lambda_impl(KernelFunc); } template -#ifdef __SYCL_NONCONST_FUNCTOR__ - void parallel_for(range<1> NumWorkItems, KernelType KernelFunc) { -#else - void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) { -#endif + void parallel_for(range<1> NumWorkItems, _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } template -#ifdef __SYCL_NONCONST_FUNCTOR__ - void parallel_for(range<2> NumWorkItems, KernelType KernelFunc) { -#else - void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) { -#endif + void parallel_for(range<2> NumWorkItems, _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } template -#ifdef __SYCL_NONCONST_FUNCTOR__ - void parallel_for(range<3> NumWorkItems, KernelType KernelFunc) { -#else - void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) { -#endif + void parallel_for(range<3> NumWorkItems, _KERNELFUNCPARAM(KernelFunc)) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } @@ -1505,14 +1778,6 @@ class __SYCL_EXPORT handler { host_task_impl(Func); } -// replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc -// or const KernelType &KernelFunc -#ifdef __SYCL_NONCONST_FUNCTOR__ -#define _KERNELFUNCPARAM(a) KernelType a -#else -#define _KERNELFUNCPARAM(a) const KernelType &a -#endif - /// Defines and invokes a SYCL kernel function for the specified range and /// offset. /// @@ -1558,35 +1823,31 @@ class __SYCL_EXPORT handler { /// /// \param ExecutionRange is a ND-range defining global and local sizes as /// well as offset. - /// \param KernelFunc is a SYCL kernel function. + /// \param Rest any number of reduction variables followed byt a SYCL kernel + /// function. + template + std::enable_if_t::value> + parallel_for(nd_range Range, RestT... Rest) { + parallel_for_impl( + Range, ext::oneapi::experimental::detail::empty_properties_t{}, + Rest...); + } + template - void parallel_for(nd_range ExecutionRange, - _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - using NameT = - typename detail::get_kernel_name_t::name; - verifyUsedKernelBundle(detail::KernelInfo::getName()); - using LambdaArgType = - sycl::detail::lambda_arg_type>; - // If user type is convertible from sycl::item/sycl::nd_item, use - // sycl::item/sycl::nd_item to transport item information - using TransformedArgType = - typename TransformUserItemType::type; - (void)ExecutionRange; - kernel_parallel_for_wrapper(KernelFunc); -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(ExecutionRange); - MNDRDesc.set(std::move(ExecutionRange)); - StoreLambda( - std::move(KernelFunc)); - setType(detail::CG::Kernel); -#endif + int Dims, typename Reduction> + std::enable_if_t::value> + parallel_for(range Range, Reduction Redu, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_impl( + Range, ext::oneapi::experimental::detail::empty_properties_t{}, Redu, + KernelFunc); } // "if constexpr" simplifies implementation/increases readability in comparison // with SFINAE-based approach. #if __cplusplus >= 201703L + /// Defines and invokes a SYCL kernel function for the specified nd_range. /// /// The SYCL kernel function is defined as a lambda function or a named @@ -1595,9 +1856,12 @@ class __SYCL_EXPORT handler { /// The parameter \p Redu contains the object creted by the reduction() /// function and defines the type and operation used in the corresponding /// argument of 'reducer' type passed to lambda/functor function. - template - void parallel_for(range Range, Reduction Redu, + template + std::enable_if_t< + detail::IsReduction::value && + ext::oneapi::experimental::is_property_list::value> + parallel_for_impl(range Range, PropertiesT Properties, Reduction Redu, _KERNELFUNCPARAM(KernelFunc)) { std::shared_ptr QueueCopy = MQueue; @@ -1617,9 +1881,9 @@ class __SYCL_EXPORT handler { // queue/device, while it is safer to use queries to the kernel pre-compiled // for the device. size_t PrefWGSize = detail::reduGetPreferredWGSize(MQueue, OneElemSize); - if (detail::reduCGFuncForRange(*this, KernelFunc, Range, - PrefWGSize, - NumConcurrentWorkGroups, Redu)) { + if (detail::reduCGFuncForRange( + *this, KernelFunc, Range, PrefWGSize, NumConcurrentWorkGroups, + Properties, Redu)) { this->finalize(); MLastEvent = withAuxHandler(QueueCopy, [&](handler &CopyHandler) { detail::reduSaveFinalResultToUserMem(CopyHandler, Redu); @@ -1627,14 +1891,17 @@ class __SYCL_EXPORT handler { } } - template - void parallel_for(nd_range Range, Reduction Redu, - _KERNELFUNCPARAM(KernelFunc)) { + template + std::enable_if_t< + detail::IsReduction::value && + ext::oneapi::experimental::is_property_list::value> + parallel_for_impl(nd_range Range, PropertiesT Properties, + Reduction Redu, _KERNELFUNCPARAM(KernelFunc)) { if constexpr (!Reduction::has_fast_atomics && !Reduction::has_float64_atomics) { // The most basic implementation. - parallel_for_impl(Range, Redu, KernelFunc); + parallel_for_basic_impl(Range, Properties, Redu, KernelFunc); return; } else { // Can't "early" return for "if constexpr". std::shared_ptr QueueCopy = MQueue; @@ -1649,16 +1916,18 @@ class __SYCL_EXPORT handler { if (D.has(aspect::atomic64)) { detail::reduCGFuncAtomic64(*this, KernelFunc, Range, - Redu); + Properties, Redu); } else { // Resort to basic implementation as well. - parallel_for_impl(Range, Redu, KernelFunc); + parallel_for_basic_impl(Range, Properties, Redu, + KernelFunc); return; } } else { // Use fast sycl::atomic operations to update reduction variable at the // end of each work-group work. - detail::reduCGFunc(*this, KernelFunc, Range, Redu); + detail::reduCGFunc(*this, KernelFunc, Range, Properties, + Redu); } // If the reduction variable must be initialized with the identity value // before the kernel run, then an additional working accessor is created, @@ -1678,10 +1947,13 @@ class __SYCL_EXPORT handler { } } - template - void parallel_for_impl(nd_range Range, Reduction Redu, - KernelType KernelFunc) { + template + std::enable_if_t< + detail::IsReduction::value && + ext::oneapi::experimental::is_property_list::value> + parallel_for_basic_impl(nd_range Range, PropertiesT Properties, + Reduction Redu, KernelType KernelFunc) { // This parallel_for() is lowered to the following sequence: // 1) Call a kernel that a) call user's lambda function and b) performs // one iteration of reduction, storing the partial reductions/sums @@ -1717,7 +1989,7 @@ class __SYCL_EXPORT handler { PI_ERROR_INVALID_WORK_GROUP_SIZE); // 1. Call the kernel that includes user's lambda function. - detail::reduCGFunc(*this, KernelFunc, Range, Redu); + detail::reduCGFunc(*this, KernelFunc, Range, Properties, Redu); std::shared_ptr QueueCopy = MQueue; this->finalize(); @@ -1758,11 +2030,14 @@ class __SYCL_EXPORT handler { // This is basically a tree reduction where we re-use user's reduction // variable instead of creating temporary storage for the last iteration // (#WG == 1). - template - std::enable_if_t<(sizeof...(RestT) >= 3 && - detail::AreAllButLastReductions::value)> - parallel_for(nd_range Range, RestT... Rest) { + std::enable_if_t< + (sizeof...(RestT) >= 3 && + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value)> + parallel_for_impl(nd_range Range, PropertiesT Properties, + RestT... Rest) { std::tuple ArgsTuple(Rest...); constexpr size_t NumArgs = sizeof...(RestT); auto KernelFunc = std::get(ArgsTuple); @@ -1782,8 +2057,8 @@ class __SYCL_EXPORT handler { std::to_string(MaxWGSize), PI_ERROR_INVALID_WORK_GROUP_SIZE); - detail::reduCGFuncMulti(*this, KernelFunc, Range, ReduTuple, - ReduIndices); + detail::reduCGFuncMulti(*this, KernelFunc, Range, Properties, + ReduTuple, ReduIndices); std::shared_ptr QueueCopy = MQueue; this->finalize(); @@ -1811,20 +2086,7 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for_work_group(range NumWorkGroups, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - using NameT = - typename detail::get_kernel_name_t::name; - verifyUsedKernelBundle(detail::KernelInfo::getName()); - using LambdaArgType = - sycl::detail::lambda_arg_type>; - (void)NumWorkGroups; - kernel_parallel_for_work_group_wrapper(KernelFunc); -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(NumWorkGroups); - MNDRDesc.setNumWorkGroups(NumWorkGroups); - StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); -#endif // __SYCL_DEVICE_ONLY__ + parallel_for_work_group_lambda_impl(NumWorkGroups, KernelFunc); } /// Hierarchical kernel invocation method of a kernel defined as a lambda @@ -1844,23 +2106,8 @@ class __SYCL_EXPORT handler { void parallel_for_work_group(range NumWorkGroups, range WorkGroupSize, _KERNELFUNCPARAM(KernelFunc)) { - throwIfActionIsCreated(); - using NameT = - typename detail::get_kernel_name_t::name; - verifyUsedKernelBundle(detail::KernelInfo::getName()); - using LambdaArgType = - sycl::detail::lambda_arg_type>; - (void)NumWorkGroups; - (void)WorkGroupSize; - kernel_parallel_for_work_group_wrapper(KernelFunc); -#ifndef __SYCL_DEVICE_ONLY__ - nd_range ExecRange = - nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); - detail::checkValueRange(ExecRange); - MNDRDesc.set(std::move(ExecRange)); - StoreLambda(std::move(KernelFunc)); - setType(detail::CG::Kernel); -#endif // __SYCL_DEVICE_ONLY__ + parallel_for_work_group_lambda_impl(NumWorkGroups, + WorkGroupSize, KernelFunc); } /// Invokes a SYCL kernel. @@ -2168,6 +2415,81 @@ class __SYCL_EXPORT handler { #endif // __SYCL_DEVICE_ONLY__ } + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value> + single_task(PropertiesT, _KERNELFUNCPARAM(KernelFunc)) { + single_task_lambda_impl(KernelFunc); + } + + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value> + parallel_for(range<1> NumWorkItems, PropertiesT, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_lambda_impl( + NumWorkItems, std::move(KernelFunc)); + } + + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value> + parallel_for(range<2> NumWorkItems, PropertiesT, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_lambda_impl( + NumWorkItems, std::move(KernelFunc)); + } + + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value> + parallel_for(range<3> NumWorkItems, PropertiesT, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_lambda_impl( + NumWorkItems, std::move(KernelFunc)); + } + + template + std::enable_if_t< + detail::IsReduction::value && + ext::oneapi::experimental::is_property_list::value> + parallel_for(range Range, PropertiesT Properties, Reduction Redu, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_impl(Range, Properties, Redu, KernelFunc); + } + + template + std::enable_if_t< + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value> + parallel_for(nd_range Range, PropertiesT Properties, RestT... Rest) { + parallel_for_impl(Range, Properties, Rest...); + } + + template + void parallel_for_work_group(range NumWorkGroups, PropertiesT, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_work_group_lambda_impl(NumWorkGroups, KernelFunc); + } + + template + void parallel_for_work_group(range NumWorkGroups, + range WorkGroupSize, PropertiesT, + _KERNELFUNCPARAM(KernelFunc)) { + parallel_for_work_group_lambda_impl(NumWorkGroups, + WorkGroupSize, KernelFunc); + } + // Clean up KERNELFUNC macro. #undef _KERNELFUNCPARAM diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 6289836e99b0..9ed308da08e1 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -727,10 +727,15 @@ class __SYCL_EXPORT queue { /// single_task version with a kernel represented as a lambda. /// + /// \param Properties is the kernel properties. /// \param KernelFunc is the Kernel functor or lambda /// \param CodeLoc contains the code location of user code - template - event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, event> + single_task(PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { static_assert( (detail::check_fn_signature, void()>::value || @@ -741,19 +746,35 @@ class __SYCL_EXPORT queue { _CODELOCARG(&CodeLoc); return submit( [&](handler &CGH) { - CGH.template single_task(KernelFunc); + CGH.template single_task( + Properties, KernelFunc); }, CodeLoc); } /// single_task version with a kernel represented as a lambda. /// - /// \param DepEvent is an event that specifies the kernel dependencies /// \param KernelFunc is the Kernel functor or lambda /// \param CodeLoc contains the code location of user code template - event single_task(event DepEvent, - _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { + event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { + return single_task( + ext::oneapi::experimental::detail::empty_properties_t{}, + KernelFunc _CODELOCFW(CodeLoc)); + } + + /// single_task version with a kernel represented as a lambda. + /// + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, event> + single_task(event DepEvent, PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { static_assert( (detail::check_fn_signature, void()>::value || @@ -765,20 +786,38 @@ class __SYCL_EXPORT queue { return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.template single_task(KernelFunc); + CGH.template single_task( + Properties, KernelFunc); }, CodeLoc); } /// single_task version with a kernel represented as a lambda. /// - /// \param DepEvents is a vector of events that specifies the kernel - /// dependencies + /// \param DepEvent is an event that specifies the kernel dependencies /// \param KernelFunc is the Kernel functor or lambda /// \param CodeLoc contains the code location of user code template - event single_task(const std::vector &DepEvents, + event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { + return single_task( + DepEvent, ext::oneapi::experimental::detail::empty_properties_t{}, + KernelFunc _CODELOCFW(CodeLoc)); + } + + /// single_task version with a kernel represented as a lambda. + /// + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, event> + single_task(const std::vector &DepEvents, PropertiesT Properties, + _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { static_assert( (detail::check_fn_signature, void()>::value || @@ -790,11 +829,26 @@ class __SYCL_EXPORT queue { return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.template single_task(KernelFunc); + CGH.template single_task( + Properties, KernelFunc); }, CodeLoc); } + /// single_task version with a kernel represented as a lambda. + /// + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + event single_task(const std::vector &DepEvents, + _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) { + return single_task( + DepEvents, ext::oneapi::experimental::detail::empty_properties_t{}, + KernelFunc _CODELOCFW(CodeLoc)); + } + /// parallel_for version with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -1002,21 +1056,40 @@ class __SYCL_EXPORT queue { /// specifies global, local sizes and offset. /// /// \param Range specifies the global and local work spaces of the kernel + /// \param Properties is the kernel properties. /// \param Rest acts as-if: "ReductionTypes&&... Reductions, /// const KernelType &KernelFunc". template - std::enable_if_t::value, event> - parallel_for(nd_range Range, RestT &&...Rest) { + typename PropertiesT, typename... RestT> + std::enable_if_t< + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value, + event> + parallel_for(nd_range Range, PropertiesT Properties, RestT &&...Rest) { // Actual code location needs to be captured from KernelInfo object. const detail::code_location CodeLoc = {}; return submit( [&](handler &CGH) { - CGH.template parallel_for(Range, Rest...); + CGH.template parallel_for(Range, Properties, Rest...); }, CodeLoc); } + /// parallel_for version with a kernel represented as a lambda + nd_range that + /// specifies global, local sizes and offset. + /// + /// \param Range specifies the global and local work spaces of the kernel + /// \param Rest acts as-if: "ReductionTypes&&... Reductions, + /// const KernelType &KernelFunc". + template + std::enable_if_t::value, event> + parallel_for(nd_range Range, RestT &&...Rest) { + return parallel_for( + Range, ext::oneapi::experimental::detail::empty_properties_t{}, + Rest...); + } + /// parallel_for version with a kernel represented as a lambda + nd_range that /// specifies global, local sizes and offset. /// @@ -1130,6 +1203,29 @@ class __SYCL_EXPORT queue { const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess); + /// parallel_for_impl with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param Properties is the kernel properties. + /// \param KernelFunc is the Kernel functor or lambda + template + std::enable_if_t< + detail::AreAllButLastReductions::value && + ext::oneapi::experimental::is_property_list::value, + event> + parallel_for_impl(range Range, PropertiesT Properties, + RestT &&...Rest) { + // Actual code location needs to be captured from KernelInfo object. + const detail::code_location CodeLoc = {}; + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Properties, Rest...); + }, + CodeLoc); + } + /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. /// @@ -1138,11 +1234,30 @@ class __SYCL_EXPORT queue { template std::enable_if_t::value, event> parallel_for_impl(range Range, RestT &&...Rest) { + return parallel_for_impl( + Range, ext::oneapi::experimental::detail::empty_properties_t{}, + Rest...); + } + + /// parallel_for_impl with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvent is an event that specifies the kernel dependencies + /// \param Properties is the kernel properties. + /// \param KernelFunc is the Kernel functor or lambda + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, event> + parallel_for_impl(range Range, event DepEvent, PropertiesT Properties, + RestT &&...Rest) { // Actual code location needs to be captured from KernelInfo object. const detail::code_location CodeLoc = {}; return submit( [&](handler &CGH) { - CGH.template parallel_for(Range, Rest...); + CGH.depends_on(DepEvent); + CGH.template parallel_for(Range, Properties, Rest...); }, CodeLoc); } @@ -1153,15 +1268,33 @@ class __SYCL_EXPORT queue { /// \param Range specifies the global work space of the kernel /// \param DepEvent is an event that specifies the kernel dependencies /// \param KernelFunc is the Kernel functor or lambda - /// \param CodeLoc contains the code location of user code template event parallel_for_impl(range Range, event DepEvent, RestT &&...Rest) { + return parallel_for_impl( + Range, DepEvent, + ext::oneapi::experimental::detail::empty_properties_t{}, Rest...); + } + + /// parallel_for_impl version with a kernel represented as a lambda + range + /// that specifies global size only. + /// + /// \param Range specifies the global work space of the kernel + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies + /// \param Properties is the kernel properties. + /// \param KernelFunc is the Kernel functor or lambda + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value, event> + parallel_for_impl(range Range, const std::vector &DepEvents, + PropertiesT Properties, RestT &&...Rest) { // Actual code location needs to be captured from KernelInfo object. const detail::code_location CodeLoc = {}; return submit( [&](handler &CGH) { - CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Rest...); + CGH.depends_on(DepEvents); + CGH.template parallel_for(Range, Properties, Rest...); }, CodeLoc); } @@ -1177,14 +1310,9 @@ class __SYCL_EXPORT queue { event parallel_for_impl(range Range, const std::vector &DepEvents, RestT &&...Rest) { - // Actual code location needs to be captured from KernelInfo object. - const detail::code_location CodeLoc = {}; - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Rest...); - }, - CodeLoc); + return parallel_for_impl( + Range, DepEvents, + ext::oneapi::experimental::detail::empty_properties_t{}, Rest...); } buffer &getAssertHappenedBuffer(); diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 0c0b23187d78..3ba393197706 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -30,20 +30,23 @@ namespace detail { /// implementation classes. It is needed to detect the reduction classes. class reduction_impl_base {}; +/// Predicate returning true if a type is a reduction. +template struct IsReduction { + static constexpr bool value = + std::is_base_of>::value; +}; + /// Predicate returning true if all template type parameters except the last one /// are reductions. template struct AreAllButLastReductions { static constexpr bool value = - std::is_base_of>::value && - AreAllButLastReductions::value; + IsReduction::value && AreAllButLastReductions::value; }; /// Helper specialization of AreAllButLastReductions for one element only. /// Returns true if the template parameter is not a reduction. template struct AreAllButLastReductions { - static constexpr bool value = - !std::is_base_of>::value; + static constexpr bool value = !IsReduction::value; }; } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) @@ -856,11 +859,12 @@ namespace main_krn { template struct RangeFastAtomics; } // namespace main_krn } // namespace reduction -template +template bool reduCGFuncForRangeFastAtomics(handler &CGH, KernelType KernelFunc, const range &Range, const nd_range<1> &NDRange, - Reduction &Redu) { + PropertiesT Properties, Reduction &Redu) { size_t NElements = Reduction::num_elements; auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH); auto GroupSum = Reduction::getReadWriteLocalAcc(NElements, CGH); @@ -868,7 +872,7 @@ bool reduCGFuncForRangeFastAtomics(handler &CGH, KernelType KernelFunc, KernelName>; size_t NWorkGroups = NDRange.get_group_range().size(); size_t PerGroup = Range.size() / NWorkGroups; - CGH.parallel_for(NDRange, [=](nd_item<1> NDId) { + CGH.parallel_for(NDRange, Properties, [=](nd_item<1> NDId) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; reductionLoop(Range, PerGroup, Reducer, NDId, KernelFunc); @@ -902,10 +906,12 @@ namespace main_krn { template struct RangeFastReduce; } // namespace main_krn } // namespace reduction -template +template bool reduCGFuncForRangeFastReduce(handler &CGH, KernelType KernelFunc, const range &Range, - const nd_range<1> &NDRange, Reduction &Redu) { + const nd_range<1> &NDRange, + PropertiesT Properties, Reduction &Redu) { size_t NElements = Reduction::num_elements; size_t WGSize = NDRange.get_local_range().size(); size_t NWorkGroups = NDRange.get_group_range().size(); @@ -925,7 +931,7 @@ bool reduCGFuncForRangeFastReduce(handler &CGH, KernelType KernelFunc, using Name = __sycl_reduction_kernel; size_t PerGroup = Range.size() / NWorkGroups; - CGH.parallel_for(NDRange, [=](nd_item<1> NDId) { + CGH.parallel_for(NDRange, Properties, [=](nd_item<1> NDId) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; reductionLoop(Range, PerGroup, Reducer, NDId, KernelFunc); @@ -1006,10 +1012,12 @@ namespace main_krn { template struct RangeBasic; } // namespace main_krn } // namespace reduction -template +template bool reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc, const range &Range, - const nd_range<1> &NDRange, Reduction &Redu) { + const nd_range<1> &NDRange, PropertiesT Properties, + Reduction &Redu) { size_t NElements = Reduction::num_elements; size_t WGSize = NDRange.get_local_range().size(); size_t NWorkGroups = NDRange.get_group_range().size(); @@ -1031,7 +1039,7 @@ bool reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc, using Name = __sycl_reduction_kernel; size_t PerGroup = Range.size() / NWorkGroups; - CGH.parallel_for(NDRange, [=](nd_item<1> NDId) { + CGH.parallel_for(NDRange, Properties, [=](nd_item<1> NDId) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer(Identity, BOp); reductionLoop(Range, PerGroup, Reducer, NDId, KernelFunc); @@ -1119,10 +1127,12 @@ bool reduCGFuncForRangeBasic(handler &CGH, KernelType KernelFunc, /// Returns "true" if the result has to be saved to user's variable by /// reduSaveFinalResultToUserMem. -template +template bool reduCGFuncForRange(handler &CGH, KernelType KernelFunc, const range &Range, size_t MaxWGSize, - uint32_t NumConcurrentWorkGroups, Reduction &Redu) { + uint32_t NumConcurrentWorkGroups, + PropertiesT Properties, Reduction &Redu) { size_t NWorkItems = Range.size(); size_t WGSize = std::min(NWorkItems, MaxWGSize); size_t NWorkGroups = NWorkItems / WGSize; @@ -1135,13 +1145,13 @@ bool reduCGFuncForRange(handler &CGH, KernelType KernelFunc, if constexpr (Reduction::has_fast_reduce) return reduCGFuncForRangeFastReduce(CGH, KernelFunc, Range, - NDRange, Redu); + NDRange, Properties, Redu); else if constexpr (Reduction::has_fast_atomics) return reduCGFuncForRangeFastAtomics(CGH, KernelFunc, Range, - NDRange, Redu); + NDRange, Properties, Redu); else return reduCGFuncForRangeBasic(CGH, KernelFunc, Range, NDRange, - Redu); + Properties, Redu); } namespace reduction { @@ -1158,16 +1168,17 @@ template struct NDRangeBothFastReduceAndAtomics; /// /// Briefly: calls user's lambda, reduce() + atomic, INT + /// ADD/MIN/MAX. -template +template void reduCGFuncForNDRangeBothFastReduceAndAtomics(handler &CGH, KernelType KernelFunc, const nd_range &Range, + PropertiesT Properties, Reduction &, AccTy Out) { size_t NElements = Reduction::num_elements; using Name = __sycl_reduction_kernel< reduction::main_krn::NDRangeBothFastReduceAndAtomics, KernelName>; - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Call user's function. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; KernelFunc(NDIt, Reducer); @@ -1195,12 +1206,13 @@ template struct NDRangeFastAtomicsOnly; /// user's reduction variable. /// /// Briefly: calls user's lambda, tree-reduction + atomic, INT + AND/OR/XOR. -template +template void reduCGFuncForNDRangeFastAtomicsOnly(handler &CGH, bool IsPow2WG, KernelType KernelFunc, const nd_range &Range, - Reduction &, AccTy Out) { + PropertiesT Properties, Reduction &, + AccTy Out) { size_t NElements = Reduction::num_elements; size_t WGSize = Range.get_local_range().size(); @@ -1214,7 +1226,7 @@ void reduCGFuncForNDRangeFastAtomicsOnly(handler &CGH, bool IsPow2WG, using Name = __sycl_reduction_kernel; - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; KernelFunc(NDIt, Reducer); @@ -1276,11 +1288,12 @@ template struct NDRangeFastReduceOnly; /// to a global buffer. /// /// Briefly: user's lambda, reduce(), FP + ADD/MIN/MAX. -template +template void reduCGFuncForNDRangeFastReduceOnly(handler &CGH, KernelType KernelFunc, const nd_range &Range, - Reduction &Redu, AccTy Out) { + PropertiesT Properties, Reduction &Redu, + AccTy Out) { size_t NElements = Reduction::num_elements; size_t NWorkGroups = Range.get_group_range().size(); bool IsUpdateOfUserVar = @@ -1289,7 +1302,7 @@ void reduCGFuncForNDRangeFastReduceOnly(handler &CGH, KernelType KernelFunc, using Name = __sycl_reduction_kernel; - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; KernelFunc(NDIt, Reducer); @@ -1323,11 +1336,12 @@ template struct NDRangeBasic; /// to a global buffer. /// /// Briefly: user's lambda, tree-reduction, CUSTOM types/ops. -template +template void reduCGFuncForNDRangeBasic(handler &CGH, bool IsPow2WG, KernelType KernelFunc, - const nd_range &Range, Reduction &Redu, + const nd_range &Range, + PropertiesT Properties, Reduction &Redu, AccTy Out) { size_t NElements = Reduction::num_elements; size_t WGSize = Range.get_local_range().size(); @@ -1346,7 +1360,7 @@ void reduCGFuncForNDRangeBasic(handler &CGH, bool IsPow2WG, using Name = __sycl_reduction_kernel; auto BOp = Redu.getBinaryOperation(); - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Call user's functions. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer(ReduIdentity, BOp); KernelFunc(NDIt, Reducer); @@ -1969,9 +1983,9 @@ template struct NDRangeMulti; } // namespace main_krn } // namespace reduction template + typename PropertiesT, typename... Reductions, size_t... Is> void reduCGFuncMulti(handler &CGH, KernelType KernelFunc, - const nd_range &Range, + const nd_range &Range, PropertiesT Properties, std::tuple &ReduTuple, std::index_sequence ReduIndices) { size_t WGSize = Range.get_local_range().size(); @@ -2010,7 +2024,7 @@ void reduCGFuncMulti(handler &CGH, KernelType KernelFunc, using Name = __sycl_reduction_kernel; - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Pass all reductions to user's lambda in the same order as supplied // Each reducer initializes its own storage auto ReduIndices = std::index_sequence_for(); @@ -2050,9 +2064,11 @@ template struct NDRangeAtomic64; // Specialization for devices with the atomic64 aspect, which guarantees 64 bit // floating point support for atomic reduction operation. -template +template void reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc, - const nd_range &Range, Reduction &Redu) { + const nd_range &Range, PropertiesT Properties, + Reduction &Redu) { auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH); static_assert( Reduction::has_float64_atomics, @@ -2060,7 +2076,7 @@ void reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc, size_t NElements = Reduction::num_elements; using Name = __sycl_reduction_kernel; - CGH.parallel_for(Range, [=](nd_item NDIt) { + CGH.parallel_for(Range, Properties, [=](nd_item NDIt) { // Call user's function. Reducer.MValue gets initialized there. typename Reduction::reducer_type Reducer; KernelFunc(NDIt, Reducer); @@ -2229,9 +2245,11 @@ void reduAuxCGFuncImplArray( ...); } -template +template void reduCGFunc(handler &CGH, KernelType KernelFunc, - const nd_range &Range, Reduction &Redu) { + const nd_range &Range, PropertiesT Properties, + Reduction &Redu) { size_t WGSize = Range.get_local_range().size(); auto Out = [&]() { if constexpr (Reduction::has_fast_atomics) { @@ -2257,19 +2275,19 @@ void reduCGFunc(handler &CGH, KernelType KernelFunc, if constexpr (Reduction::has_fast_reduce) { if constexpr (Reduction::has_fast_atomics) { reduCGFuncForNDRangeBothFastReduceAndAtomics( - CGH, KernelFunc, Range, Redu, Out); + CGH, KernelFunc, Range, Properties, Redu, Out); } else { reduCGFuncForNDRangeFastReduceOnly( - CGH, KernelFunc, Range, Redu, Out); + CGH, KernelFunc, Range, Properties, Redu, Out); } } else { bool IsPow2WG = (WGSize & (WGSize - 1)) == 0; if constexpr (Reduction::has_fast_atomics) { reduCGFuncForNDRangeFastAtomicsOnly( - CGH, IsPow2WG, KernelFunc, Range, Redu, Out); + CGH, IsPow2WG, KernelFunc, Range, Properties, Redu, Out); } else { reduCGFuncForNDRangeBasic( - CGH, IsPow2WG, KernelFunc, Range, Redu, Out); + CGH, IsPow2WG, KernelFunc, Range, Properties, Redu, Out); } } } diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 5dc57176d69e..70a29d96d509 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 07eb324dde7c..da45b2ec509b 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -83,14 +83,14 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, NDRDesc.LocalSize[1] != CompileWGSize[1] || NDRDesc.LocalSize[2] != CompileWGSize[2]) throw sycl::nd_range_error( - "The specified local size {" + std::to_string(NDRDesc.LocalSize[0]) + + "The specified local size {" + std::to_string(NDRDesc.LocalSize[2]) + ", " + std::to_string(NDRDesc.LocalSize[1]) + ", " + - std::to_string(NDRDesc.LocalSize[2]) + + std::to_string(NDRDesc.LocalSize[0]) + "} doesn't match the required work-group size specified " "in the program source {" + - std::to_string(CompileWGSize[0]) + ", " + + std::to_string(CompileWGSize[2]) + ", " + std::to_string(CompileWGSize[1]) + ", " + - std::to_string(CompileWGSize[2]) + "}", + std::to_string(CompileWGSize[0]) + "}", PI_ERROR_INVALID_WORK_GROUP_SIZE); } if (IsOpenCL) { diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp new file mode 100644 index 000000000000..112c266269ba --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -0,0 +1,56 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +using namespace sycl::ext::oneapi::experimental; + +int main() { + static_assert(is_property_key::value); + static_assert(is_property_key::value); + static_assert(is_property_key::value); + + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert( + is_property_value)>::value); + static_assert(is_property_value)>::value); + + static_assert( + std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + + static_assert(work_group_size<15>[0] == 15); + static_assert(work_group_size<16, 17>[0] == 16); + static_assert(work_group_size<16, 17>[1] == 17); + static_assert(work_group_size<18, 19, 20>[0] == 18); + static_assert(work_group_size<18, 19, 20>[1] == 19); + static_assert(work_group_size<18, 19, 20>[2] == 20); + static_assert(work_group_size_hint<21>[0] == 21); + static_assert(work_group_size_hint<22, 23>[0] == 22); + static_assert(work_group_size_hint<22, 23>[1] == 23); + static_assert(work_group_size_hint<24, 25, 26>[0] == 24); + static_assert(work_group_size_hint<24, 25, 26>[1] == 25); + static_assert(work_group_size_hint<24, 25, 26>[2] == 26); + static_assert(sub_group_size<27>.value == 27); + + static_assert(std::is_same_v)::value_t, + std::integral_constant>); + + return 0; +} diff --git a/sycl/test/extensions/properties/properties_kernel_negative.cpp b/sycl/test/extensions/properties/properties_kernel_negative.cpp new file mode 100644 index 000000000000..6fd9a135399f --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_negative.cpp @@ -0,0 +1,325 @@ +// RUN: %clangxx -fsycl -ferror-limit=0 -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=warning,note %s + +#include + +template struct KernelFunctorWithWGSize { + void operator()() const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size}; + } +}; + +template struct KernelFunctorWithWGSizeHint { + void operator()() const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint}; + } +}; + +template struct KernelFunctorWithSGSize { + void operator()() const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size}; + } +}; + +void check_work_group_size() { + // expected-error@+1 {{too few template arguments for variable template 'work_group_size'}} + auto WGSize0 = sycl::ext::oneapi::experimental::work_group_size<>; + + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0>' requested here}} + auto WGSize1 = sycl::ext::oneapi::experimental::work_group_size<0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 0>' requested here}} + auto WGSize2 = sycl::ext::oneapi::experimental::work_group_size<0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<1, 0>' requested here}} + auto WGSize3 = sycl::ext::oneapi::experimental::work_group_size<1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 1>' requested here}} + auto WGSize4 = sycl::ext::oneapi::experimental::work_group_size<0, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 0, 0>' requested here}} + auto WGSize5 = sycl::ext::oneapi::experimental::work_group_size<0, 0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<1, 0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<1, 0, 0>' requested here}} + auto WGSize6 = sycl::ext::oneapi::experimental::work_group_size<1, 0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 1, 0>' requested here}} + auto WGSize7 = sycl::ext::oneapi::experimental::work_group_size<0, 1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 0, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 0, 1>' requested here}} + auto WGSize8 = sycl::ext::oneapi::experimental::work_group_size<0, 0, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<1, 1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<1, 1, 0>' requested here}} + auto WGSize9 = sycl::ext::oneapi::experimental::work_group_size<1, 1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<0, 1, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<0, 1, 1>' requested here}} + auto WGSize10 = sycl::ext::oneapi::experimental::work_group_size<0, 1, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<1, 0, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<1, 0, 1>' requested here}} + auto WGSize11 = sycl::ext::oneapi::experimental::work_group_size<1, 0, 1>; + + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size property currently only supports up to three values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size<1, 1, 1, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size<1, 1, 1, 1>' requested here}} + auto WGSize12 = sycl::ext::oneapi::experimental::work_group_size<1, 1, 1, 1>; + + sycl::queue Q; + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Template type is not a property list.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1>}, + KernelFunctorWithWGSize<2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1>}, + KernelFunctorWithWGSize<1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1>}, + KernelFunctorWithWGSize<2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1>}, + KernelFunctorWithWGSize<2, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<1, 1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<1, 2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<2, 1, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<1, 2, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<2, 2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<2, 1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 1, 1>}, + KernelFunctorWithWGSize<2, 2, 2>{}); +} + +void check_work_group_size_hint() { + // expected-error@+1 {{too few template arguments for variable template 'work_group_size_hint'}} + auto WGSize0 = sycl::ext::oneapi::experimental::work_group_size_hint<>; + + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0>' requested here}} + auto WGSize1 = sycl::ext::oneapi::experimental::work_group_size_hint<0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 0>' requested here}} + auto WGSize2 = sycl::ext::oneapi::experimental::work_group_size_hint<0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<1, 0>' requested here}} + auto WGSize3 = sycl::ext::oneapi::experimental::work_group_size_hint<1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 1>' requested here}} + auto WGSize4 = sycl::ext::oneapi::experimental::work_group_size_hint<0, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 0, 0>' requested here}} + auto WGSize5 = sycl::ext::oneapi::experimental::work_group_size_hint<0, 0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<1, 0, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<1, 0, 0>' requested here}} + auto WGSize6 = sycl::ext::oneapi::experimental::work_group_size_hint<1, 0, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 1, 0>' requested here}} + auto WGSize7 = sycl::ext::oneapi::experimental::work_group_size_hint<0, 1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 0, 1>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 0, 1>' requested here}} + auto WGSize8 = sycl::ext::oneapi::experimental::work_group_size_hint<0, 0, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<1, 1, 0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 0>' requested here}} + auto WGSize9 = sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 0>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<0, 1, 1>' must be initialized by a constant expression}} + // expected-note@+2 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<0, 1, 1>' requested here}} + auto WGSize10 = + sycl::ext::oneapi::experimental::work_group_size_hint<0, 1, 1>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property must only contain non-zero values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<1, 0, 1>' must be initialized by a constant expression}} + // expected-note@+2 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<1, 0, 1>' requested here}} + auto WGSize11 = + sycl::ext::oneapi::experimental::work_group_size_hint<1, 0, 1>; + + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: work_group_size_hint property currently only supports up to three values.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'work_group_size_hint<1, 1, 1, 1>' must be initialized by a constant expression}} + // expected-note@+2 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1, 1>' requested here}} + auto WGSize12 = + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1, 1>; + + sycl::queue Q; + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1>}, + KernelFunctorWithWGSizeHint<2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1>}, + KernelFunctorWithWGSizeHint<1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1>}, + KernelFunctorWithWGSizeHint<2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1>}, + KernelFunctorWithWGSizeHint<2, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<1, 1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<1, 2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<2, 1, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<1, 2, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<2, 2, 1>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<2, 1, 2>{}); + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties, std::integral_constant, std::integral_constant>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 1, 1>}, + KernelFunctorWithWGSizeHint<2, 2, 2>{}); +} + +void check_sub_group_size() { + // expected-error@+1 {{too few template arguments for variable template 'sub_group_size'}} + auto WGSize0 = sycl::ext::oneapi::experimental::sub_group_size<>; + + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: sub_group_size_key property must contain a non-zero value.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'sub_group_size<0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::sub_group_size<0>' requested here}} + auto WGSize1 = sycl::ext::oneapi::experimental::sub_group_size<0>; + + sycl::queue Q; + + // expected-error-re@sycl/ext/oneapi/properties/property_utils.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::queue::single_task, sycl::ext::oneapi::experimental::properties>>>>' requested here}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size<1>}, + KernelFunctorWithSGSize<2>{}); +} + +int main() { + check_work_group_size(); + check_work_group_size_hint(); + check_sub_group_size(); + return 0; +} diff --git a/sycl/test/extensions/properties/properties_kernel_negative_device.cpp b/sycl/test/extensions/properties/properties_kernel_negative_device.cpp new file mode 100644 index 000000000000..ed7705c48473 --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_negative_device.cpp @@ -0,0 +1,71 @@ +// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -ferror-limit=0 -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s + +#include + +struct KernelFunctorWithOnlyWGSizeAttr { + // expected-warning@+1 {{kernel has both attribute 'reqd_work_group_size' and kernel properties; conflicting properties are ignored}} + void operator() [[sycl::reqd_work_group_size(32)]] () const {} +}; + +template struct KernelFunctorWithWGSizeWithAttr { + // expected-warning@+1 {{kernel has both attribute 'reqd_work_group_size' and kernel properties; conflicting properties are ignored}} + void operator() [[sycl::reqd_work_group_size(32)]] () const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size}; + } +}; + +struct KernelFunctorWithOnlySGSizeAttr { + // expected-warning@+1 {{kernel has both attribute 'reqd_sub_group_size' and kernel properties; conflicting properties are ignored}} + void operator() [[sycl::reqd_sub_group_size(32)]] () const {} +}; + +template struct KernelFunctorWithSGSizeWithAttr { + // expected-warning@+1 {{kernel has both attribute 'reqd_sub_group_size' and kernel properties; conflicting properties are ignored}} + void operator() [[sycl::reqd_sub_group_size(32)]] () const {} + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size}; + } +}; + +void check_work_group_size() { + sycl::queue Q; + + // expected-warning@+4 {{kernel has both attribute 'reqd_work_group_size' and kernel properties; conflicting properties are ignored}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1>}, + []() [[sycl::reqd_work_group_size(32)]] {}); + + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1>}, + KernelFunctorWithOnlyWGSizeAttr{}); + + Q.single_task(KernelFunctorWithWGSizeWithAttr<1>{}); +} + +void check_sub_group_size() { + sycl::queue Q; + + // expected-warning@+4 {{kernel has both attribute 'reqd_sub_group_size' and kernel properties; conflicting properties are ignored}} + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size<1>}, + []() [[sycl::reqd_sub_group_size(32)]] {}); + + Q.single_task( + sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size<1>}, + KernelFunctorWithOnlySGSizeAttr{}); + + Q.single_task(KernelFunctorWithSGSizeWithAttr<1>{}); +} + +int main() { + check_work_group_size(); + check_sub_group_size(); + return 0; +} diff --git a/sycl/test/extensions/properties/properties_kernel_sub_group_size.cpp b/sycl/test/extensions/properties/properties_kernel_sub_group_size.cpp new file mode 100644 index 000000000000..96fbb460788d --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_sub_group_size.cpp @@ -0,0 +1,277 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + sycl::range<1> R1{1}; + sycl::range<2> R2{1, 2}; + sycl::range<3> R3{1, 2, 3}; + + sycl::nd_range<1> NDR1{R1, R1}; + sycl::nd_range<2> NDR2{R2, R2}; + sycl::nd_range<3> NDR3{R3, R3}; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size<1>}; + + auto Redu1 = sycl::reduction(nullptr, sycl::plus()); + auto Redu2 = sycl::reduction(nullptr, sycl::multiplies()); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel0(){{.*}} #[[SGSizeAttr1:[0-9]+]] + Q.single_task(Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel1(){{.*}} #[[SGSizeAttr1]] + Q.single_task(Ev, Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel2(){{.*}} #[[SGSizeAttr1]] + Q.single_task({Ev}, Props, []() {}); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel9(){{.*}} #[[SGSizeAttr2:[0-9]+]] + Q.parallel_for(R1, Props, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel10(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R1, Ev, Props, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel11(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R1, {Ev}, Props, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel12(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R2, Props, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel13(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R2, Ev, Props, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel14(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R2, {Ev}, Props, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel15(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R3, Props, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel16(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R3, Ev, Props, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel17(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(R3, {Ev}, Props, [](sycl::id<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel18{{.*}}{{.*}} #[[SGSizeAttr3:[0-9]+]] + Q.parallel_for(R1, Props, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel19{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R1, Ev, Props, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel20{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R1, {Ev}, Props, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel21{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R2, Props, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel22{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R2, Ev, Props, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel23{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R2, {Ev}, Props, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel24{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R3, Props, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel25{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R3, Ev, Props, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel26{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(R3, {Ev}, Props, Redu1, + [](sycl::id<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel27(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR1, Props, [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel28(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR1, Ev, Props, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel29(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR1, {Ev}, Props, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel30(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR2, Props, [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel31(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR2, Ev, Props, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel32(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR2, {Ev}, Props, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel33(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR3, Props, [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel34(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR3, Ev, Props, + [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel35(){{.*}} #[[SGSizeAttr2]] + Q.parallel_for(NDR3, {Ev}, Props, + [](sycl::nd_item<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel36{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, Props, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel37{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, Ev, Props, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel38{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, {Ev}, Props, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel39{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, Props, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel40{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, Ev, Props, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel41{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, {Ev}, Props, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel42{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, Props, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel43{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, Ev, Props, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel44{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, {Ev}, Props, Redu1, + [](sycl::nd_item<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel45{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, Props, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel46{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, Ev, Props, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel47{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR1, {Ev}, Props, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel48{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, Props, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel49{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, Ev, Props, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel50{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR2, {Ev}, Props, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel51{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, Props, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel52{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, Ev, Props, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel53{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.parallel_for(NDR3, {Ev}, Props, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel54(){{.*}} #[[SGSizeAttr1]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel55(){{.*}} #[[SGSizeAttr1]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel56(){{.*}} #[[SGSizeAttr1]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props, []() {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel57(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props, [](sycl::id<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel58(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props, [](sycl::id<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel59(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props, [](sycl::id<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel60{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props, Redu1, + [](sycl::id<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel61{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props, Redu1, + [](sycl::id<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel62{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props, Redu1, + [](sycl::id<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel63(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props, + [](sycl::nd_item<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel64(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props, + [](sycl::nd_item<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel65(){{.*}} #[[SGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props, + [](sycl::nd_item<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel66{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props, Redu1, + [](sycl::nd_item<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel67{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props, Redu1, + [](sycl::nd_item<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel68{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props, Redu1, + [](sycl::nd_item<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel69{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR1, Props, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel70{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR2, Props, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}SGSizeKernel71{{.*}}{{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR3, Props, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel72(){{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R1, Props, [](sycl::group<1> G) { + G.parallel_for_work_item([&](sycl::h_item<1>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel73(){{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R2, Props, [](sycl::group<2> G) { + G.parallel_for_work_item([&](sycl::h_item<2>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}SGSizeKernel74(){{.*}} #[[SGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R3, Props, [](sycl::group<3> G) { + G.parallel_for_work_item([&](sycl::h_item<3>) {}); + }); + }); + + return 0; +} + +// CHECK-IR: attributes #[[SGSizeAttr1]] = { {{.*}}"sycl-sub-group-size"="1" +// CHECK-IR: attributes #[[SGSizeAttr2]] = { {{.*}}"sycl-sub-group-size"="1" +// CHECK-IR: attributes #[[SGSizeAttr3]] = { {{.*}}"sycl-sub-group-size"="1" diff --git a/sycl/test/extensions/properties/properties_kernel_work_group_size.cpp b/sycl/test/extensions/properties/properties_kernel_work_group_size.cpp new file mode 100644 index 000000000000..336b0825cfc1 --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_work_group_size.cpp @@ -0,0 +1,299 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + sycl::range<1> R1{1}; + sycl::range<2> R2{1, 2}; + sycl::range<3> R3{1, 2, 3}; + + sycl::nd_range<1> NDR1{R1, R1}; + sycl::nd_range<2> NDR2{R2, R2}; + sycl::nd_range<3> NDR3{R3, R3}; + + constexpr auto Props1 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1>}; + constexpr auto Props2 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 2>}; + constexpr auto Props3 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<1, 2, 3>}; + + auto Redu1 = sycl::reduction(nullptr, sycl::plus()); + auto Redu2 = sycl::reduction(nullptr, sycl::multiplies()); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel0(){{.*}} #[[WGSizeAttr1:[0-9]+]] + Q.single_task(Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel1(){{.*}} #[[WGSizeAttr1]] + Q.single_task(Ev, Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel2(){{.*}} #[[WGSizeAttr1]] + Q.single_task({Ev}, Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel3(){{.*}} #[[WGSizeAttr2:[0-9]+]] + Q.single_task(Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel4(){{.*}} #[[WGSizeAttr2]] + Q.single_task(Ev, Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel5(){{.*}} #[[WGSizeAttr2]] + Q.single_task({Ev}, Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel6(){{.*}} #[[WGSizeAttr3:[0-9]+]] + Q.single_task(Props3, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel7(){{.*}} #[[WGSizeAttr3]] + Q.single_task(Ev, Props3, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel8(){{.*}} #[[WGSizeAttr3]] + Q.single_task({Ev}, Props3, []() {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel9(){{.*}} #[[WGSizeAttr4:[0-9]+]] + Q.parallel_for(R1, Props1, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel10(){{.*}} #[[WGSizeAttr4]] + Q.parallel_for(R1, Ev, Props1, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel11(){{.*}} #[[WGSizeAttr4]] + Q.parallel_for(R1, {Ev}, Props1, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel12(){{.*}} #[[WGSizeAttr5:[0-9]+]] + Q.parallel_for(R2, Props2, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel13(){{.*}} #[[WGSizeAttr5]] + Q.parallel_for(R2, Ev, Props2, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel14(){{.*}} #[[WGSizeAttr5]] + Q.parallel_for(R2, {Ev}, Props2, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel15(){{.*}} #[[WGSizeAttr6:[0-9]+]] + Q.parallel_for(R3, Props3, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel16(){{.*}} #[[WGSizeAttr6]] + Q.parallel_for(R3, Ev, Props3, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel17(){{.*}} #[[WGSizeAttr6]] + Q.parallel_for(R3, {Ev}, Props3, [](sycl::id<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel18{{.*}}{{.*}} #[[WGSizeAttr7:[0-9]+]] + Q.parallel_for(R1, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel19{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(R1, Ev, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel20{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(R1, {Ev}, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel21{{.*}}{{.*}} #[[WGSizeAttr8:[0-9]+]] + Q.parallel_for(R2, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel22{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(R2, Ev, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel23{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(R2, {Ev}, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel24{{.*}}{{.*}} #[[WGSizeAttr9:[0-9]+]] + Q.parallel_for(R3, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel25{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(R3, Ev, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel26{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(R3, {Ev}, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel27(){{.*}} #[[WGSizeAttr4]] + Q.parallel_for(NDR1, Props1, [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel28(){{.*}} #[[WGSizeAttr4]] + Q.parallel_for(NDR1, Ev, Props1, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel29(){{.*}} #[[WGSizeAttr4]] + Q.parallel_for(NDR1, {Ev}, Props1, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel30(){{.*}} #[[WGSizeAttr5]] + Q.parallel_for(NDR2, Props2, [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel31(){{.*}} #[[WGSizeAttr5]] + Q.parallel_for(NDR2, Ev, Props2, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel32(){{.*}} #[[WGSizeAttr5]] + Q.parallel_for(NDR2, {Ev}, Props2, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel33(){{.*}} #[[WGSizeAttr6]] + Q.parallel_for(NDR3, Props3, [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel34(){{.*}} #[[WGSizeAttr6]] + Q.parallel_for(NDR3, Ev, Props3, + [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel35(){{.*}} #[[WGSizeAttr6]] + Q.parallel_for(NDR3, {Ev}, Props3, + [](sycl::nd_item<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel36{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel37{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, Ev, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel38{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, {Ev}, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel39{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel40{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, Ev, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel41{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, {Ev}, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel42{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel43{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, Ev, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel44{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, {Ev}, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel45{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, Props1, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel46{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, Ev, Props1, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel47{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.parallel_for(NDR1, {Ev}, Props1, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel48{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, Props2, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel49{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, Ev, Props2, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel50{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.parallel_for(NDR2, {Ev}, Props2, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel51{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, Props3, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel52{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, Ev, Props3, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel53{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.parallel_for(NDR3, {Ev}, Props3, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel54(){{.*}} #[[WGSizeAttr1]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props1, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel55(){{.*}} #[[WGSizeAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props2, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel56(){{.*}} #[[WGSizeAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props3, []() {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel57(){{.*}} #[[WGSizeAttr4]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props1, [](sycl::id<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel58(){{.*}} #[[WGSizeAttr5]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props2, [](sycl::id<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel59(){{.*}} #[[WGSizeAttr6]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props3, [](sycl::id<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel60{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel61{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel62{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel63(){{.*}} #[[WGSizeAttr4]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props1, + [](sycl::nd_item<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel64(){{.*}} #[[WGSizeAttr5]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props2, + [](sycl::nd_item<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel65(){{.*}} #[[WGSizeAttr6]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props3, + [](sycl::nd_item<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel66{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel67{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel68{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel69{{.*}}{{.*}} #[[WGSizeAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR1, Props1, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel70{{.*}}{{.*}} #[[WGSizeAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR2, Props2, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeKernel71{{.*}}{{.*}} #[[WGSizeAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR3, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel72(){{.*}} #[[WGSizeAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R1, Props1, [](sycl::group<1> G) { + G.parallel_for_work_item([&](sycl::h_item<1>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel73(){{.*}} #[[WGSizeAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R2, Props2, [](sycl::group<2> G) { + G.parallel_for_work_item([&](sycl::h_item<2>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel74(){{.*}} #[[WGSizeAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R3, Props3, [](sycl::group<3> G) { + G.parallel_for_work_item([&](sycl::h_item<3>) {}); + }); + }); + + return 0; +} + +// CHECK-IR: attributes #[[WGSizeAttr1]] = { {{.*}}"sycl-work-group-size"="1" +// CHECK-IR: attributes #[[WGSizeAttr2]] = { {{.*}}"sycl-work-group-size"="1,2" +// CHECK-IR: attributes #[[WGSizeAttr3]] = { {{.*}}"sycl-work-group-size"="1,2,3" +// CHECK-IR: attributes #[[WGSizeAttr4]] = { {{.*}}"sycl-work-group-size"="1" +// CHECK-IR: attributes #[[WGSizeAttr5]] = { {{.*}}"sycl-work-group-size"="1,2" +// CHECK-IR: attributes #[[WGSizeAttr6]] = { {{.*}}"sycl-work-group-size"="1,2,3" +// CHECK-IR: attributes #[[WGSizeAttr7]] = { {{.*}}"sycl-work-group-size"="1" +// CHECK-IR: attributes #[[WGSizeAttr8]] = { {{.*}}"sycl-work-group-size"="1,2" +// CHECK-IR: attributes #[[WGSizeAttr9]] = { {{.*}}"sycl-work-group-size"="1,2,3" diff --git a/sycl/test/extensions/properties/properties_kernel_work_group_size_hint.cpp b/sycl/test/extensions/properties/properties_kernel_work_group_size_hint.cpp new file mode 100644 index 000000000000..c82c9c7230fc --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_work_group_size_hint.cpp @@ -0,0 +1,308 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + sycl::range<1> R1{1}; + sycl::range<2> R2{1, 2}; + sycl::range<3> R3{1, 2, 3}; + + sycl::nd_range<1> NDR1{R1, R1}; + sycl::nd_range<2> NDR2{R2, R2}; + sycl::nd_range<3> NDR3{R3, R3}; + + constexpr auto Props1 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1>}; + constexpr auto Props2 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 2>}; + constexpr auto Props3 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size_hint<1, 2, 3>}; + + auto Redu1 = sycl::reduction(nullptr, sycl::plus()); + auto Redu2 = sycl::reduction(nullptr, sycl::multiplies()); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel0(){{.*}} #[[WGSizeHintAttr1:[0-9]+]] + Q.single_task(Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel1(){{.*}} #[[WGSizeHintAttr1]] + Q.single_task(Ev, Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel2(){{.*}} #[[WGSizeHintAttr1]] + Q.single_task({Ev}, Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel3(){{.*}} #[[WGSizeHintAttr2:[0-9]+]] + Q.single_task(Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel4(){{.*}} #[[WGSizeHintAttr2]] + Q.single_task(Ev, Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel5(){{.*}} #[[WGSizeHintAttr2]] + Q.single_task({Ev}, Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel6(){{.*}} #[[WGSizeHintAttr3:[0-9]+]] + Q.single_task(Props3, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel7(){{.*}} #[[WGSizeHintAttr3]] + Q.single_task(Ev, Props3, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel8(){{.*}} #[[WGSizeHintAttr3]] + Q.single_task({Ev}, Props3, []() {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel9(){{.*}} #[[WGSizeHintAttr4:[0-9]+]] + Q.parallel_for(R1, Props1, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel10(){{.*}} #[[WGSizeHintAttr4]] + Q.parallel_for(R1, Ev, Props1, [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel11(){{.*}} #[[WGSizeHintAttr4]] + Q.parallel_for(R1, {Ev}, Props1, + [](sycl::id<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel12(){{.*}} #[[WGSizeHintAttr5:[0-9]+]] + Q.parallel_for(R2, Props2, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel13(){{.*}} #[[WGSizeHintAttr5]] + Q.parallel_for(R2, Ev, Props2, [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel14(){{.*}} #[[WGSizeHintAttr5]] + Q.parallel_for(R2, {Ev}, Props2, + [](sycl::id<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel15(){{.*}} #[[WGSizeHintAttr6:[0-9]+]] + Q.parallel_for(R3, Props3, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel16(){{.*}} #[[WGSizeHintAttr6]] + Q.parallel_for(R3, Ev, Props3, [](sycl::id<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel17(){{.*}} #[[WGSizeHintAttr6]] + Q.parallel_for(R3, {Ev}, Props3, + [](sycl::id<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel18{{.*}}{{.*}} #[[WGSizeHintAttr7:[0-9]+]] + Q.parallel_for(R1, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel19{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for(R1, Ev, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel20{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for(R1, {Ev}, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel21{{.*}}{{.*}} #[[WGSizeHintAttr8:[0-9]+]] + Q.parallel_for(R2, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel22{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for(R2, Ev, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel23{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for(R2, {Ev}, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel24{{.*}}{{.*}} #[[WGSizeHintAttr9:[0-9]+]] + Q.parallel_for(R3, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel25{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for(R3, Ev, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel26{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for(R3, {Ev}, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel27(){{.*}} #[[WGSizeHintAttr4]] + Q.parallel_for(NDR1, Props1, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel28(){{.*}} #[[WGSizeHintAttr4]] + Q.parallel_for(NDR1, Ev, Props1, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel29(){{.*}} #[[WGSizeHintAttr4]] + Q.parallel_for(NDR1, {Ev}, Props1, + [](sycl::nd_item<1>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel30(){{.*}} #[[WGSizeHintAttr5]] + Q.parallel_for(NDR2, Props2, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel31(){{.*}} #[[WGSizeHintAttr5]] + Q.parallel_for(NDR2, Ev, Props2, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel32(){{.*}} #[[WGSizeHintAttr5]] + Q.parallel_for(NDR2, {Ev}, Props2, + [](sycl::nd_item<2>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel33(){{.*}} #[[WGSizeHintAttr6]] + Q.parallel_for(NDR3, Props3, + [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel34(){{.*}} #[[WGSizeHintAttr6]] + Q.parallel_for(NDR3, Ev, Props3, + [](sycl::nd_item<3>) {}); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel35(){{.*}} #[[WGSizeHintAttr6]] + Q.parallel_for(NDR3, {Ev}, Props3, + [](sycl::nd_item<3>) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel36{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for(NDR1, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel37{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for(NDR1, Ev, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel38{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for(NDR1, {Ev}, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel39{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for(NDR2, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel40{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for(NDR2, Ev, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel41{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for(NDR2, {Ev}, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel42{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for(NDR3, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel43{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for(NDR3, Ev, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel44{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for(NDR3, {Ev}, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel45{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for( + NDR1, Props1, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel46{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for( + NDR1, Ev, Props1, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel47{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.parallel_for( + NDR1, {Ev}, Props1, Redu1, Redu2, + [](sycl::nd_item<1>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel48{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for( + NDR2, Props2, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel49{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for( + NDR2, Ev, Props2, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel50{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.parallel_for( + NDR2, {Ev}, Props2, Redu1, Redu2, + [](sycl::nd_item<2>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel51{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for( + NDR3, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel52{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for( + NDR3, Ev, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel53{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.parallel_for( + NDR3, {Ev}, Props3, Redu1, Redu2, + [](sycl::nd_item<3>, auto &, auto &) {}); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel54(){{.*}} #[[WGSizeHintAttr1]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props1, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel55(){{.*}} #[[WGSizeHintAttr2]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props2, []() {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel56(){{.*}} #[[WGSizeHintAttr3]] + Q.submit([&](sycl::handler &CGH) { + CGH.single_task(Props3, []() {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel57(){{.*}} #[[WGSizeHintAttr4]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props1, [](sycl::id<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel58(){{.*}} #[[WGSizeHintAttr5]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props2, [](sycl::id<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel59(){{.*}} #[[WGSizeHintAttr6]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props3, [](sycl::id<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel60{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R1, Props1, Redu1, + [](sycl::id<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel61{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R2, Props2, Redu1, + [](sycl::id<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel62{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(R3, Props3, Redu1, + [](sycl::id<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel63(){{.*}} #[[WGSizeHintAttr4]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props1, + [](sycl::nd_item<1>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel64(){{.*}} #[[WGSizeHintAttr5]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props2, + [](sycl::nd_item<2>) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel65(){{.*}} #[[WGSizeHintAttr6]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props3, + [](sycl::nd_item<3>) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel66{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR1, Props1, Redu1, + [](sycl::nd_item<1>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel67{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR2, Props2, Redu1, + [](sycl::nd_item<2>, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel68{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for(NDR3, Props3, Redu1, + [](sycl::nd_item<3>, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel69{{.*}}{{.*}} #[[WGSizeHintAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR1, Props1, Redu1, Redu2, [](sycl::nd_item<1>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel70{{.*}}{{.*}} #[[WGSizeHintAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR2, Props2, Redu1, Redu2, [](sycl::nd_item<2>, auto &, auto &) {}); + }); + // CHECK-IR: spir_kernel void @{{.*}}main_krn{{.*}}WGSizeHintKernel71{{.*}}{{.*}} #[[WGSizeHintAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for( + NDR3, Props3, Redu1, Redu2, [](sycl::nd_item<3>, auto &, auto &) {}); + }); + + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel72(){{.*}} #[[WGSizeHintAttr7]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R1, Props1, [](sycl::group<1> G) { + G.parallel_for_work_item([&](sycl::h_item<1>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel73(){{.*}} #[[WGSizeHintAttr8]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R2, Props2, [](sycl::group<2> G) { + G.parallel_for_work_item([&](sycl::h_item<2>) {}); + }); + }); + // CHECK-IR: spir_kernel void @{{.*}}WGSizeHintKernel74(){{.*}} #[[WGSizeHintAttr9]] + Q.submit([&](sycl::handler &CGH) { + CGH.parallel_for_work_group( + R3, Props3, [](sycl::group<3> G) { + G.parallel_for_work_item([&](sycl::h_item<3>) {}); + }); + }); + + return 0; +} + +// CHECK-IR: attributes #[[WGSizeHintAttr1]] = { {{.*}}"sycl-work-group-size-hint"="1" +// CHECK-IR: attributes #[[WGSizeHintAttr2]] = { {{.*}}"sycl-work-group-size-hint"="1,2" +// CHECK-IR: attributes #[[WGSizeHintAttr3]] = { {{.*}}"sycl-work-group-size-hint"="1,2,3" +// CHECK-IR: attributes #[[WGSizeHintAttr4]] = { {{.*}}"sycl-work-group-size-hint"="1" +// CHECK-IR: attributes #[[WGSizeHintAttr5]] = { {{.*}}"sycl-work-group-size-hint"="1,2" +// CHECK-IR: attributes #[[WGSizeHintAttr6]] = { {{.*}}"sycl-work-group-size-hint"="1,2,3" +// CHECK-IR: attributes #[[WGSizeHintAttr7]] = { {{.*}}"sycl-work-group-size-hint"="1" +// CHECK-IR: attributes #[[WGSizeHintAttr8]] = { {{.*}}"sycl-work-group-size-hint"="1,2" +// CHECK-IR: attributes #[[WGSizeHintAttr9]] = { {{.*}}"sycl-work-group-size-hint"="1,2,3"