@@ -140,17 +140,10 @@ template <typename... Ts> ReduTupleT<Ts...> makeReduTupleT(Ts... Elements) {
140140 return sycl::detail::make_tuple (Elements...);
141141}
142142
143- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
144- __SYCL_EXPORT size_t reduGetMaxWGSize (const std::shared_ptr<queue_impl> &Queue,
143+ __SYCL_EXPORT size_t reduGetMaxWGSize (handler &cgh,
145144 size_t LocalMemBytesPerWorkItem);
146- __SYCL_EXPORT size_t reduGetPreferredWGSize (
147- const std::shared_ptr<queue_impl> &Queue, size_t LocalMemBytesPerWorkItem);
148- #else
149- __SYCL_EXPORT size_t reduGetMaxWGSize (std::shared_ptr<queue_impl> Queue,
150- size_t LocalMemBytesPerWorkItem);
151- __SYCL_EXPORT size_t reduGetPreferredWGSize (std::shared_ptr<queue_impl> &Queue,
145+ __SYCL_EXPORT size_t reduGetPreferredWGSize (handler &cgh,
152146 size_t LocalMemBytesPerWorkItem);
153- #endif
154147__SYCL_EXPORT size_t reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize,
155148 size_t &NWorkGroups);
156149
@@ -1224,15 +1217,12 @@ template <>
12241217struct NDRangeReduction <reduction::strategy::local_atomic_and_atomic_cross_wg> {
12251218 template <typename KernelName, int Dims, typename PropertiesT,
12261219 typename KernelType, typename Reduction>
1227- static void run (handler &CGH,
1228- const std::shared_ptr<detail::queue_impl> &Queue,
1229- nd_range<Dims> NDRange, PropertiesT &Properties,
1220+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
12301221 Reduction &Redu, KernelType &KernelFunc) {
12311222 static_assert (Reduction::has_identity,
12321223 " Identityless reductions are not supported by the "
12331224 " local_atomic_and_atomic_cross_wg strategy." );
12341225
1235- std::ignore = Queue;
12361226 using Name = __sycl_reduction_kernel<
12371227 reduction::MainKrn, KernelName,
12381228 reduction::strategy::local_atomic_and_atomic_cross_wg>;
@@ -1276,15 +1266,12 @@ struct NDRangeReduction<
12761266 reduction::strategy::group_reduce_and_last_wg_detection> {
12771267 template <typename KernelName, int Dims, typename PropertiesT,
12781268 typename KernelType, typename Reduction>
1279- static void run (handler &CGH,
1280- const std::shared_ptr<detail::queue_impl> &Queue,
1281- nd_range<Dims> NDRange, PropertiesT &Properties,
1269+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
12821270 Reduction &Redu, KernelType &KernelFunc) {
12831271 static_assert (Reduction::has_identity,
12841272 " Identityless reductions are not supported by the "
12851273 " group_reduce_and_last_wg_detection strategy." );
12861274
1287- std::ignore = Queue;
12881275 size_t NElements = Reduction::num_elements;
12891276 size_t WGSize = NDRange.get_local_range ().size ();
12901277 size_t NWorkGroups = NDRange.get_group_range ().size ();
@@ -1476,9 +1463,7 @@ void doTreeReductionOnTuple(size_t WorkSize, size_t LID,
14761463template <> struct NDRangeReduction <reduction::strategy::range_basic> {
14771464 template <typename KernelName, int Dims, typename PropertiesT,
14781465 typename KernelType, typename Reduction>
1479- static void run (handler &CGH,
1480- const std::shared_ptr<detail::queue_impl> &Queue,
1481- nd_range<Dims> NDRange, PropertiesT &Properties,
1466+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
14821467 Reduction &Redu, KernelType &KernelFunc) {
14831468 using reducer_type = typename Reduction::reducer_type;
14841469 using element_type = typename ReducerTraits<reducer_type>::element_type;
@@ -1490,7 +1475,6 @@ template <> struct NDRangeReduction<reduction::strategy::range_basic> {
14901475 constexpr bool UsePartialSumForOutput =
14911476 !Reduction::is_usm && Reduction::has_identity;
14921477
1493- std::ignore = Queue;
14941478 size_t NElements = Reduction::num_elements;
14951479 size_t WGSize = NDRange.get_local_range ().size ();
14961480 size_t NWorkGroups = NDRange.get_group_range ().size ();
@@ -1588,15 +1572,12 @@ template <>
15881572struct NDRangeReduction <reduction::strategy::group_reduce_and_atomic_cross_wg> {
15891573 template <typename KernelName, int Dims, typename PropertiesT,
15901574 typename KernelType, typename Reduction>
1591- static void run (handler &CGH,
1592- const std::shared_ptr<detail::queue_impl> &Queue,
1593- nd_range<Dims> NDRange, PropertiesT &Properties,
1575+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
15941576 Reduction &Redu, KernelType &KernelFunc) {
15951577 static_assert (Reduction::has_identity,
15961578 " Identityless reductions are not supported by the "
15971579 " group_reduce_and_atomic_cross_wg strategy." );
15981580
1599- std::ignore = Queue;
16001581 using Name = __sycl_reduction_kernel<
16011582 reduction::MainKrn, KernelName,
16021583 reduction::strategy::group_reduce_and_atomic_cross_wg>;
@@ -1625,14 +1606,11 @@ struct NDRangeReduction<
16251606 reduction::strategy::local_mem_tree_and_atomic_cross_wg> {
16261607 template <typename KernelName, int Dims, typename PropertiesT,
16271608 typename KernelType, typename Reduction>
1628- static void run (handler &CGH,
1629- const std::shared_ptr<detail::queue_impl> &Queue,
1630- nd_range<Dims> NDRange, PropertiesT &Properties,
1609+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
16311610 Reduction &Redu, KernelType &KernelFunc) {
16321611 using reducer_type = typename Reduction::reducer_type;
16331612 using element_type = typename ReducerTraits<reducer_type>::element_type;
16341613
1635- std::ignore = Queue;
16361614 using Name = __sycl_reduction_kernel<
16371615 reduction::MainKrn, KernelName,
16381616 reduction::strategy::local_mem_tree_and_atomic_cross_wg>;
@@ -1687,9 +1665,7 @@ struct NDRangeReduction<
16871665 reduction::strategy::group_reduce_and_multiple_kernels> {
16881666 template <typename KernelName, int Dims, typename PropertiesT,
16891667 typename KernelType, typename Reduction>
1690- static void run (handler &CGH,
1691- const std::shared_ptr<detail::queue_impl> &Queue,
1692- nd_range<Dims> NDRange, PropertiesT &Properties,
1668+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
16931669 Reduction &Redu, KernelType &KernelFunc) {
16941670 static_assert (Reduction::has_identity,
16951671 " Identityless reductions are not supported by the "
@@ -1708,7 +1684,7 @@ struct NDRangeReduction<
17081684 // TODO: currently the maximal work group size is determined for the given
17091685 // queue/device, while it may be safer to use queries to the kernel compiled
17101686 // for the device.
1711- size_t MaxWGSize = reduGetMaxWGSize (Queue , OneElemSize);
1687+ size_t MaxWGSize = reduGetMaxWGSize (CGH , OneElemSize);
17121688 if (NDRange.get_local_range ().size () > MaxWGSize)
17131689 throw sycl::exception (make_error_code (errc::nd_range),
17141690 " The implementation handling parallel_for with"
@@ -1826,9 +1802,7 @@ struct NDRangeReduction<
18261802template <> struct NDRangeReduction <reduction::strategy::basic> {
18271803 template <typename KernelName, int Dims, typename PropertiesT,
18281804 typename KernelType, typename Reduction>
1829- static void run (handler &CGH,
1830- const std::shared_ptr<detail::queue_impl> &Queue,
1831- nd_range<Dims> NDRange, PropertiesT &Properties,
1805+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
18321806 Reduction &Redu, KernelType &KernelFunc) {
18331807 using element_type = typename Reduction::reducer_element_type;
18341808
@@ -1837,7 +1811,7 @@ template <> struct NDRangeReduction<reduction::strategy::basic> {
18371811 // TODO: currently the maximal work group size is determined for the given
18381812 // queue/device, while it may be safer to use queries to the kernel
18391813 // compiled for the device.
1840- size_t MaxWGSize = reduGetMaxWGSize (Queue , OneElemSize);
1814+ size_t MaxWGSize = reduGetMaxWGSize (CGH , OneElemSize);
18411815 if (NDRange.get_local_range ().size () > MaxWGSize)
18421816 throw sycl::exception (make_error_code (errc::nd_range),
18431817 " The implementation handling parallel_for with"
@@ -2602,9 +2576,8 @@ tuple_select_elements(TupleT Tuple, std::index_sequence<Is...>) {
26022576template <> struct NDRangeReduction <reduction::strategy::multi> {
26032577 template <typename KernelName, int Dims, typename PropertiesT,
26042578 typename ... RestT>
2605- static void
2606- run (handler &CGH, const std::shared_ptr<detail::queue_impl> &Queue,
2607- nd_range<Dims> NDRange, PropertiesT &Properties, RestT... Rest) {
2579+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
2580+ RestT... Rest) {
26082581 std::tuple<RestT...> ArgsTuple (Rest...);
26092582 constexpr size_t NumArgs = sizeof ...(RestT);
26102583 auto KernelFunc = std::get<NumArgs - 1 >(ArgsTuple);
@@ -2615,7 +2588,7 @@ template <> struct NDRangeReduction<reduction::strategy::multi> {
26152588 // TODO: currently the maximal work group size is determined for the given
26162589 // queue/device, while it is safer to use queries to the kernel compiled
26172590 // for the device.
2618- size_t MaxWGSize = reduGetMaxWGSize (Queue , LocalMemPerWorkItem);
2591+ size_t MaxWGSize = reduGetMaxWGSize (CGH , LocalMemPerWorkItem);
26192592 if (NDRange.get_local_range ().size () > MaxWGSize)
26202593 throw sycl::exception (make_error_code (errc::nd_range),
26212594 " The implementation handling parallel_for with"
@@ -2646,13 +2619,10 @@ template <> struct NDRangeReduction<reduction::strategy::auto_select> {
26462619
26472620 template <typename KernelName, int Dims, typename PropertiesT,
26482621 typename KernelType, typename Reduction>
2649- static void run (handler &CGH,
2650- const std::shared_ptr<detail::queue_impl> &Queue,
2651- nd_range<Dims> NDRange, PropertiesT &Properties,
2622+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
26522623 Reduction &Redu, KernelType &KernelFunc) {
26532624 auto Delegate = [&](auto Impl) {
2654- Impl.template run <KernelName>(CGH, Queue, NDRange, Properties, Redu,
2655- KernelFunc);
2625+ Impl.template run <KernelName>(CGH, NDRange, Properties, Redu, KernelFunc);
26562626 };
26572627
26582628 if constexpr (Reduction::has_float64_atomics) {
@@ -2694,10 +2664,9 @@ template <> struct NDRangeReduction<reduction::strategy::auto_select> {
26942664 }
26952665 template <typename KernelName, int Dims, typename PropertiesT,
26962666 typename ... RestT>
2697- static void
2698- run (handler &CGH, const std::shared_ptr<detail::queue_impl> &Queue,
2699- nd_range<Dims> NDRange, PropertiesT &Properties, RestT... Rest) {
2700- return Impl<Strat::multi>::run<KernelName>(CGH, Queue, NDRange, Properties,
2667+ static void run (handler &CGH, nd_range<Dims> NDRange, PropertiesT &Properties,
2668+ RestT... Rest) {
2669+ return Impl<Strat::multi>::run<KernelName>(CGH, NDRange, Properties,
27012670 Rest...);
27022671 }
27032672};
@@ -2706,12 +2675,11 @@ template <typename KernelName, reduction::strategy Strategy, int Dims,
27062675 typename PropertiesT, typename ... RestT>
27072676void reduction_parallel_for (handler &CGH, nd_range<Dims> NDRange,
27082677 PropertiesT Properties, RestT... Rest) {
2709- NDRangeReduction<Strategy>::template run<KernelName>(CGH, CGH. MQueue , NDRange ,
2710- Properties, Rest...);
2678+ NDRangeReduction<Strategy>::template run<KernelName>(CGH, NDRange, Properties ,
2679+ Rest...);
27112680}
27122681
2713- __SYCL_EXPORT uint32_t
2714- reduGetMaxNumConcurrentWorkGroups (std::shared_ptr<queue_impl> Queue);
2682+ __SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups (handler &cgh);
27152683
27162684template <typename KernelName, reduction::strategy Strategy, int Dims,
27172685 typename PropertiesT, typename ... RestT>
@@ -2742,13 +2710,13 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
27422710#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS
27432711 __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS;
27442712#else
2745- reduGetMaxNumConcurrentWorkGroups (CGH. MQueue );
2713+ reduGetMaxNumConcurrentWorkGroups (CGH);
27462714#endif
27472715
27482716 // TODO: currently the preferred work group size is determined for the given
27492717 // queue/device, while it is safer to use queries to the kernel pre-compiled
27502718 // for the device.
2751- size_t PrefWGSize = reduGetPreferredWGSize (CGH. MQueue , OneElemSize);
2719+ size_t PrefWGSize = reduGetPreferredWGSize (CGH, OneElemSize);
27522720
27532721 size_t NWorkItems = Range.size ();
27542722 size_t WGSize = std::min (NWorkItems, PrefWGSize);
0 commit comments