[SYCL] Implement braced-init-list or a number as range for queue::par…

…allel_for (#1931) Modification - Make three overloads for `queue::parallel_for` to support `range` implicit conversion from number or `braced-init-list` - Add tests for `queue::parallel_for` calls with generic lambda Implement the following `queue` extension - https://github.com/intel/llvm/tree/sycl/sycl/doc/extensions/QueueShortcuts/ Signed-off-by: Ruslan Arutyunyan <ruslan.arutyunyan@intel.com>
intel · Jun 23, 2020 · 17299ee · 17299ee
1 parent 58fc414
commit 17299ee
Show file tree

Hide file tree

Showing 4 changed files with 214 additions and 9 deletions.
diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp
@@ -432,10 +432,9 @@ class __SYCL_EXPORT queue {
   /// \param NumWorkItems is a range that specifies the work space of the kernel
   /// \param KernelFunc is the Kernel functor or lambda
   /// \param CodeLoc contains the code location of user code
-  template <typename KernelName = detail::auto_name, typename KernelType,
-            int Dims>
+  template <typename KernelName = detail::auto_name, typename KernelType>
   event parallel_for(
-      range<Dims> NumWorkItems, KernelType KernelFunc
+      range<1> NumWorkItems, KernelType KernelFunc
 #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
       ,
       const detail::code_location &CodeLoc = detail::code_location::current()
@@ -444,12 +443,47 @@ class __SYCL_EXPORT queue {
 #ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
     const detail::code_location &CodeLoc = {};
 #endif
-    return submit(
-        [&](handler &CGH) {
-          CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
-                                                            KernelFunc);
-        },
-        CodeLoc);
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
+  }
+
+  /// parallel_for version with a kernel represented as a lambda + range that
+  /// specifies global size only.
+  ///
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType>
+  event parallel_for(
+      range<2> NumWorkItems, KernelType KernelFunc
+#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
+      ,
+      const detail::code_location &CodeLoc = detail::code_location::current()
+#endif
+  ) {
+#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
+    const detail::code_location &CodeLoc = {};
+#endif
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
+  }
+
+  /// parallel_for version with a kernel represented as a lambda + range that
+  /// specifies global size only.
+  ///
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType>
+  event parallel_for(
+      range<3> NumWorkItems, KernelType KernelFunc
+#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
+      ,
+      const detail::code_location &CodeLoc = detail::code_location::current()
+#endif
+  ) {
+#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
+    const detail::code_location &CodeLoc = {};
+#endif
+    return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
   }
 
   /// parallel_for version with a kernel represented as a lambda + range that
@@ -716,6 +750,25 @@ class __SYCL_EXPORT queue {
   /// A template-free version of submit.
   event submit_impl(function_class<void(handler &)> CGH, queue secondQueue,
                     const detail::code_location &CodeLoc);
+
+  /// parallel_for_impl with a kernel represented as a lambda + range that
+  /// specifies global size only.
+  ///
+  /// \param NumWorkItems is a range that specifies the work space of the kernel
+  /// \param KernelFunc is the Kernel functor or lambda
+  /// \param CodeLoc contains the code location of user code
+  template <typename KernelName = detail::auto_name, typename KernelType,
+            int Dims>
+  event parallel_for_impl(
+      range<Dims> NumWorkItems, KernelType KernelFunc,
+      const detail::code_location &CodeLoc = detail::code_location::current()) {
+    return submit(
+        [&](handler &CGH) {
+          CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
+                                                            KernelFunc);
+        },
+        CodeLoc);
+  }
 };
 
 } // namespace sycl

diff --git a/sycl/test/basic_tests/queue.cpp → sycl/test/basic_tests/queue/queue.cpp b/sycl/test/basic_tests/queue.cpp → sycl/test/basic_tests/queue/queue.cpp
diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp
@@ -0,0 +1,74 @@
+// XFAIL: cuda
+// piextUSM*Alloc functions for CUDA are not behaving as described in
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
+// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for generic lambda -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+#include <iostream>
+#include <type_traits>
+
+int main() {
+  sycl::queue q{};
+  auto dev = q.get_device();
+  auto ctx = q.get_context();
+  constexpr int N = 8;
+
+  if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
+    return 0;
+  }
+
+  auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));
+
+  for (int i = 0; i < N; i++) {
+    A[i] = 1;
+  }
+
+  q.parallel_for<class Bar>(N, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+    A[i]++;
+  });
+
+  q.parallel_for<class Foo>({N}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+    A[i]++;
+  });
+
+  sycl::id<1> offset(0);
+  q.parallel_for<class Baz>(sycl::range<1>{N}, offset, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+    A[i]++;
+  });
+
+  sycl::nd_range<1> NDR(sycl::range<1>{N}, sycl::range<1>{2});
+  q.parallel_for<class NDFoo>(NDR, [=](auto nd_i) {
+    static_assert(std::is_same<decltype(nd_i), sycl::nd_item<1>>::value,
+                  "lambda arg type is unexpected");
+    auto i = nd_i.get_global_id(0);
+    A[i]++;
+  });
+
+  q.wait();
+
+  for (int i = 0; i < N; i++) {
+    if (A[i] != 5)
+      return 1;
+  }
+  sycl::free(A, ctx);
+}
diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp
@@ -0,0 +1,78 @@
+// RUN: %clangxx -fsycl -fsyntax-only %s -o %t.out
+
+//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for interface test -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+#include <iostream>
+#include <type_traits>
+
+template <typename KernelName, std::size_t... Is>
+void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
+                     sycl::range<sizeof...(Is)> *) {
+  constexpr auto dims = sizeof...(Is);
+
+  q.parallel_for<KernelName>(sycl::range<dims>{Is...}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<dims>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+template <typename KernelName, std::size_t... Is>
+void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
+                     sycl::nd_range<sizeof...(Is)> *) {
+  constexpr auto dims = sizeof...(Is);
+
+  sycl::nd_range<dims> ndr{sycl::range<dims>{Is...}, sycl::range<dims>{Is...}};
+  q.parallel_for<KernelName>(ndr, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::nd_item<dims>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+template <typename KernelName, template <int> class Range, std::size_t Dims>
+void test_range(sycl::queue q) {
+  test_range_impl<KernelName>(q, std::make_index_sequence<Dims>{},
+                              static_cast<Range<Dims> *>(nullptr));
+}
+
+void test_number_braced_init_list(sycl::queue q) {
+  constexpr auto n = 1;
+  q.parallel_for<class Number>(n, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for<class BracedInitList1>({n}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for<class BracedInitList2>({n, n}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<2>>::value,
+                  "lambda arg type is unexpected");
+  });
+
+  q.parallel_for<class BracedInitList3>({n, n, n}, [=](auto i) {
+    static_assert(std::is_same<decltype(i), sycl::item<3>>::value,
+                  "lambda arg type is unexpected");
+  });
+}
+
+int main() {
+  sycl::queue q{};
+
+  test_range<class test_range1, sycl::range, 1>(q);
+  test_range<class test_range2, sycl::range, 2>(q);
+  test_range<class test_range3, sycl::range, 3>(q);
+  test_range<class test_nd_range1, sycl::nd_range, 1>(q);
+  test_range<class test_nd_range2, sycl::nd_range, 2>(q);
+  test_range<class test_nd_range3, sycl::nd_range, 3>(q);
+
+  test_number_braced_init_list(q);
+}