From d59bfddca8c9863daa29f8259b79e1e9ce4f2f69 Mon Sep 17 00:00:00 2001 From: Ruslan Arutyunyan Date: Fri, 19 Jun 2020 15:51:27 +0300 Subject: [PATCH 1/2] [SYCL] Implement braced-init-list or a number as range for queue::parallel_for Modification: Make three different overloads for queue::parallel for to support range implicit conversion from number or braced-init-list Add tests for queue::parallel_for calls with generic lambda Signed-off-by: Ruslan Arutyunyan --- sycl/include/CL/sycl/queue.hpp | 71 +++++++++++++--- sycl/test/basic_tests/{ => queue}/queue.cpp | 0 .../queue/queue_parallel_for_generic.cpp | 72 +++++++++++++++++ .../queue/queue_parallel_for_interface.cpp | 81 +++++++++++++++++++ 4 files changed, 215 insertions(+), 9 deletions(-) rename sycl/test/basic_tests/{ => queue}/queue.cpp (100%) create mode 100644 sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp create mode 100644 sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 77ce1fab86474..2e65114c87b34 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -432,10 +432,9 @@ class __SYCL_EXPORT queue { /// \param NumWorkItems is a range that specifies the work space of the kernel /// \param KernelFunc is the Kernel functor or lambda /// \param CodeLoc contains the code location of user code - template + template event parallel_for( - range NumWorkItems, KernelType KernelFunc + range<1> NumWorkItems, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -444,12 +443,47 @@ class __SYCL_EXPORT queue { #ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA const detail::code_location &CodeLoc = {}; #endif - return submit( - [&](handler &CGH) { - CGH.template parallel_for(NumWorkItems, - KernelFunc); - }, - CodeLoc); + return parallel_for_impl(NumWorkItems, KernelFunc, CodeLoc); + } + + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param NumWorkItems is a range that specifies the work space of the kernel + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + event parallel_for( + range<2> NumWorkItems, KernelType KernelFunc +#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA + , + const detail::code_location &CodeLoc = detail::code_location::current() +#endif + ) { +#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA + const detail::code_location &CodeLoc = {}; +#endif + return parallel_for_impl(NumWorkItems, KernelFunc, CodeLoc); + } + + /// parallel_for version with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param NumWorkItems is a range that specifies the work space of the kernel + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + event parallel_for( + range<3> NumWorkItems, KernelType KernelFunc +#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA + , + const detail::code_location &CodeLoc = detail::code_location::current() +#endif + ) { +#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA + const detail::code_location &CodeLoc = {}; +#endif + return parallel_for_impl(NumWorkItems, KernelFunc, CodeLoc); } /// parallel_for version with a kernel represented as a lambda + range that @@ -716,6 +750,25 @@ class __SYCL_EXPORT queue { /// A template-free version of submit. event submit_impl(function_class CGH, queue secondQueue, const detail::code_location &CodeLoc); + + /// parallel_for_impl with a kernel represented as a lambda + range that + /// specifies global size only. + /// + /// \param NumWorkItems is a range that specifies the work space of the kernel + /// \param KernelFunc is the Kernel functor or lambda + /// \param CodeLoc contains the code location of user code + template + event parallel_for_impl( + range NumWorkItems, KernelType KernelFunc, + const detail::code_location &CodeLoc = detail::code_location::current()) { + return submit( + [&](handler &CGH) { + CGH.template parallel_for(NumWorkItems, + KernelFunc); + }, + CodeLoc); + } }; } // namespace sycl diff --git a/sycl/test/basic_tests/queue.cpp b/sycl/test/basic_tests/queue/queue.cpp similarity index 100% rename from sycl/test/basic_tests/queue.cpp rename to sycl/test/basic_tests/queue/queue.cpp diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp new file mode 100644 index 0000000000000..5ce67bd01ac4a --- /dev/null +++ b/sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp @@ -0,0 +1,72 @@ +// UNSUPPORTED: cuda +// CUDA does not support unnamed lambdas. +// +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for generic lambda -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------------===// + +#include +#include +#include + +int main() { + sycl::queue q{}; + auto dev = q.get_device(); + auto ctx = q.get_context(); + constexpr int N = 8; + + if (dev.get_info()) { + auto A = static_cast(sycl::malloc_shared(N * sizeof(int), dev, ctx)); + + for (int i = 0; i < N; i++) { + A[i] = 1; + } + + q.parallel_for(N, [=](auto i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + A[i]++; + }); + + q.parallel_for({N}, [=](auto i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + A[i]++; + }); + + sycl::id<1> offset(0); + q.parallel_for(sycl::range<1>{N}, offset, [=](auto i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + A[i]++; + }); + + sycl::nd_range<1> NDR(sycl::range<1>{N}, sycl::range<1>{2}); + q.parallel_for(NDR, [=](auto nd_i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + auto i = nd_i.get_global_id(0); + A[i]++; + }); + + q.wait(); + + for (int i = 0; i < N; i++) { + if (A[i] != 5) + return 1; + } + sycl::free(A, ctx); + } + + return 0; +} diff --git a/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp b/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp new file mode 100644 index 0000000000000..3bc2b64306534 --- /dev/null +++ b/sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp @@ -0,0 +1,81 @@ +// UNSUPPORTED: cuda +// CUDA does not support unnamed lambdas. +// +// RUN: %clangxx -fsycl -fsyntax-only -fsycl-unnamed-lambda %s -o %t.out + +//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for interface test -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------------===// + +#include +#include +#include + +template +void test_range_impl(sycl::queue q, std::index_sequence, + sycl::range *) { + constexpr auto dims = sizeof...(Is); + + q.parallel_for(sycl::range{Is...}, [=](auto i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + }); +} + +template +void test_range_impl(sycl::queue q, std::index_sequence, + sycl::nd_range *) { + constexpr auto dims = sizeof...(Is); + + sycl::nd_range ndr{sycl::range{Is...}, sycl::range{Is...}}; + q.parallel_for(ndr, [=](auto i) { + static_assert(std::is_same>::value, + "lambda arg type is unexpected"); + }); +} + +template