Skip to content

Commit

Permalink
[SYCL] Implement braced-init-list or a number as range for queue::par…
Browse files Browse the repository at this point in the history
…allel_for (#1931)

Modification
- Make three overloads for `queue::parallel_for` to support `range` implicit conversion from number or `braced-init-list`
- Add tests for `queue::parallel_for` calls with generic lambda

Implement the following `queue` extension - https://github.com/intel/llvm/tree/sycl/sycl/doc/extensions/QueueShortcuts/

Signed-off-by: Ruslan Arutyunyan <ruslan.arutyunyan@intel.com>
  • Loading branch information
rarutyun authored Jun 23, 2020
1 parent 58fc414 commit 17299ee
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 9 deletions.
71 changes: 62 additions & 9 deletions sycl/include/CL/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,10 +432,9 @@ class __SYCL_EXPORT queue {
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType,
int Dims>
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<Dims> NumWorkItems, KernelType KernelFunc
range<1> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
Expand All @@ -444,12 +443,47 @@ class __SYCL_EXPORT queue {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
KernelFunc);
},
CodeLoc);
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<2> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
#endif
) {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<3> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
#endif
) {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
Expand Down Expand Up @@ -716,6 +750,25 @@ class __SYCL_EXPORT queue {
/// A template-free version of submit.
event submit_impl(function_class<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc);

/// parallel_for_impl with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType,
int Dims>
event parallel_for_impl(
range<Dims> NumWorkItems, KernelType KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
KernelFunc);
},
CodeLoc);
}
};

} // namespace sycl
Expand Down
File renamed without changes.
74 changes: 74 additions & 0 deletions sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// XFAIL: cuda
// piextUSM*Alloc functions for CUDA are not behaving as described in
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for generic lambda -=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <iostream>
#include <type_traits>

int main() {
sycl::queue q{};
auto dev = q.get_device();
auto ctx = q.get_context();
constexpr int N = 8;

if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
return 0;
}

auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));

for (int i = 0; i < N; i++) {
A[i] = 1;
}

q.parallel_for<class Bar>(N, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

q.parallel_for<class Foo>({N}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

sycl::id<1> offset(0);
q.parallel_for<class Baz>(sycl::range<1>{N}, offset, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

sycl::nd_range<1> NDR(sycl::range<1>{N}, sycl::range<1>{2});
q.parallel_for<class NDFoo>(NDR, [=](auto nd_i) {
static_assert(std::is_same<decltype(nd_i), sycl::nd_item<1>>::value,
"lambda arg type is unexpected");
auto i = nd_i.get_global_id(0);
A[i]++;
});

q.wait();

for (int i = 0; i < N; i++) {
if (A[i] != 5)
return 1;
}
sycl::free(A, ctx);
}
78 changes: 78 additions & 0 deletions sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// RUN: %clangxx -fsycl -fsyntax-only %s -o %t.out

//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for interface test -=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <iostream>
#include <type_traits>

template <typename KernelName, std::size_t... Is>
void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
sycl::range<sizeof...(Is)> *) {
constexpr auto dims = sizeof...(Is);

q.parallel_for<KernelName>(sycl::range<dims>{Is...}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<dims>>::value,
"lambda arg type is unexpected");
});
}

template <typename KernelName, std::size_t... Is>
void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
sycl::nd_range<sizeof...(Is)> *) {
constexpr auto dims = sizeof...(Is);

sycl::nd_range<dims> ndr{sycl::range<dims>{Is...}, sycl::range<dims>{Is...}};
q.parallel_for<KernelName>(ndr, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::nd_item<dims>>::value,
"lambda arg type is unexpected");
});
}

template <typename KernelName, template <int> class Range, std::size_t Dims>
void test_range(sycl::queue q) {
test_range_impl<KernelName>(q, std::make_index_sequence<Dims>{},
static_cast<Range<Dims> *>(nullptr));
}

void test_number_braced_init_list(sycl::queue q) {
constexpr auto n = 1;
q.parallel_for<class Number>(n, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList1>({n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList2>({n, n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<2>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList3>({n, n, n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<3>>::value,
"lambda arg type is unexpected");
});
}

int main() {
sycl::queue q{};

test_range<class test_range1, sycl::range, 1>(q);
test_range<class test_range2, sycl::range, 2>(q);
test_range<class test_range3, sycl::range, 3>(q);
test_range<class test_nd_range1, sycl::nd_range, 1>(q);
test_range<class test_nd_range2, sycl::nd_range, 2>(q);
test_range<class test_nd_range3, sycl::nd_range, 3>(q);

test_number_braced_init_list(q);
}

0 comments on commit 17299ee

Please sign in to comment.