Skip to content

[SYCL] Implement braced-init-list or a number as range for queue::parallel_for #1931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 62 additions & 9 deletions sycl/include/CL/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,10 +432,9 @@ class __SYCL_EXPORT queue {
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType,
int Dims>
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<Dims> NumWorkItems, KernelType KernelFunc
range<1> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
Expand All @@ -444,12 +443,47 @@ class __SYCL_EXPORT queue {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
KernelFunc);
},
CodeLoc);
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<2> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
#endif
) {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType>
event parallel_for(
range<3> NumWorkItems, KernelType KernelFunc
#ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
,
const detail::code_location &CodeLoc = detail::code_location::current()
#endif
) {
#ifdef DISABLE_SYCL_INSTRUMENTATION_METADATA
const detail::code_location &CodeLoc = {};
#endif
return parallel_for_impl<KernelName>(NumWorkItems, KernelFunc, CodeLoc);
}

/// parallel_for version with a kernel represented as a lambda + range that
Expand Down Expand Up @@ -716,6 +750,25 @@ class __SYCL_EXPORT queue {
/// A template-free version of submit.
event submit_impl(function_class<void(handler &)> CGH, queue secondQueue,
const detail::code_location &CodeLoc);

/// parallel_for_impl with a kernel represented as a lambda + range that
/// specifies global size only.
///
/// \param NumWorkItems is a range that specifies the work space of the kernel
/// \param KernelFunc is the Kernel functor or lambda
/// \param CodeLoc contains the code location of user code
template <typename KernelName = detail::auto_name, typename KernelType,
int Dims>
event parallel_for_impl(
range<Dims> NumWorkItems, KernelType KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName, KernelType>(NumWorkItems,
KernelFunc);
},
CodeLoc);
}
};

} // namespace sycl
Expand Down
74 changes: 74 additions & 0 deletions sycl/test/basic_tests/queue/queue_parallel_for_generic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// XFAIL: cuda
// piextUSM*Alloc functions for CUDA are not behaving as described in
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for generic lambda -=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <iostream>
#include <type_traits>

int main() {
sycl::queue q{};
auto dev = q.get_device();
auto ctx = q.get_context();
constexpr int N = 8;

if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
return 0;
}

auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));

for (int i = 0; i < N; i++) {
A[i] = 1;
}

q.parallel_for<class Bar>(N, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

q.parallel_for<class Foo>({N}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

sycl::id<1> offset(0);
q.parallel_for<class Baz>(sycl::range<1>{N}, offset, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});

sycl::nd_range<1> NDR(sycl::range<1>{N}, sycl::range<1>{2});
q.parallel_for<class NDFoo>(NDR, [=](auto nd_i) {
static_assert(std::is_same<decltype(nd_i), sycl::nd_item<1>>::value,
"lambda arg type is unexpected");
auto i = nd_i.get_global_id(0);
A[i]++;
});

q.wait();

for (int i = 0; i < N; i++) {
if (A[i] != 5)
return 1;
}
sycl::free(A, ctx);
}
78 changes: 78 additions & 0 deletions sycl/test/basic_tests/queue/queue_parallel_for_interface.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// RUN: %clangxx -fsycl -fsyntax-only %s -o %t.out

//==- queue_parallel_for_generic.cpp - SYCL queue parallel_for interface test -=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <iostream>
#include <type_traits>

template <typename KernelName, std::size_t... Is>
void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
sycl::range<sizeof...(Is)> *) {
constexpr auto dims = sizeof...(Is);

q.parallel_for<KernelName>(sycl::range<dims>{Is...}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<dims>>::value,
"lambda arg type is unexpected");
});
}

template <typename KernelName, std::size_t... Is>
void test_range_impl(sycl::queue q, std::index_sequence<Is...>,
sycl::nd_range<sizeof...(Is)> *) {
constexpr auto dims = sizeof...(Is);

sycl::nd_range<dims> ndr{sycl::range<dims>{Is...}, sycl::range<dims>{Is...}};
q.parallel_for<KernelName>(ndr, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::nd_item<dims>>::value,
"lambda arg type is unexpected");
});
}

template <typename KernelName, template <int> class Range, std::size_t Dims>
void test_range(sycl::queue q) {
test_range_impl<KernelName>(q, std::make_index_sequence<Dims>{},
static_cast<Range<Dims> *>(nullptr));
}

void test_number_braced_init_list(sycl::queue q) {
constexpr auto n = 1;
q.parallel_for<class Number>(n, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList1>({n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList2>({n, n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<2>>::value,
"lambda arg type is unexpected");
});

q.parallel_for<class BracedInitList3>({n, n, n}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<3>>::value,
"lambda arg type is unexpected");
});
}

int main() {
sycl::queue q{};

test_range<class test_range1, sycl::range, 1>(q);
test_range<class test_range2, sycl::range, 2>(q);
test_range<class test_range3, sycl::range, 3>(q);
test_range<class test_nd_range1, sycl::nd_range, 1>(q);
test_range<class test_nd_range2, sycl::nd_range, 2>(q);
test_range<class test_nd_range3, sycl::nd_range, 3>(q);

test_number_braced_init_list(q);
}