Skip to content

Commit a37b3cf

Browse files
authored
[SYCL] Implement free function kernel enqueue functions (#20698)
Implement the new enqueue functions for free function kernels that were added in #19995
1 parent e8adc66 commit a37b3cf

File tree

3 files changed

+251
-0
lines changed

3 files changed

+251
-0
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <sycl/detail/common.hpp>
1414
#include <sycl/event.hpp>
1515
#include <sycl/ext/oneapi/experimental/enqueue_types.hpp>
16+
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
1617
#include <sycl/ext/oneapi/experimental/graph.hpp>
1718
#include <sycl/ext/oneapi/properties/properties.hpp>
1819
#include <sycl/handler.hpp>
@@ -179,6 +180,21 @@ void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
179180
});
180181
}
181182

183+
// Free function kernel single_task enqueue functions
184+
template <auto *Func, typename... ArgsT>
185+
void single_task(queue Q, [[maybe_unused]] kernel_function_s<Func> KernelFunc,
186+
ArgsT &&...Args) {
187+
detail::submit_kernel_direct_single_task(std::move(Q),
188+
[Args...]() { Func(Args...); });
189+
}
190+
191+
template <auto *Func, typename... ArgsT>
192+
void single_task(handler &CGH,
193+
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
194+
ArgsT &&...Args) {
195+
CGH.single_task([Args...]() { Func(Args...); });
196+
}
197+
182198
// TODO: Make overloads for scalar arguments for range.
183199
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
184200
typename KernelType, typename... ReductionsT>
@@ -357,6 +373,48 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
357373
});
358374
}
359375

376+
// Free function kernel nd_launch enqueue functions
377+
template <auto *Func, int Dimensions, typename... ArgsT>
378+
void nd_launch(queue Q, nd_range<Dimensions> Range,
379+
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
380+
ArgsT &&...Args) {
381+
detail::submit_kernel_direct_parallel_for(
382+
std::move(Q), Range, [Args...](sycl::nd_item<>) { Func(Args...); });
383+
}
384+
385+
template <auto *Func, int Dimensions, typename... ArgsT>
386+
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
387+
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
388+
ArgsT &&...Args) {
389+
CGH.parallel_for(Range, [Args...](sycl::nd_item<>) { Func(Args...); });
390+
}
391+
392+
template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
393+
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
394+
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
395+
ArgsT &&...Args) {
396+
397+
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
398+
Properties>
399+
ConfigAccess(Config);
400+
detail::submit_kernel_direct_parallel_for(
401+
std::move(Q), ConfigAccess.getRange(),
402+
[Args...](sycl::nd_item<>) { Func(Args...); }, {},
403+
ConfigAccess.getProperties());
404+
}
405+
406+
template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
407+
void nd_launch(handler &CGH,
408+
launch_config<nd_range<Dimensions>, Properties> Config,
409+
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
410+
ArgsT &&...Args) {
411+
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
412+
Properties>
413+
ConfigAccess(Config);
414+
CGH.parallel_for(ConfigAccess.getRange(), ConfigAccess.getProperties(),
415+
[Args...](sycl::nd_item<>) { Func(Args...); });
416+
}
417+
360418
inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
361419
CGH.memcpy(Dest, Src, NumBytes);
362420
}

sycl/include/sycl/ext/oneapi/experimental/free_function_traits.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ template <typename T> struct is_struct_with_special_type {
6363
};
6464

6565
} // namespace detail
66+
67+
template <auto *Func> struct kernel_function_s {};
68+
69+
template <auto *Func> inline constexpr kernel_function_s<Func> kernel_function;
6670
} // namespace ext::oneapi::experimental
6771

6872
template <typename T> struct is_device_copyable;
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// UNSUPPORTED: target-amd
3+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
4+
5+
// RUN: %{build} -o %t.out
6+
// RUN: %{run} %t.out
7+
8+
// XFAIL: target-native_cpu
9+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142
10+
11+
// This test checks that free function kernels can be submitted using the
12+
// enqueued functions defined in the free function kernel extension, namely the
13+
// single_task and the nd_launch functions that take a queue/handler as an
14+
// argument. These were added in https://github.com/intel/llvm/pull/19995.
15+
16+
#include <cassert>
17+
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
18+
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
19+
#include <sycl/ext/oneapi/free_function_queries.hpp>
20+
#include <sycl/ext/oneapi/work_group_static.hpp>
21+
#include <sycl/usm.hpp>
22+
23+
namespace syclext = sycl::ext::oneapi;
24+
namespace syclexp = sycl::ext::oneapi::experimental;
25+
26+
using accType =
27+
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device,
28+
sycl::access::placeholder::true_t>;
29+
30+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
31+
void empty() {}
32+
33+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
34+
void initialize(int *ptr) {
35+
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
36+
ptr[Lid] = Lid;
37+
}
38+
39+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
40+
void successor(int *src, int *dst) { *dst = *src + 1; }
41+
42+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
43+
void square(int *src, int *dst) {
44+
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
45+
dst[Lid] = src[Lid] * src[Lid];
46+
}
47+
48+
template <typename T>
49+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
50+
void squareWithScratchMemoryTemplated(T *src, T *dst) {
51+
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
52+
T *LocalMem = reinterpret_cast<T *>(syclexp::get_work_group_scratch_memory());
53+
LocalMem[Lid] = src[Lid] * src[Lid];
54+
dst[Lid] = LocalMem[Lid];
55+
}
56+
57+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
58+
void squareWithAccessor(accType src, accType dst) {
59+
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
60+
dst[Lid] = src[Lid] * src[Lid];
61+
}
62+
63+
constexpr int SIZE = 16;
64+
65+
int main() {
66+
sycl::queue Q;
67+
int *Src = sycl::malloc_shared<int>(SIZE, Q);
68+
int *Dst = sycl::malloc_shared<int>(SIZE, Q);
69+
70+
syclexp::single_task(Q, syclexp::kernel_function_s<empty>{});
71+
72+
syclexp::nd_launch(
73+
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
74+
syclexp::kernel_function<initialize>, Src);
75+
Q.wait();
76+
77+
syclexp::launch_config Config{
78+
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
79+
syclexp::properties{
80+
syclexp::work_group_scratch_size(SIZE * sizeof(int))}};
81+
82+
static_assert(
83+
std::is_same_v<
84+
decltype(syclexp::nd_launch(
85+
Q, Config,
86+
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>,
87+
Src, Dst)),
88+
void>);
89+
90+
syclexp::nd_launch(
91+
Q, Config,
92+
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
93+
Dst);
94+
Q.wait();
95+
96+
for (int I = 0; I < SIZE; I++) {
97+
assert(Dst[I] == Src[I] * Src[I]);
98+
}
99+
100+
syclexp::nd_launch(
101+
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
102+
syclexp::kernel_function<square>, Src, Dst);
103+
Q.wait();
104+
105+
for (int I = 0; I < SIZE; I++) {
106+
assert(Dst[I] == Src[I] * Src[I]);
107+
}
108+
109+
static_assert(
110+
std::is_same_v<decltype(syclexp::single_task(
111+
Q, syclexp::kernel_function<successor>, Src, Dst)),
112+
void>);
113+
syclexp::single_task(Q, syclexp::kernel_function<successor>, Src, Dst);
114+
Q.wait();
115+
assert(Dst[0] == Src[0] + 1);
116+
117+
int SrcData[SIZE];
118+
int DstData[SIZE];
119+
for (int I = 0; I < SIZE; ++I) {
120+
SrcData[I] = I;
121+
}
122+
123+
{ // Test with accessors
124+
sycl::buffer<int> SrcBuf{&SrcData[0], SIZE};
125+
sycl::buffer<int> DstBuf{&DstData[0], SIZE};
126+
accType SrcAcc{SrcBuf};
127+
accType DstAcc{DstBuf};
128+
129+
Q.submit([&](sycl::handler &CGH) {
130+
CGH.require(SrcAcc);
131+
CGH.require(DstAcc);
132+
syclexp::nd_launch(CGH, Config,
133+
syclexp::kernel_function<squareWithAccessor>, SrcAcc,
134+
DstAcc);
135+
});
136+
}
137+
for (int I = 0; I < SIZE; ++I) {
138+
assert(DstData[I] == SrcData[I] * SrcData[I]);
139+
}
140+
141+
Q.submit([&](sycl::handler &CGH) {
142+
static_assert(
143+
std::is_same_v<decltype(syclexp::nd_launch(
144+
CGH, Config,
145+
syclexp::kernel_function<
146+
squareWithScratchMemoryTemplated<int>>,
147+
Src, Dst)),
148+
void>);
149+
syclexp::nd_launch(
150+
CGH, Config,
151+
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
152+
Dst);
153+
}).wait();
154+
155+
for (int I = 0; I < SIZE; I++) {
156+
assert(Dst[I] == Src[I] * Src[I]);
157+
}
158+
159+
Q.submit([&](sycl::handler &CGH) {
160+
static_assert(
161+
std::is_same_v<decltype(syclexp::nd_launch(
162+
CGH,
163+
::sycl::nd_range<1>(::sycl::range<1>(SIZE),
164+
::sycl::range<1>(SIZE)),
165+
syclexp::kernel_function<square>, Src, Dst)),
166+
void>);
167+
168+
syclexp::nd_launch(
169+
CGH,
170+
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
171+
syclexp::kernel_function<square>, Src, Dst);
172+
}).wait();
173+
174+
for (int I = 0; I < SIZE; I++) {
175+
assert(Dst[I] == Src[I] * Src[I]);
176+
}
177+
178+
Q.submit([&](sycl::handler &CGH) {
179+
static_assert(std::is_same_v<decltype(syclexp::single_task(
180+
CGH, syclexp::kernel_function<successor>,
181+
Src, Dst)),
182+
void>);
183+
syclexp::single_task(CGH, syclexp::kernel_function<successor>, Src, Dst);
184+
}).wait();
185+
186+
assert(Dst[0] == Src[0] + 1);
187+
188+
return 0;
189+
}

0 commit comments

Comments
 (0)