Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b5ac52a
Implement free function kernel enqueue functions
lbushi25 Nov 20, 2025
63d860c
Remove unused code
lbushi25 Nov 20, 2025
00e0f0d
Improve comments
lbushi25 Nov 20, 2025
cd92d0c
Fix LIT command typo
lbushi25 Nov 20, 2025
4621ff6
Fix compilation error
lbushi25 Nov 20, 2025
76e0f8b
Fix unused argument error
lbushi25 Nov 20, 2025
ce2a16b
Fix unit-tests failures
lbushi25 Nov 20, 2025
e88b0f9
Fix formatting
lbushi25 Nov 20, 2025
b1b3ce9
Add XFAIL for native CPU
lbushi25 Nov 20, 2025
8b685ea
Add more tests
lbushi25 Nov 21, 2025
4cc1d15
Add a templated kernel test
lbushi25 Nov 21, 2025
4e7847d
Add a test to check definition of kernel_function_s
lbushi25 Nov 21, 2025
51e7aff
Merge branch 'intel:sycl' into enqueue_free_functions
lbushi25 Nov 24, 2025
4dc3224
Update free_function_kernels_enqueue.cpp
lbushi25 Nov 24, 2025
0b6a0ac
Apply requested changes
lbushi25 Dec 2, 2025
a7c592e
Merge branch 'enqueue_free_functions' of https://github.com/lbushi25/…
lbushi25 Dec 2, 2025
c65ffbc
Some more refactoring
lbushi25 Dec 2, 2025
5b7c7de
Apply feedback
lbushi25 Dec 10, 2025
356b55a
Remove dead code
lbushi25 Dec 10, 2025
9830ba4
Remove more dead code
lbushi25 Dec 10, 2025
34b0b17
Add more tests
lbushi25 Dec 11, 2025
d42cbdc
Merge branch 'sycl' into enqueue_free_functions
lbushi25 Dec 15, 2025
1acde56
Address feedback
lbushi25 Dec 15, 2025
07d5043
Merge branch 'enqueue_free_functions' of https://github.com/lbushi25/…
lbushi25 Dec 15, 2025
efd5cbf
Fix formatting
lbushi25 Dec 15, 2025
92671f1
Update free_function_kernels_enqueue.cpp
lbushi25 Dec 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <sycl/detail/common.hpp>
#include <sycl/event.hpp>
#include <sycl/ext/oneapi/experimental/enqueue_types.hpp>
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/handler.hpp>
Expand Down Expand Up @@ -179,6 +180,21 @@ void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
});
}

// Free function kernel single_task enqueue functions
template <auto *Func, typename... ArgsT>
void single_task(queue Q, [[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
detail::submit_kernel_direct_single_task(std::move(Q),
[Args...]() { Func(Args...); });
}

template <auto *Func, typename... ArgsT>
void single_task(handler &CGH,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
CGH.single_task([Args...]() { Func(Args...); });
}

// TODO: Make overloads for scalar arguments for range.
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... ReductionsT>
Expand Down Expand Up @@ -357,6 +373,48 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
});
}

// Free function kernel nd_launch enqueue functions
template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(queue Q, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
detail::submit_kernel_direct_parallel_for(
std::move(Q), Range, [Args...](sycl::nd_item<>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
CGH.parallel_for(Range, [Args...](sycl::nd_item<>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {

ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
ConfigAccess(Config);
detail::submit_kernel_direct_parallel_for(
std::move(Q), ConfigAccess.getRange(),
[Args...](sycl::nd_item<>) { Func(Args...); }, {},
ConfigAccess.getProperties());
}

template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
void nd_launch(handler &CGH,
launch_config<nd_range<Dimensions>, Properties> Config,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
ConfigAccess(Config);
CGH.parallel_for(ConfigAccess.getRange(), ConfigAccess.getProperties(),
[Args...](sycl::nd_item<>) { Func(Args...); });
}

inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
CGH.memcpy(Dest, Src, NumBytes);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ template <typename T> struct is_struct_with_special_type {
};

} // namespace detail

template <auto *Func> struct kernel_function_s {};

template <auto *Func> inline constexpr kernel_function_s<Func> kernel_function;
} // namespace ext::oneapi::experimental

template <typename T> struct is_device_copyable;
Expand Down
189 changes: 189 additions & 0 deletions sycl/test-e2e/FreeFunctionKernels/free_function_kernels_enqueue.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// REQUIRES: aspect-usm_shared_allocations
// UNSUPPORTED: target-amd
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// XFAIL: target-native_cpu
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142

// This test checks that free function kernels can be submitted using the
// enqueued functions defined in the free function kernel extension, namely the
// single_task and the nd_launch functions that take a queue/handler as an
// argument. These were added in https://github.com/intel/llvm/pull/19995.

#include <cassert>
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>
#include <sycl/ext/oneapi/work_group_static.hpp>
#include <sycl/usm.hpp>

namespace syclext = sycl::ext::oneapi;
namespace syclexp = sycl::ext::oneapi::experimental;

using accType =
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device,
sycl::access::placeholder::true_t>;

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
void empty() {}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void initialize(int *ptr) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
ptr[Lid] = Lid;
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
void successor(int *src, int *dst) { *dst = *src + 1; }

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void square(int *src, int *dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
dst[Lid] = src[Lid] * src[Lid];
}

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void squareWithScratchMemoryTemplated(T *src, T *dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
T *LocalMem = reinterpret_cast<T *>(syclexp::get_work_group_scratch_memory());
LocalMem[Lid] = src[Lid] * src[Lid];
dst[Lid] = LocalMem[Lid];
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void squareWithAccessor(accType src, accType dst) {
size_t Lid = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
dst[Lid] = src[Lid] * src[Lid];
}

constexpr int SIZE = 16;

int main() {
sycl::queue Q;
int *Src = sycl::malloc_shared<int>(SIZE, Q);
int *Dst = sycl::malloc_shared<int>(SIZE, Q);

syclexp::single_task(Q, syclexp::kernel_function_s<empty>{});

syclexp::nd_launch(
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<initialize>, Src);
Q.wait();

syclexp::launch_config Config{
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::properties{
syclexp::work_group_scratch_size(SIZE * sizeof(int))}};

static_assert(
std::is_same_v<
decltype(syclexp::nd_launch(
Q, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>,
Src, Dst)),
void>);

syclexp::nd_launch(
Q, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
Dst);
Q.wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

syclexp::nd_launch(
Q, ::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst);
Q.wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

static_assert(
std::is_same_v<decltype(syclexp::single_task(
Q, syclexp::kernel_function<successor>, Src, Dst)),
void>);
syclexp::single_task(Q, syclexp::kernel_function<successor>, Src, Dst);
Q.wait();
assert(Dst[0] == Src[0] + 1);

int SrcData[SIZE];
int DstData[SIZE];
for (int I = 0; I < SIZE; ++I) {
SrcData[I] = I;
}

{ // Test with accessors
sycl::buffer<int> SrcBuf{&SrcData[0], SIZE};
sycl::buffer<int> DstBuf{&DstData[0], SIZE};
accType SrcAcc{SrcBuf};
accType DstAcc{DstBuf};

Q.submit([&](sycl::handler &CGH) {
CGH.require(SrcAcc);
CGH.require(DstAcc);
syclexp::nd_launch(CGH, Config,
syclexp::kernel_function<squareWithAccessor>, SrcAcc,
DstAcc);
});
}
for (int I = 0; I < SIZE; ++I) {
assert(DstData[I] == SrcData[I] * SrcData[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(
std::is_same_v<decltype(syclexp::nd_launch(
CGH, Config,
syclexp::kernel_function<
squareWithScratchMemoryTemplated<int>>,
Src, Dst)),
void>);
syclexp::nd_launch(
CGH, Config,
syclexp::kernel_function<squareWithScratchMemoryTemplated<int>>, Src,
Dst);
}).wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(
std::is_same_v<decltype(syclexp::nd_launch(
CGH,
::sycl::nd_range<1>(::sycl::range<1>(SIZE),
::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst)),
void>);

syclexp::nd_launch(
CGH,
::sycl::nd_range<1>(::sycl::range<1>(SIZE), ::sycl::range<1>(SIZE)),
syclexp::kernel_function<square>, Src, Dst);
}).wait();

for (int I = 0; I < SIZE; I++) {
assert(Dst[I] == Src[I] * Src[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(std::is_same_v<decltype(syclexp::single_task(
CGH, syclexp::kernel_function<successor>,
Src, Dst)),
void>);
syclexp::single_task(CGH, syclexp::kernel_function<successor>, Src, Dst);
}).wait();

assert(Dst[0] == Src[0] + 1);

return 0;
}