Skip to content

Commit 0caeeae

Browse files
alexeyvoronov-intelbader
authored andcommitted
[SYCL] Indexation alignment between the host and the no host devices.
Added offset logic for host parallel_for invokes. Fixed initialisation of the offset from item<dimensions, true> class for device parallel_for invoke where the item is used an indexer. Fixed parallel_for invoke with item<dimensions, false> which is used an indexer. Signed-off-by: Alexey Voronov <alexey.voronov@intel.com>
1 parent e46d919 commit 0caeeae

File tree

3 files changed

+187
-6
lines changed

3 files changed

+187
-6
lines changed

sycl/include/CL/sycl/detail/cg.hpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,7 @@ class HostKernel : public HostKernelBase {
198198

199199
template <class ArgT = KernelArgType>
200200
typename std::enable_if<
201-
(std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value ||
202-
std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value)>::type
201+
std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value>::type
203202
runOnHost(const NDRDescT &NDRDesc) {
204203
size_t XYZ[3] = {0};
205204
sycl::id<Dims> ID;
@@ -224,10 +223,36 @@ class HostKernel : public HostKernelBase {
224223
}
225224

226225
template <class ArgT = KernelArgType>
227-
typename std::enable_if<std::is_same<ArgT, nd_item<Dims>>::value>::type
226+
typename std::enable_if<
227+
std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value>::type
228228
runOnHost(const NDRDescT &NDRDesc) {
229-
// TODO add offset logic
229+
sycl::range<Dims> Range;
230+
sycl::id<Dims> Offset;
231+
for (int I = 0; I < Dims; ++I) {
232+
Range[I] = NDRDesc.GlobalSize[I];
233+
Offset[I] = NDRDesc.GlobalOffset[I];
234+
}
235+
size_t XYZ[3] = {0};
236+
sycl::id<Dims> ID;
237+
for (; XYZ[2] < NDRDesc.GlobalSize[2]; ++XYZ[2]) {
238+
XYZ[1] = 0;
239+
for (; XYZ[1] < NDRDesc.GlobalSize[1]; ++XYZ[1]) {
240+
XYZ[0] = 0;
241+
for (; XYZ[0] < NDRDesc.GlobalSize[0]; ++XYZ[0]) {
242+
for (int I = 0; I < Dims; ++I)
243+
ID[I] = XYZ[I] + Offset[I];
230244

245+
sycl::item<Dims, /*Offset=*/true> Item =
246+
IDBuilder::createItem<Dims, true>(Range, ID, Offset);
247+
MKernel(Item);
248+
}
249+
}
250+
}
251+
}
252+
253+
template <class ArgT = KernelArgType>
254+
typename std::enable_if<std::is_same<ArgT, nd_item<Dims>>::value>::type
255+
runOnHost(const NDRDescT &NDRDesc) {
231256
sycl::range<Dims> GroupSize;
232257
for (int I = 0; I < Dims; ++I) {
233258
GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
@@ -249,7 +274,7 @@ class HostKernel : public HostKernelBase {
249274
GlobalSize, LocalSize, GroupSize, GroupID);
250275

251276
detail::NDLoop<Dims>::iterate(LocalSize, [&](const id<Dims> &LocalID) {
252-
id<Dims> GlobalID = GroupID * LocalSize + LocalID;
277+
id<Dims> GlobalID = GroupID * LocalSize + LocalID + GlobalOffset;
253278
const sycl::item<Dims, /*Offset=*/true> GlobalItem =
254279
IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
255280
GlobalOffset);

sycl/include/CL/sycl/handler.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ class handler {
578578
template <typename KernelName, typename KernelType, int dimensions>
579579
__attribute__((sycl_kernel)) void kernel_parallel_for(
580580
typename std::enable_if<std::is_same<detail::lambda_arg_type<KernelType>,
581-
item<dimensions>>::value &&
581+
item<dimensions, false>>::value &&
582582
(dimensions > 0 && dimensions < 4),
583583
KernelType>::type KernelFunc) {
584584
id<dimensions> global_id{
@@ -591,6 +591,24 @@ class handler {
591591
KernelFunc(Item);
592592
}
593593

594+
template <typename KernelName, typename KernelType, int dimensions>
595+
__attribute__((sycl_kernel)) void kernel_parallel_for(
596+
typename std::enable_if<std::is_same<detail::lambda_arg_type<KernelType>,
597+
item<dimensions, true>>::value &&
598+
(dimensions > 0 && dimensions < 4),
599+
KernelType>::type KernelFunc) {
600+
id<dimensions> global_id{
601+
__spirv::initGlobalInvocationId<dimensions, id<dimensions>>()};
602+
range<dimensions> global_size{
603+
__spirv::initGlobalSize<dimensions, range<dimensions>>()};
604+
id<dimensions> global_offset{
605+
__spirv::initGlobalOffset<dimensions, id<dimensions>>()};
606+
607+
item<dimensions, true> Item = detail::Builder::createItem<dimensions, true>(
608+
global_size, global_id, global_offset);
609+
KernelFunc(Item);
610+
}
611+
594612
template <typename KernelName, typename KernelType, int dimensions>
595613
__attribute__((sycl_kernel)) void kernel_parallel_for(
596614
typename std::enable_if<std::is_same<detail::lambda_arg_type<KernelType>,
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// RUN: %clangxx %s -o %t1.out -lOpenCL -lsycl
2+
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
3+
// RUN: %clangxx -fsycl %s -o %t2.out -lOpenCL
4+
// RUN: env SYCL_DEVICE_TYPE=HOST %t2.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t2.out
6+
// RUN: %GPU_RUN_PLACEHOLDER %t2.out
7+
// RUN: %ACC_RUN_PLACEHOLDER %t2.out
8+
9+
#include <CL/sycl.hpp>
10+
11+
#include <cassert>
12+
#include <memory>
13+
14+
using namespace cl::sycl;
15+
16+
// TODO add cases with dimensions more than 1
17+
int main() {
18+
// Id indexer
19+
{
20+
vector_class<int> data(10, -1);
21+
const range<1> globalRange(6);
22+
{
23+
buffer<int, 1> b(data.data(), range<1>(10),
24+
{property::buffer::use_host_ptr()});
25+
queue myQueue;
26+
myQueue.submit([&](handler &cgh) {
27+
auto B = b.get_access<access::mode::read_write>(cgh);
28+
cgh.parallel_for<class id1>(globalRange,
29+
[=](id<1> index) { B[index] = index[0]; });
30+
});
31+
}
32+
for (int i = 0; i < data.size(); i++) {
33+
const int id = data[i];
34+
if (i < globalRange[0]) {
35+
assert(id == i);
36+
} else {
37+
assert(id == -1);
38+
}
39+
}
40+
}
41+
// Item indexer without offset
42+
{
43+
vector_class<int2> data(10, int2{-1});
44+
const range<1> globalRange(6);
45+
{
46+
buffer<int2, 1> b(data.data(), range<1>(10),
47+
{property::buffer::use_host_ptr()});
48+
queue myQueue;
49+
myQueue.submit([&](handler &cgh) {
50+
auto B = b.get_access<access::mode::read_write>(cgh);
51+
cgh.parallel_for<class item1_nooffset>(
52+
globalRange, [=](item<1, false> index) {
53+
B[index.get_id()] = int2{index.get_id()[0], index.get_range()[0]};
54+
});
55+
});
56+
}
57+
for (int i = 0; i < data.size(); i++) {
58+
const int id = data[i].s0();
59+
const int range = data[i].s1();
60+
if (i < globalRange[0]) {
61+
assert(id == i);
62+
assert(range == globalRange[0]);
63+
} else {
64+
assert(id == -1);
65+
assert(range == -1);
66+
}
67+
}
68+
}
69+
// Item indexer with offset
70+
{
71+
vector_class<int3> data(10, int3{-1});
72+
const range<1> globalRange(6);
73+
const id<1> globalOffset(4);
74+
{
75+
buffer<int3, 1> b(data.data(), range<1>(10),
76+
{property::buffer::use_host_ptr()});
77+
queue myQueue;
78+
myQueue.submit([&](handler &cgh) {
79+
auto B = b.get_access<access::mode::read_write>(cgh);
80+
cgh.parallel_for<class item1_offset>(
81+
globalRange, globalOffset, [=](item<1> index) {
82+
B[index.get_id()] = int3{index.get_id()[0], index.get_range()[0],
83+
index.get_offset()[0]};
84+
});
85+
});
86+
}
87+
for (int i = 0; i < data.size(); i++) {
88+
const int id = data[i].s0();
89+
const int range = data[i].s1();
90+
const int offset = data[i].s2();
91+
if (i < globalOffset[0]) {
92+
assert(id == -1);
93+
assert(range == -1);
94+
assert(offset == -1);
95+
} else {
96+
assert(id == i);
97+
assert(range == globalRange[0]);
98+
assert(offset == globalOffset[0]);
99+
}
100+
}
101+
}
102+
// ND_Item indexer
103+
{
104+
vector_class<int3> data(10, int3{-1});
105+
const range<1> globalRange(6);
106+
const range<1> localRange(3);
107+
const id<1> globalOffset(4);
108+
const nd_range<1> ndRange(globalRange, localRange, globalOffset);
109+
{
110+
buffer<int3, 1> b(data.data(), range<1>(10),
111+
{property::buffer::use_host_ptr()});
112+
queue myQueue;
113+
myQueue.submit([&](handler &cgh) {
114+
auto B = b.get_access<access::mode::read_write>(cgh);
115+
cgh.parallel_for<class item1_nd_range>(ndRange, [=](nd_item<1> index) {
116+
B[index.get_global_id()] =
117+
int3{index.get_global_id()[0], index.get_global_range()[0],
118+
index.get_offset()[0]};
119+
});
120+
});
121+
}
122+
for (int i = 0; i < data.size(); i++) {
123+
const int id = data[i].s0();
124+
const int range = data[i].s1();
125+
const int offset = data[i].s2();
126+
if (i < globalOffset[0]) {
127+
assert(id == -1);
128+
assert(range == -1);
129+
assert(offset == -1);
130+
} else {
131+
assert(id == i);
132+
assert(range == globalRange[0]);
133+
assert(offset == globalOffset[0]);
134+
}
135+
}
136+
}
137+
return 0;
138+
}

0 commit comments

Comments
 (0)