Skip to content

Commit 9d4c284

Browse files
authored
[SYCL][ABI] Subgroup Extension update (#1600)
Implements the change as per the spec update at https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/SubGroup/SYCL_INTEL_sub_group.asciidoc and update from : #1565 ABI update: Functions corresponding to the deleted APIs have been deleted. Edit info.cpp test to expect max sub-group size based on the spec update. Changed pi.h document and changed the plugin to always return a 32 bit value. Signed-off-by: Garima Gupta <garima.gupta@intel.com>
1 parent 5ad2b58 commit 9d4c284

File tree

11 files changed

+87
-107
lines changed

11 files changed

+87
-107
lines changed

sycl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON)
1111
set(SYCL_MAJOR_VERSION 1)
1212
set(SYCL_MINOR_VERSION 0)
1313
set(SYCL_PATCH_VERSION 0)
14-
set(SYCL_DEV_ABI_VERSION 0)
14+
set(SYCL_DEV_ABI_VERSION 1)
1515
if (SYCL_ADD_DEV_VERSION_POSTFIX)
1616
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
1717
endif()

sycl/include/CL/sycl/detail/pi.h

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
// TODO: we need a mapping of PI to OpenCL somewhere, and this can be done
4646
// elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now
4747
// done here, for efficiency and simplicity.
48-
48+
#include <CL/cl_ext_intel.h>
4949
#include <CL/cl_usm_ext.h>
5050
#include <CL/sycl/detail/cl.h>
5151
#include <CL/sycl/detail/export.hpp>
@@ -309,6 +309,14 @@ typedef enum {
309309
PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH
310310
} _pi_image_info;
311311

312+
typedef enum {
313+
PI_KERNEL_MAX_SUB_GROUP_SIZE = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
314+
PI_KERNEL_MAX_NUM_SUB_GROUPS = CL_KERNEL_MAX_NUM_SUB_GROUPS,
315+
PI_KERNEL_COMPILE_NUM_SUB_GROUPS = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
316+
PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL =
317+
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
318+
} _pi_kernel_sub_group_info;
319+
312320
typedef enum {
313321
PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE,
314322
PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT,
@@ -503,6 +511,7 @@ using pi_queue_info = _pi_queue_info;
503511
using pi_image_info = _pi_image_info;
504512
using pi_kernel_info = _pi_kernel_info;
505513
using pi_kernel_group_info = _pi_kernel_group_info;
514+
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
506515
using pi_event_info = _pi_event_info;
507516
using pi_command_type = _pi_command_type;
508517
using pi_mem_type = _pi_mem_type;
@@ -1049,9 +1058,26 @@ __SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
10491058
void *param_value,
10501059
size_t *param_value_size_ret);
10511060

1061+
/// API to query information from the sub-group from a kernel
1062+
///
1063+
/// \param kernel is the pi_kernel to query
1064+
/// \param device is the device the kernel is executed on
1065+
/// \param param_name is a pi_kernel_sub_group_info enum value that
1066+
/// specifies the informtation queried for.
1067+
/// \param input_value_size is the size of input value passed in
1068+
/// ptr input_value param
1069+
/// \param input_value is the ptr to the input value passed.
1070+
/// \param param_value_size is the size of the value in bytes.
1071+
/// \param param_value is a pointer to the value to set.
1072+
/// \param param_value_size_ret is a pointer to return the size of data in
1073+
/// param_value ptr.
1074+
///
1075+
/// All queries expect a return of 4 bytes in param_value_size,
1076+
/// param_value_size_ret, and a uint32_t value should to be written in
1077+
/// param_value ptr.
1078+
/// Note: This behaviour differs from OpenCL. OpenCL returns size_t.
10521079
__SYCL_EXPORT pi_result piKernelGetSubGroupInfo(
1053-
pi_kernel kernel, pi_device device,
1054-
cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL
1080+
pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name,
10551081
size_t input_value_size, const void *input_value, size_t param_value_size,
10561082
void *param_value, size_t *param_value_size_ret);
10571083

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,7 @@ enum class kernel_work_group : cl_kernel_work_group_info {
206206
};
207207

208208
enum class kernel_sub_group : cl_kernel_sub_group_info {
209-
max_sub_group_size_for_ndrange = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
210-
sub_group_count_for_ndrange = CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
211-
local_size_for_sub_group_count = CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
209+
max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
212210
max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS,
213211
compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
214212
compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
1-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size_for_ndrange,
2-
size_t, cl::sycl::range<3>)
3-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, sub_group_count_for_ndrange,
4-
size_t, cl::sycl::range<3>)
5-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, local_size_for_sub_group_count,
6-
cl::sycl::range<3>, size_t)
7-
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, size_t)
8-
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, size_t)
9-
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, size_t)
10-
1+
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size,
2+
uint32_t, cl::sycl::range<3>)
3+
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, uint32_t)
4+
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, uint32_t)
5+
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, uint32_t)

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,28 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name,
598598
return ret_err;
599599
}
600600

601+
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device,
602+
pi_kernel_sub_group_info param_name,
603+
size_t input_value_size,
604+
const void *input_value,
605+
size_t param_value_size, void *param_value,
606+
size_t *param_value_size_ret) {
607+
size_t ret_val;
608+
cl_int ret_err;
609+
ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
610+
cast<cl_kernel>(kernel), cast<cl_device_id>(device),
611+
cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
612+
sizeof(size_t), &ret_val, param_value_size_ret));
613+
614+
if (ret_err != CL_SUCCESS)
615+
return cast<pi_result>(ret_err);
616+
617+
*(static_cast<uint32_t *>(param_value)) = static_cast<uint32_t>(ret_val);
618+
if (param_value_size_ret)
619+
*param_value_size_ret = sizeof(uint32_t);
620+
return PI_SUCCESS;
621+
}
622+
601623
pi_result piEventCreate(pi_context context, pi_event *ret_event) {
602624

603625
pi_result ret_err = PI_INVALID_OPERATION;
@@ -1145,7 +1167,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
11451167
_PI_CL(piKernelSetArg, clSetKernelArg)
11461168
_PI_CL(piKernelGetInfo, clGetKernelInfo)
11471169
_PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo)
1148-
_PI_CL(piKernelGetSubGroupInfo, clGetKernelSubGroupInfo)
1170+
_PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo)
11491171
_PI_CL(piKernelRetain, clRetainKernel)
11501172
_PI_CL(piKernelRelease, clReleaseKernel)
11511173
_PI_CL(piKernelSetExecInfo, piKernelSetExecInfo)

sycl/source/detail/kernel_impl.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,9 @@ kernel_impl::get_sub_group_info(const device &Device) const {
9292
throw runtime_error("Sub-group feature is not supported on HOST device.",
9393
PI_INVALID_DEVICE);
9494
}
95-
return get_kernel_sub_group_info<
96-
typename info::param_traits<info::kernel_sub_group, param>::return_type,
97-
param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
98-
getPlugin());
95+
return get_kernel_sub_group_info<param>::get(
96+
this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
97+
getPlugin());
9998
}
10099

101100
template <info::kernel_sub_group param>
@@ -108,12 +107,9 @@ kernel_impl::get_sub_group_info(
108107
throw runtime_error("Sub-group feature is not supported on HOST device.",
109108
PI_INVALID_DEVICE);
110109
}
111-
return get_kernel_sub_group_info_with_input<
112-
typename info::param_traits<info::kernel_sub_group, param>::return_type,
113-
param,
114-
typename info::param_traits<info::kernel_sub_group, param>::input_type>::
115-
get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value,
116-
getPlugin());
110+
return get_kernel_sub_group_info_with_input<param>::get(
111+
this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value,
112+
getPlugin());
117113
}
118114

119115
#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \

sycl/source/detail/kernel_info.hpp

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -120,55 +120,30 @@ get_kernel_work_group_info_host<info::kernel_work_group::private_mem_size>(
120120
}
121121
// The kernel sub-group methods
122122

123-
template <typename TOut, info::kernel_sub_group Param>
124-
struct get_kernel_sub_group_info {
125-
static TOut get(RT::PiKernel Kernel, RT::PiDevice Device,
126-
const plugin &Plugin) {
127-
TOut Result;
123+
template <info::kernel_sub_group Param> struct get_kernel_sub_group_info {
124+
static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device,
125+
const plugin &Plugin) {
126+
uint32_t Result;
128127
// TODO catch an exception and put it to list of asynchronous exceptions
129128
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
130-
Kernel, Device, cl_kernel_sub_group_info(Param), 0, nullptr,
131-
sizeof(TOut), &Result, nullptr);
132-
return Result;
133-
}
134-
};
129+
Kernel, Device, pi_kernel_sub_group_info(Param), 0, nullptr,
130+
sizeof(uint32_t), &Result, nullptr);
135131

136-
template <typename TOut, info::kernel_sub_group Param, typename TIn>
137-
struct get_kernel_sub_group_info_with_input {
138-
static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, TIn In,
139-
const plugin &Plugin) {
140-
TOut Result;
141-
// TODO catch an exception and put it to list of asynchronous exceptions
142-
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
143-
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(TIn), &In,
144-
sizeof(TOut), &Result, nullptr);
145132
return Result;
146133
}
147134
};
148135

149136
template <info::kernel_sub_group Param>
150-
struct get_kernel_sub_group_info_with_input<cl::sycl::range<3>, Param, size_t> {
151-
static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device,
152-
size_t In, const plugin &Plugin) {
153-
size_t Result[3];
154-
// TODO catch an exception and put it to list of asynchronous exceptions
155-
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
156-
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t), &In,
157-
sizeof(size_t) * 3, Result, nullptr);
158-
return cl::sycl::range<3>(Result[0], Result[1], Result[2]);
159-
}
160-
};
161-
162-
template <info::kernel_sub_group Param>
163-
struct get_kernel_sub_group_info_with_input<size_t, Param, cl::sycl::range<3>> {
164-
static size_t get(RT::PiKernel Kernel, RT::PiDevice Device,
165-
cl::sycl::range<3> In, const plugin &Plugin) {
137+
struct get_kernel_sub_group_info_with_input {
138+
static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device,
139+
cl::sycl::range<3> In, const plugin &Plugin) {
166140
size_t Input[3] = {In[0], In[1], In[2]};
167-
size_t Result;
141+
uint32_t Result;
168142
// TODO catch an exception and put it to list of asynchronous exceptions
169143
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
170-
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t) * 3,
171-
Input, sizeof(size_t), &Result, nullptr);
144+
Kernel, Device, pi_kernel_sub_group_info(Param), sizeof(size_t) * 3,
145+
Input, sizeof(uint32_t), &Result, nullptr);
146+
172147
return Result;
173148
}
174149
};

sycl/test/abi/pi_opencl_symbol_check.dump

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ piDevicesGet
66
piEnqueueMemBufferMap
77
piEventCreate
88
piKernelCreate
9+
piKernelGetSubGroupInfo
910
piKernelSetExecInfo
1011
piMemBufferCreate
1112
piMemBufferPartition
@@ -42,4 +43,4 @@ piextUSMEnqueuePrefetch
4243
piextUSMFree
4344
piextUSMGetMemAllocInfo
4445
piextUSMHostAlloc
45-
piextUSMSharedAlloc
46+
piextUSMSharedAlloc

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3467,11 +3467,9 @@ _ZNK2cl4sycl6device9getNativeEv
34673467
_ZNK2cl4sycl6kernel11get_contextEv
34683468
_ZNK2cl4sycl6kernel11get_programEv
34693469
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE16650EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
3470-
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4536EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
34713470
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
34723471
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
34733472
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
3474-
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8244EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
34753473
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4528EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
34763474
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4529EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
34773475
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4531EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE

sycl/test/sub_group/common_ocl.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,6 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) {
6060
cgh.set_args(oclacc);
6161
cgh.parallel_for(NdRange, Prog.get_kernel("ocl_subgr"));
6262
});
63-
size_t NumSG = Prog.get_kernel("ocl_subgr")
64-
.get_sub_group_info<
65-
info::kernel_sub_group::sub_group_count_for_ndrange>(
66-
Queue.get_device(), range<3>(G, 1, 1));
6763
auto oclacc = oclbuf.get_access<access::mode::read_write>();
6864

6965
Queue.submit([&](handler &cgh) {

sycl/test/sub_group/info.cpp

Lines changed: 7 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -55,25 +55,21 @@ int main() {
5555
"kernel_sg(global double* a, global double* b, "
5656
"global double* c) {*a=*b+*c; }\n");
5757
kernel Kernel = Prog.get_kernel("kernel_sg");
58-
size_t Res = 0;
58+
uint32_t Res = 0;
5959
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
6060
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
6161
Res = Kernel.get_sub_group_info<
62-
info::kernel_sub_group::max_sub_group_size_for_ndrange>(Device, r);
63-
exit_if_not_equal(Res, min(r.size(), max_sg_num),
64-
"max_sub_group_size_for_ndrange");
65-
Res = Kernel.get_sub_group_info<
66-
info::kernel_sub_group::sub_group_count_for_ndrange>(Device, r);
67-
exit_if_not_equal<size_t>(
68-
Res, r.size() / max_sg_num + (r.size() % max_sg_num ? 1 : 0),
69-
"sub_group_count_for_ndrange");
62+
info::kernel_sub_group::max_sub_group_size>(Device, r);
63+
bool Expected = (Res == r.size() || Res == max_sg_num);
64+
exit_if_not_equal<bool>(Expected, true,
65+
"max_sub_group_size");
7066
}
7167

7268
Res = Kernel.get_sub_group_info<
7369
info::kernel_sub_group::compile_num_sub_groups>(Device);
7470

7571
/* Sub-group size is not specified in kernel or IL*/
76-
exit_if_not_equal<size_t>(Res, 0, "compile_num_sub_groups");
72+
exit_if_not_equal<uint32_t>(Res, 0, "compile_num_sub_groups");
7773

7874
// According to specification, this kernel query requires `cl_khr_subgroups`
7975
// or `cl_intel_subgroups`
@@ -84,32 +80,9 @@ int main() {
8480
info::kernel_sub_group::compile_sub_group_size>(Device);
8581

8682
/* Required sub-group size is not specified in kernel or IL*/
87-
exit_if_not_equal<size_t>(Res, 0, "compile_sub_group_size");
83+
exit_if_not_equal<uint32_t>(Res, 0, "compile_sub_group_size");
8884
}
8985

90-
/* Check work-group sizea which can accommodate the requested number of
91-
* sub-groups*/
92-
for (auto s : {(size_t)200, (size_t)1, (size_t)3, (size_t)5, (size_t)7,
93-
(size_t)13, max_sg_num, max_sg_num + 1}) {
94-
range<3> ResRange = Kernel.get_sub_group_info<
95-
info::kernel_sub_group::local_size_for_sub_group_count>(Device, s);
96-
if (s * max_sg_num <= max_wg_size) {
97-
exit_if_not_equal<size_t>(ResRange[0], s * max_sg_num,
98-
"local_size_for_sub_group_count[0]");
99-
exit_if_not_equal<size_t>(ResRange[1], 1,
100-
"local_size_for_sub_group_count[1]");
101-
exit_if_not_equal<size_t>(ResRange[2], 1,
102-
"local_size_for_sub_group_count[2]");
103-
104-
} else {
105-
exit_if_not_equal<size_t>(ResRange[0], 0,
106-
"local_size_for_sub_group_count[0]");
107-
exit_if_not_equal<size_t>(ResRange[1], 0,
108-
"local_size_for_sub_group_count[1]");
109-
exit_if_not_equal<size_t>(ResRange[2], 0,
110-
"local_size_for_sub_group_count[2]");
111-
}
112-
}
11386
} catch (exception e) {
11487
std::cout << "SYCL exception caught: " << e.what();
11588
return 1;

0 commit comments

Comments
 (0)