Skip to content

[SYCL][ABI] Subgroup Extension spec update #1600

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON)
set(SYCL_MAJOR_VERSION 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@garimagu the major version is updated once per release. You only need to update DEV_ABI version.

Suggested change
set(SYCL_MAJOR_VERSION 1)
set(SYCL_MAJOR_VERSION 0)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uploaded the change. please review and approve. smaslov-intel has approved the changes other than the ABI version update.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should (semi-)automate SYCL version update.
It's hard to manage multiple commits requiring version update.

@alexbatashev, could you create a post-commit job, which will create a PR with the version update if it's required, please?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bader I'll take a look into it

set(SYCL_MINOR_VERSION 0)
set(SYCL_PATCH_VERSION 0)
set(SYCL_DEV_ABI_VERSION 0)
set(SYCL_DEV_ABI_VERSION 1)
if (SYCL_ADD_DEV_VERSION_POSTFIX)
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
endif()
Expand Down
32 changes: 29 additions & 3 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
// TODO: we need a mapping of PI to OpenCL somewhere, and this can be done
// elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now
// done here, for efficiency and simplicity.

#include <CL/cl_ext_intel.h>
#include <CL/cl_usm_ext.h>
#include <CL/sycl/detail/cl.h>
#include <CL/sycl/detail/export.hpp>
Expand Down Expand Up @@ -309,6 +309,14 @@ typedef enum {
PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH
} _pi_image_info;

typedef enum {
PI_KERNEL_MAX_SUB_GROUP_SIZE = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
PI_KERNEL_MAX_NUM_SUB_GROUPS = CL_KERNEL_MAX_NUM_SUB_GROUPS,
PI_KERNEL_COMPILE_NUM_SUB_GROUPS = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL =
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
} _pi_kernel_sub_group_info;

typedef enum {
PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE,
PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT,
Expand Down Expand Up @@ -503,6 +511,7 @@ using pi_queue_info = _pi_queue_info;
using pi_image_info = _pi_image_info;
using pi_kernel_info = _pi_kernel_info;
using pi_kernel_group_info = _pi_kernel_group_info;
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
using pi_event_info = _pi_event_info;
using pi_command_type = _pi_command_type;
using pi_mem_type = _pi_mem_type;
Expand Down Expand Up @@ -1049,9 +1058,26 @@ __SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
void *param_value,
size_t *param_value_size_ret);

/// API to query information from the sub-group from a kernel
///
/// \param kernel is the pi_kernel to query
/// \param device is the device the kernel is executed on
/// \param param_name is a pi_kernel_sub_group_info enum value that
/// specifies the informtation queried for.
/// \param input_value_size is the size of input value passed in
/// ptr input_value param
/// \param input_value is the ptr to the input value passed.
/// \param param_value_size is the size of the value in bytes.
/// \param param_value is a pointer to the value to set.
/// \param param_value_size_ret is a pointer to return the size of data in
/// param_value ptr.
///
/// All queries expect a return of 4 bytes in param_value_size,
/// param_value_size_ret, and a uint32_t value should to be written in
/// param_value ptr.
/// Note: This behaviour differs from OpenCL. OpenCL returns size_t.
__SYCL_EXPORT pi_result piKernelGetSubGroupInfo(
pi_kernel kernel, pi_device device,
cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL
pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name,
size_t input_value_size, const void *input_value, size_t param_value_size,
void *param_value, size_t *param_value_size_ret);

Expand Down
4 changes: 1 addition & 3 deletions sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,7 @@ enum class kernel_work_group : cl_kernel_work_group_info {
};

enum class kernel_sub_group : cl_kernel_sub_group_info {
max_sub_group_size_for_ndrange = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
sub_group_count_for_ndrange = CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
local_size_for_sub_group_count = CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS,
compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
Expand Down
15 changes: 5 additions & 10 deletions sycl/include/CL/sycl/info/kernel_sub_group_traits.def
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size_for_ndrange,
size_t, cl::sycl::range<3>)
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, sub_group_count_for_ndrange,
size_t, cl::sycl::range<3>)
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, local_size_for_sub_group_count,
cl::sycl::range<3>, size_t)
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, size_t)
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, size_t)
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, size_t)

PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size,
uint32_t, cl::sycl::range<3>)
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, uint32_t)
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, uint32_t)
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, uint32_t)
24 changes: 23 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,28 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name,
return ret_err;
}

pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device,
pi_kernel_sub_group_info param_name,
size_t input_value_size,
const void *input_value,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
size_t ret_val;
cl_int ret_err;
ret_err = cast<pi_result>(clGetKernelSubGroupInfo(
cast<cl_kernel>(kernel), cast<cl_device_id>(device),
cast<cl_kernel_sub_group_info>(param_name), input_value_size, input_value,
sizeof(size_t), &ret_val, param_value_size_ret));

if (ret_err != CL_SUCCESS)
return cast<pi_result>(ret_err);

*(static_cast<uint32_t *>(param_value)) = static_cast<uint32_t>(ret_val);
if (param_value_size_ret)
*param_value_size_ret = sizeof(uint32_t);
return PI_SUCCESS;
}

pi_result piEventCreate(pi_context context, pi_event *ret_event) {

pi_result ret_err = PI_INVALID_OPERATION;
Expand Down Expand Up @@ -1145,7 +1167,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piKernelSetArg, clSetKernelArg)
_PI_CL(piKernelGetInfo, clGetKernelInfo)
_PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo)
_PI_CL(piKernelGetSubGroupInfo, clGetKernelSubGroupInfo)
_PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo)
_PI_CL(piKernelRetain, clRetainKernel)
_PI_CL(piKernelRelease, clReleaseKernel)
_PI_CL(piKernelSetExecInfo, piKernelSetExecInfo)
Expand Down
16 changes: 6 additions & 10 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,9 @@ kernel_impl::get_sub_group_info(const device &Device) const {
throw runtime_error("Sub-group feature is not supported on HOST device.",
PI_INVALID_DEVICE);
}
return get_kernel_sub_group_info<
typename info::param_traits<info::kernel_sub_group, param>::return_type,
param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
getPlugin());
return get_kernel_sub_group_info<param>::get(
this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(),
getPlugin());
}

template <info::kernel_sub_group param>
Expand All @@ -108,12 +107,9 @@ kernel_impl::get_sub_group_info(
throw runtime_error("Sub-group feature is not supported on HOST device.",
PI_INVALID_DEVICE);
}
return get_kernel_sub_group_info_with_input<
typename info::param_traits<info::kernel_sub_group, param>::return_type,
param,
typename info::param_traits<info::kernel_sub_group, param>::input_type>::
get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value,
getPlugin());
return get_kernel_sub_group_info_with_input<param>::get(
this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value,
getPlugin());
}

#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \
Expand Down
51 changes: 13 additions & 38 deletions sycl/source/detail/kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,55 +120,30 @@ get_kernel_work_group_info_host<info::kernel_work_group::private_mem_size>(
}
// The kernel sub-group methods

template <typename TOut, info::kernel_sub_group Param>
struct get_kernel_sub_group_info {
static TOut get(RT::PiKernel Kernel, RT::PiDevice Device,
const plugin &Plugin) {
TOut Result;
template <info::kernel_sub_group Param> struct get_kernel_sub_group_info {
static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device,
const plugin &Plugin) {
uint32_t Result;
// TODO catch an exception and put it to list of asynchronous exceptions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO catch an exception and put it to list of asynchronous exceptions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this comment be removed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, this comment is not valid anymore. Most of the errors now reported through synchronous exceptions.
Although, I'm not the owner of the runtime and let @intel/llvm-reviewers-runtime to decide.

Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
Kernel, Device, cl_kernel_sub_group_info(Param), 0, nullptr,
sizeof(TOut), &Result, nullptr);
return Result;
}
};
Kernel, Device, pi_kernel_sub_group_info(Param), 0, nullptr,
sizeof(uint32_t), &Result, nullptr);

template <typename TOut, info::kernel_sub_group Param, typename TIn>
struct get_kernel_sub_group_info_with_input {
static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, TIn In,
const plugin &Plugin) {
TOut Result;
// TODO catch an exception and put it to list of asynchronous exceptions
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(TIn), &In,
sizeof(TOut), &Result, nullptr);
return Result;
}
};

template <info::kernel_sub_group Param>
struct get_kernel_sub_group_info_with_input<cl::sycl::range<3>, Param, size_t> {
static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device,
size_t In, const plugin &Plugin) {
size_t Result[3];
// TODO catch an exception and put it to list of asynchronous exceptions
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t), &In,
sizeof(size_t) * 3, Result, nullptr);
return cl::sycl::range<3>(Result[0], Result[1], Result[2]);
}
};

template <info::kernel_sub_group Param>
struct get_kernel_sub_group_info_with_input<size_t, Param, cl::sycl::range<3>> {
static size_t get(RT::PiKernel Kernel, RT::PiDevice Device,
cl::sycl::range<3> In, const plugin &Plugin) {
struct get_kernel_sub_group_info_with_input {
static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device,
cl::sycl::range<3> In, const plugin &Plugin) {
size_t Input[3] = {In[0], In[1], In[2]};
size_t Result;
uint32_t Result;
// TODO catch an exception and put it to list of asynchronous exceptions
Plugin.call<PiApiKind::piKernelGetSubGroupInfo>(
Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t) * 3,
Input, sizeof(size_t), &Result, nullptr);
Kernel, Device, pi_kernel_sub_group_info(Param), sizeof(size_t) * 3,
Input, sizeof(uint32_t), &Result, nullptr);

return Result;
}
};
Expand Down
3 changes: 2 additions & 1 deletion sycl/test/abi/pi_opencl_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ piDevicesGet
piEnqueueMemBufferMap
piEventCreate
piKernelCreate
piKernelGetSubGroupInfo
piKernelSetExecInfo
piMemBufferCreate
piMemBufferPartition
Expand Down Expand Up @@ -42,4 +43,4 @@ piextUSMEnqueuePrefetch
piextUSMFree
piextUSMGetMemAllocInfo
piextUSMHostAlloc
piextUSMSharedAlloc
piextUSMSharedAlloc
2 changes: 0 additions & 2 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3491,11 +3491,9 @@ _ZNK2cl4sycl6device9getNativeEv
_ZNK2cl4sycl6kernel11get_contextEv
_ZNK2cl4sycl6kernel11get_programEv
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE16650EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4536EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8244EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4528EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4529EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4531EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
Expand Down
4 changes: 0 additions & 4 deletions sycl/test/sub_group/common_ocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) {
cgh.set_args(oclacc);
cgh.parallel_for(NdRange, Prog.get_kernel("ocl_subgr"));
});
size_t NumSG = Prog.get_kernel("ocl_subgr")
.get_sub_group_info<
info::kernel_sub_group::sub_group_count_for_ndrange>(
Queue.get_device(), range<3>(G, 1, 1));
auto oclacc = oclbuf.get_access<access::mode::read_write>();

Queue.submit([&](handler &cgh) {
Expand Down
41 changes: 7 additions & 34 deletions sycl/test/sub_group/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,21 @@ int main() {
"kernel_sg(global double* a, global double* b, "
"global double* c) {*a=*b+*c; }\n");
kernel Kernel = Prog.get_kernel("kernel_sg");
size_t Res = 0;
uint32_t Res = 0;
for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1),
range<3>(32, 3, 4), range<3>(7, 9, 11)}) {
Res = Kernel.get_sub_group_info<
info::kernel_sub_group::max_sub_group_size_for_ndrange>(Device, r);
exit_if_not_equal(Res, min(r.size(), max_sg_num),
"max_sub_group_size_for_ndrange");
Res = Kernel.get_sub_group_info<
info::kernel_sub_group::sub_group_count_for_ndrange>(Device, r);
exit_if_not_equal<size_t>(
Res, r.size() / max_sg_num + (r.size() % max_sg_num ? 1 : 0),
"sub_group_count_for_ndrange");
info::kernel_sub_group::max_sub_group_size>(Device, r);
bool Expected = (Res == r.size() || Res == max_sg_num);
exit_if_not_equal<bool>(Expected, true,
"max_sub_group_size");
}

Res = Kernel.get_sub_group_info<
info::kernel_sub_group::compile_num_sub_groups>(Device);

/* Sub-group size is not specified in kernel or IL*/
exit_if_not_equal<size_t>(Res, 0, "compile_num_sub_groups");
exit_if_not_equal<uint32_t>(Res, 0, "compile_num_sub_groups");

// According to specification, this kernel query requires `cl_khr_subgroups`
// or `cl_intel_subgroups`
Expand All @@ -84,32 +80,9 @@ int main() {
info::kernel_sub_group::compile_sub_group_size>(Device);

/* Required sub-group size is not specified in kernel or IL*/
exit_if_not_equal<size_t>(Res, 0, "compile_sub_group_size");
exit_if_not_equal<uint32_t>(Res, 0, "compile_sub_group_size");
}

/* Check work-group sizea which can accommodate the requested number of
* sub-groups*/
for (auto s : {(size_t)200, (size_t)1, (size_t)3, (size_t)5, (size_t)7,
(size_t)13, max_sg_num, max_sg_num + 1}) {
range<3> ResRange = Kernel.get_sub_group_info<
info::kernel_sub_group::local_size_for_sub_group_count>(Device, s);
if (s * max_sg_num <= max_wg_size) {
exit_if_not_equal<size_t>(ResRange[0], s * max_sg_num,
"local_size_for_sub_group_count[0]");
exit_if_not_equal<size_t>(ResRange[1], 1,
"local_size_for_sub_group_count[1]");
exit_if_not_equal<size_t>(ResRange[2], 1,
"local_size_for_sub_group_count[2]");

} else {
exit_if_not_equal<size_t>(ResRange[0], 0,
"local_size_for_sub_group_count[0]");
exit_if_not_equal<size_t>(ResRange[1], 0,
"local_size_for_sub_group_count[1]");
exit_if_not_equal<size_t>(ResRange[2], 0,
"local_size_for_sub_group_count[2]");
}
}
} catch (exception e) {
std::cout << "SYCL exception caught: " << e.what();
return 1;
Expand Down