From 21c9f6f71d107e8b4c583e6e271fde73d74d5a23 Mon Sep 17 00:00:00 2001 From: Garima Gupta Date: Tue, 28 Apr 2020 09:57:15 -0700 Subject: [PATCH 1/3] [SYCL][ABI] Subgroup Extension spec update Implements the change as per the spec update at https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/SubGroup/SYCL_INTEL_sub_group.asciidoc and update from : #1565 ABI update: Functions corresponding to the deleted APIs have been deleted. Edit info.cpp test to expect max sub-group size based on the spec update. Changed pi.h document and changed the plugin to always return a 32 bit value. ABI version update. Signed-off-by: Garima Gupta --- sycl/CMakeLists.txt | 2 +- sycl/include/CL/sycl/detail/pi.h | 32 ++++++++++-- sycl/include/CL/sycl/info/info_desc.hpp | 4 +- .../CL/sycl/info/kernel_sub_group_traits.def | 15 ++---- sycl/plugins/opencl/pi_opencl.cpp | 24 ++++++++- sycl/source/detail/kernel_impl.cpp | 16 +++--- sycl/source/detail/kernel_info.hpp | 51 +++++-------------- sycl/test/abi/sycl_symbols_linux.dump | 2 - sycl/test/sub_group/common_ocl.cpp | 4 -- sycl/test/sub_group/info.cpp | 43 ++++------------ 10 files changed, 87 insertions(+), 106 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index bce776bdcb9e1..0b3cb1e94acfb 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -11,7 +11,7 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON) set(SYCL_MAJOR_VERSION 1) set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 0) +set(SYCL_DEV_ABI_VERSION 1) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 854c7387817cf..301d3c9d8f85a 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -45,7 +45,7 @@ // TODO: we need a mapping of PI to OpenCL somewhere, and this can be done // elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now // done here, for efficiency and simplicity. - +#include #include #include #include @@ -309,6 +309,14 @@ typedef enum { PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH } _pi_image_info; +typedef enum { + PI_KERNEL_MAX_SUB_GROUP_SIZE = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + PI_KERNEL_MAX_NUM_SUB_GROUPS = CL_KERNEL_MAX_NUM_SUB_GROUPS, + PI_KERNEL_COMPILE_NUM_SUB_GROUPS = CL_KERNEL_COMPILE_NUM_SUB_GROUPS, + PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL = + CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL +} _pi_kernel_sub_group_info; + typedef enum { PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE, PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT, @@ -503,6 +511,7 @@ using pi_queue_info = _pi_queue_info; using pi_image_info = _pi_image_info; using pi_kernel_info = _pi_kernel_info; using pi_kernel_group_info = _pi_kernel_group_info; +using pi_kernel_sub_group_info = _pi_kernel_sub_group_info; using pi_event_info = _pi_event_info; using pi_command_type = _pi_command_type; using pi_mem_type = _pi_mem_type; @@ -1049,9 +1058,26 @@ __SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, void *param_value, size_t *param_value_size_ret); +/// API to query information from the sub-group from a kernel +/// +/// \param kernel is the pi_kernel to query +/// \param device is the device the kernel is executed on +/// \param param_name is a pi_kernel_sub_group_info enum value that +/// specifies the informtation queried for. +/// \param input_value_size is the size of input value passed in +/// ptr input_value param +/// \param input_value is the ptr to the input value passed. +/// \param param_value_size is the size of the value in bytes. +/// \param param_value is a pointer to the value to set. +/// \param param_value_size_ret is a pointer to return the size of data in +/// param_value ptr. +/// +/// All queries expect a return of 4 bytes in param_value_size, +/// param_value_size_ret, and a uint32_t value should to be written in +/// param_value ptr. +/// Note: This behaviour differs from OpenCL. OpenCL returns size_t. __SYCL_EXPORT pi_result piKernelGetSubGroupInfo( - pi_kernel kernel, pi_device device, - cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL + pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret); diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index df5765c3c79ba..a411c53dc17d3 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -211,9 +211,7 @@ enum class kernel_work_group : cl_kernel_work_group_info { }; enum class kernel_sub_group : cl_kernel_sub_group_info { - max_sub_group_size_for_ndrange = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, - sub_group_count_for_ndrange = CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, - local_size_for_sub_group_count = CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, + max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS, compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS, compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL diff --git a/sycl/include/CL/sycl/info/kernel_sub_group_traits.def b/sycl/include/CL/sycl/info/kernel_sub_group_traits.def index b2e8a8e82a750..d4ec1a661a348 100644 --- a/sycl/include/CL/sycl/info/kernel_sub_group_traits.def +++ b/sycl/include/CL/sycl/info/kernel_sub_group_traits.def @@ -1,10 +1,5 @@ -PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size_for_ndrange, - size_t, cl::sycl::range<3>) -PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, sub_group_count_for_ndrange, - size_t, cl::sycl::range<3>) -PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, local_size_for_sub_group_count, - cl::sycl::range<3>, size_t) -PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, size_t) -PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, size_t) -PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, size_t) - +PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size, + uint32_t, cl::sycl::range<3>) +PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, uint32_t) +PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, uint32_t) +PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, uint32_t) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index bee9ad4b5ba90..855137f905e3d 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -598,6 +598,28 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name, return ret_err; } +pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_sub_group_info param_name, + size_t input_value_size, + const void *input_value, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + size_t ret_val; + cl_int ret_err; + ret_err = cast(clGetKernelSubGroupInfo( + cast(kernel), cast(device), + cast(param_name), input_value_size, input_value, + sizeof(size_t), &ret_val, param_value_size_ret)); + + if (ret_err != CL_SUCCESS) + return cast(ret_err); + + *(static_cast(param_value)) = static_cast(ret_val); + if (param_value_size_ret) + *param_value_size_ret = sizeof(uint32_t); + return PI_SUCCESS; +} + pi_result piEventCreate(pi_context context, pi_event *ret_event) { pi_result ret_err = PI_INVALID_OPERATION; @@ -1145,7 +1167,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piKernelSetArg, clSetKernelArg) _PI_CL(piKernelGetInfo, clGetKernelInfo) _PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo) - _PI_CL(piKernelGetSubGroupInfo, clGetKernelSubGroupInfo) + _PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo) _PI_CL(piKernelRetain, clRetainKernel) _PI_CL(piKernelRelease, clReleaseKernel) _PI_CL(piKernelSetExecInfo, piKernelSetExecInfo) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 9b66422978eb1..f21d18818dacf 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -92,10 +92,9 @@ kernel_impl::get_sub_group_info(const device &Device) const { throw runtime_error("Sub-group feature is not supported on HOST device.", PI_INVALID_DEVICE); } - return get_kernel_sub_group_info< - typename info::param_traits::return_type, - param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), - getPlugin()); + return get_kernel_sub_group_info::get( + this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), + getPlugin()); } template @@ -108,12 +107,9 @@ kernel_impl::get_sub_group_info( throw runtime_error("Sub-group feature is not supported on HOST device.", PI_INVALID_DEVICE); } - return get_kernel_sub_group_info_with_input< - typename info::param_traits::return_type, - param, - typename info::param_traits::input_type>:: - get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value, - getPlugin()); + return get_kernel_sub_group_info_with_input::get( + this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value, + getPlugin()); } #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index e5a8adf242ddd..99b51339cbeb2 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -120,55 +120,30 @@ get_kernel_work_group_info_host( } // The kernel sub-group methods -template -struct get_kernel_sub_group_info { - static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, - const plugin &Plugin) { - TOut Result; +template struct get_kernel_sub_group_info { + static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device, + const plugin &Plugin) { + uint32_t Result; // TODO catch an exception and put it to list of asynchronous exceptions Plugin.call( - Kernel, Device, cl_kernel_sub_group_info(Param), 0, nullptr, - sizeof(TOut), &Result, nullptr); - return Result; - } -}; + Kernel, Device, pi_kernel_sub_group_info(Param), 0, nullptr, + sizeof(uint32_t), &Result, nullptr); -template -struct get_kernel_sub_group_info_with_input { - static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, TIn In, - const plugin &Plugin) { - TOut Result; - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call( - Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(TIn), &In, - sizeof(TOut), &Result, nullptr); return Result; } }; template -struct get_kernel_sub_group_info_with_input, Param, size_t> { - static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device, - size_t In, const plugin &Plugin) { - size_t Result[3]; - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call( - Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t), &In, - sizeof(size_t) * 3, Result, nullptr); - return cl::sycl::range<3>(Result[0], Result[1], Result[2]); - } -}; - -template -struct get_kernel_sub_group_info_with_input> { - static size_t get(RT::PiKernel Kernel, RT::PiDevice Device, - cl::sycl::range<3> In, const plugin &Plugin) { +struct get_kernel_sub_group_info_with_input { + static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device, + cl::sycl::range<3> In, const plugin &Plugin) { size_t Input[3] = {In[0], In[1], In[2]}; - size_t Result; + uint32_t Result; // TODO catch an exception and put it to list of asynchronous exceptions Plugin.call( - Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t) * 3, - Input, sizeof(size_t), &Result, nullptr); + Kernel, Device, pi_kernel_sub_group_info(Param), sizeof(size_t) * 3, + Input, sizeof(uint32_t), &Result, nullptr); + return Result; } }; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4412683cc11ac..f880aaf6fb73c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3491,11 +3491,9 @@ _ZNK2cl4sycl6device9getNativeEv _ZNK2cl4sycl6kernel11get_contextEv _ZNK2cl4sycl6kernel11get_programEv _ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE16650EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE -_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4536EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE _ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE -_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE8244EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE _ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4528EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4529EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4531EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE diff --git a/sycl/test/sub_group/common_ocl.cpp b/sycl/test/sub_group/common_ocl.cpp index d999b446cb671..3e4cb3a7d664b 100644 --- a/sycl/test/sub_group/common_ocl.cpp +++ b/sycl/test/sub_group/common_ocl.cpp @@ -60,10 +60,6 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) { cgh.set_args(oclacc); cgh.parallel_for(NdRange, Prog.get_kernel("ocl_subgr")); }); - size_t NumSG = Prog.get_kernel("ocl_subgr") - .get_sub_group_info< - info::kernel_sub_group::sub_group_count_for_ndrange>( - Queue.get_device(), range<3>(G, 1, 1)); auto oclacc = oclbuf.get_access(); Queue.submit([&](handler &cgh) { diff --git a/sycl/test/sub_group/info.cpp b/sycl/test/sub_group/info.cpp index 16a6fa1f38a1b..6874eb398b3de 100644 --- a/sycl/test/sub_group/info.cpp +++ b/sycl/test/sub_group/info.cpp @@ -55,25 +55,23 @@ int main() { "kernel_sg(global double* a, global double* b, " "global double* c) {*a=*b+*c; }\n"); kernel Kernel = Prog.get_kernel("kernel_sg"); - size_t Res = 0; + uint32_t Res = 0; for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), range<3>(32, 3, 4), range<3>(7, 9, 11)}) { + bool CorrectResult = false; Res = Kernel.get_sub_group_info< - info::kernel_sub_group::max_sub_group_size_for_ndrange>(Device, r); - exit_if_not_equal(Res, min(r.size(), max_sg_num), - "max_sub_group_size_for_ndrange"); - Res = Kernel.get_sub_group_info< - info::kernel_sub_group::sub_group_count_for_ndrange>(Device, r); - exit_if_not_equal( - Res, r.size() / max_sg_num + (r.size() % max_sg_num ? 1 : 0), - "sub_group_count_for_ndrange"); + info::kernel_sub_group::max_sub_group_size>(Device, r); + if (Res == r.size() || Res == max_sg_num) + CorrectResult = true; + exit_if_not_equal(CorrectResult, true, + "max_sub_group_size"); } Res = Kernel.get_sub_group_info< info::kernel_sub_group::compile_num_sub_groups>(Device); /* Sub-group size is not specified in kernel or IL*/ - exit_if_not_equal(Res, 0, "compile_num_sub_groups"); + exit_if_not_equal(Res, 0, "compile_num_sub_groups"); // According to specification, this kernel query requires `cl_khr_subgroups` // or `cl_intel_subgroups` @@ -84,32 +82,9 @@ int main() { info::kernel_sub_group::compile_sub_group_size>(Device); /* Required sub-group size is not specified in kernel or IL*/ - exit_if_not_equal(Res, 0, "compile_sub_group_size"); + exit_if_not_equal(Res, 0, "compile_sub_group_size"); } - /* Check work-group sizea which can accommodate the requested number of - * sub-groups*/ - for (auto s : {(size_t)200, (size_t)1, (size_t)3, (size_t)5, (size_t)7, - (size_t)13, max_sg_num, max_sg_num + 1}) { - range<3> ResRange = Kernel.get_sub_group_info< - info::kernel_sub_group::local_size_for_sub_group_count>(Device, s); - if (s * max_sg_num <= max_wg_size) { - exit_if_not_equal(ResRange[0], s * max_sg_num, - "local_size_for_sub_group_count[0]"); - exit_if_not_equal(ResRange[1], 1, - "local_size_for_sub_group_count[1]"); - exit_if_not_equal(ResRange[2], 1, - "local_size_for_sub_group_count[2]"); - - } else { - exit_if_not_equal(ResRange[0], 0, - "local_size_for_sub_group_count[0]"); - exit_if_not_equal(ResRange[1], 0, - "local_size_for_sub_group_count[1]"); - exit_if_not_equal(ResRange[2], 0, - "local_size_for_sub_group_count[2]"); - } - } } catch (exception e) { std::cout << "SYCL exception caught: " << e.what(); return 1; From 06a3073c6e4aec038f614f09ebd9049977d32af0 Mon Sep 17 00:00:00 2001 From: Garima Gupta Date: Tue, 12 May 2020 10:57:21 -0700 Subject: [PATCH 2/3] Edit test pi_opencl_symbol_check.dump Signed-off-by: Garima Gupta --- sycl/test/abi/pi_opencl_symbol_check.dump | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 5ad52bd49911e..2576d05ba970f 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -6,6 +6,7 @@ piDevicesGet piEnqueueMemBufferMap piEventCreate piKernelCreate +piKernelGetSubGroupInfo piKernelSetExecInfo piMemBufferCreate piMemBufferPartition @@ -42,4 +43,4 @@ piextUSMEnqueuePrefetch piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc -piextUSMSharedAlloc \ No newline at end of file +piextUSMSharedAlloc From 2fcb115c0752f9918857ca1a0b91b974c7b2f1ef Mon Sep 17 00:00:00 2001 From: Garima Gupta Date: Wed, 13 May 2020 10:58:34 -0700 Subject: [PATCH 3/3] Suggested changed to sub_group/info.cpp Signed-off-by: Garima Gupta --- sycl/test/sub_group/info.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sycl/test/sub_group/info.cpp b/sycl/test/sub_group/info.cpp index 6874eb398b3de..2791f66ce42d1 100644 --- a/sycl/test/sub_group/info.cpp +++ b/sycl/test/sub_group/info.cpp @@ -58,13 +58,11 @@ int main() { uint32_t Res = 0; for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), range<3>(32, 3, 4), range<3>(7, 9, 11)}) { - bool CorrectResult = false; Res = Kernel.get_sub_group_info< info::kernel_sub_group::max_sub_group_size>(Device, r); - if (Res == r.size() || Res == max_sg_num) - CorrectResult = true; - exit_if_not_equal(CorrectResult, true, - "max_sub_group_size"); + bool Expected = (Res == r.size() || Res == max_sg_num); + exit_if_not_equal(Expected, true, + "max_sub_group_size"); } Res = Kernel.get_sub_group_info<