From b88bd7a2d8c65f8b765714e45888dc06e5c5ad2c Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Nov 2022 08:18:55 -0600 Subject: [PATCH 1/4] Added size_t *DPCTLDevice_GetSubGroupSizes(DRef, size_t res_len) The function exposes `device::get_info()` which returns `std::vector`. DPCTLDevice_GetSubGroupSizes returns pointer to allocated array, populated with the content of the result std::vector. res_len is set with the size of the result std::vector. --- .../include/dpctl_sycl_device_interface.h | 13 +++++++++ .../source/dpctl_sycl_device_interface.cpp | 27 +++++++++++++++++++ .../tests/test_sycl_device_interface.cpp | 26 ++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index 2288fb433d..80b460c53e 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -651,4 +651,17 @@ DPCTL_API DPCTLGlobalMemCacheType DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef); +/*! + * @brief Wrapper for get_info(). + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @param res_len Populated with size of the returned array + * @return Returns the valid result if device exists else returns NULL. + * @ingroup DeviceInterface + */ +DPCTL_API +__dpctl_keep size_t * +DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef, + size_t *res_len); + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 985e8b5719..cc313512f0 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -743,3 +743,30 @@ DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef) return DPCTL_MEM_CACHE_TYPE_INDETERMINATE; } } + +__dpctl_keep size_t * +DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef, + size_t *res_len) +{ + size_t *sizes = nullptr; + std::vector sg_sizes; + *res_len = 0; + auto D = unwrap(DRef); + if (D) { + try { + sg_sizes = D->get_info(); + *res_len = sg_sizes.size(); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + try { + sizes = new size_t[sg_sizes.size()]; + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + for (auto i = 0ul; (sizes != nullptr) && i < sg_sizes.size(); ++i) { + sizes[i] = sg_sizes[i]; + } + } + return sizes; +} diff --git a/libsyclinterface/tests/test_sycl_device_interface.cpp b/libsyclinterface/tests/test_sycl_device_interface.cpp index 0441177633..4d407a9be3 100644 --- a/libsyclinterface/tests/test_sycl_device_interface.cpp +++ b/libsyclinterface/tests/test_sycl_device_interface.cpp @@ -205,6 +205,22 @@ TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxNumSubGroups) EXPECT_TRUE(n > 0); } +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetSubGroupSizes) +{ + size_t sg_sizes_len = 0; + size_t *sg_sizes = nullptr; + EXPECT_NO_FATAL_FAILURE( + sg_sizes = DPCTLDevice_GetSubGroupSizes(DRef, &sg_sizes_len)); + if (DPCTLDevice_IsAccelerator(DRef)) + EXPECT_TRUE(sg_sizes_len >= 0); + else + EXPECT_TRUE(sg_sizes_len > 0); + for (size_t i = 0; i < sg_sizes_len; ++i) { + EXPECT_TRUE(sg_sizes > 0); + } + EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sg_sizes)); +} + TEST_P(TestDPCTLSyclDeviceInterface, ChkGetPlatform) { DPCTLSyclPlatformRef PRef = nullptr; @@ -751,3 +767,13 @@ TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheType) EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheType(Null_DRef)); ASSERT_TRUE(res == DPCTL_MEM_CACHE_TYPE_INDETERMINATE); } + +TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetSubGroupSizes) +{ + size_t *sg_sizes = nullptr; + size_t sg_sizes_len = 0; + EXPECT_NO_FATAL_FAILURE( + sg_sizes = DPCTLDevice_GetSubGroupSizes(Null_DRef, &sg_sizes_len)); + ASSERT_TRUE(sg_sizes == nullptr); + ASSERT_TRUE(sg_sizes_len == 0); +} From 298dd863e390a69131a8702724e178bf01c43dac Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Nov 2022 09:50:48 -0600 Subject: [PATCH 2/4] Exposed dpctl.SyclDevice.sub_group_sizes property The property returns a vector of supported sub-group sizes. The relationship to `SyclDevice.max_num_sub_groups` is not clear at the moment. --- dpctl/_backend.pxd | 2 ++ dpctl/_sycl_device.pyx | 23 +++++++++++++++++++++++ dpctl/tests/_device_attributes_checks.py | 6 ++++++ 3 files changed, 31 insertions(+) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index f4ce41f6a2..387f16a730 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -204,6 +204,8 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef) cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType( const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetSubGroupSizes(const DPCTLSyclDeviceRef DRef, + size_t *res_len) cdef extern from "syclinterface/dpctl_sycl_device_manager.h": diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 72eba89059..cd45d632e9 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -65,6 +65,7 @@ from ._backend cimport ( # noqa: E211 DPCTLDevice_GetPreferredVectorWidthShort, DPCTLDevice_GetProfilingTimerResolution, DPCTLDevice_GetSubGroupIndependentForwardProgress, + DPCTLDevice_GetSubGroupSizes, DPCTLDevice_GetVendor, DPCTLDevice_HasAspect, DPCTLDevice_Hash, @@ -884,6 +885,28 @@ cdef class SyclDevice(_SyclDevice): self._device_ref ) + @property + def sub_group_sizes(self): + """ Returns list of supported sub-group sizes for this device. + + Returns: + List[int]: List of supported sub-group sizes. + """ + cdef size_t *sg_sizes = NULL + cdef size_t sg_sizes_len = 0 + cdef size_t i + + sg_sizes = DPCTLDevice_GetSubGroupSizes( + self._device_ref, &sg_sizes_len) + if (sg_sizes is not NULL and sg_sizes_len > 0): + res = list() + for i in range(sg_sizes_len): + res.append(sg_sizes[i]) + DPCTLSize_t_Array_Delete(sg_sizes) + return res + else: + return [] + @property def sycl_platform(self): """ Returns the platform associated with this device. diff --git a/dpctl/tests/_device_attributes_checks.py b/dpctl/tests/_device_attributes_checks.py index f3ecf5b199..970aff8811 100644 --- a/dpctl/tests/_device_attributes_checks.py +++ b/dpctl/tests/_device_attributes_checks.py @@ -115,6 +115,11 @@ def check_max_num_sub_groups(device): assert max_num_sub_groups > 0 +def check_sub_group_sizes(device): + sg_sizes = device.sub_group_sizes + assert all(el > 0 for el in sg_sizes) + + def check_has_aspect_host(device): try: device.has_aspect_host @@ -605,6 +610,7 @@ def check_global_mem_cache_line_size(device): check_max_work_item_sizes, check_max_work_group_size, check_max_num_sub_groups, + check_sub_group_sizes, check_is_accelerator, check_is_cpu, check_is_gpu, From d1bb117c91e126c98e3184c774ecc240b4cf7c87 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Nov 2022 10:02:01 -0600 Subject: [PATCH 3/4] Extended examples/pybind11/use_dpctl_syclqueue/use_queue_device Added a function that retrieves sub_group_sizes property of the device. --- .../use_dpctl_syclqueue/tests/test_queue_device.py | 8 ++++++++ .../use_dpctl_syclqueue/use_queue_device/__init__.py | 2 ++ .../use_dpctl_syclqueue/use_queue_device/_example.cpp | 8 ++++++++ 3 files changed, 18 insertions(+) diff --git a/examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py b/examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py index 120cc1fd9e..c47a3696ff 100644 --- a/examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py +++ b/examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py @@ -55,3 +55,11 @@ def test_offload_array_mod(): Ynp = X % modulus_p assert np.array_equal(Y, Ynp) + + +def test_get_sub_group_sizes(): + d = dpctl.SyclDevice() + szs = uqd.get_sub_group_sizes(d) + assert type(szs) is list + assert all(type(el) is int for el in szs) + szs == d.sub_group_sizes diff --git a/examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py b/examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py index ccbca8fd73..80673fe776 100644 --- a/examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py +++ b/examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py @@ -20,6 +20,7 @@ get_device_global_mem_size, get_device_local_mem_size, get_max_compute_units, + get_sub_group_sizes, offloaded_array_mod, ) @@ -28,6 +29,7 @@ "get_device_global_mem_size", "get_device_local_mem_size", "offloaded_array_mod", + "get_sub_group_sizes", ] __doc__ = """ diff --git a/examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp b/examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp index 436046a08b..9146799c42 100644 --- a/examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp +++ b/examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp @@ -31,6 +31,7 @@ #include #include #include +#include namespace py = pybind11; @@ -84,6 +85,11 @@ offloaded_array_mod(sycl::queue q, return res; } +std::vector get_sub_group_sizes(const sycl::device &d) +{ + return d.get_info(); +} + PYBIND11_MODULE(_use_queue_device, m) { m.def( @@ -100,4 +106,6 @@ PYBIND11_MODULE(_use_queue_device, m) "Computes amount of local memory of the given dpctl.SyclDevice"); m.def("offloaded_array_mod", &offloaded_array_mod, "Compute offloaded modular reduction of integer-valued NumPy array"); + m.def("get_sub_group_sizes", &get_sub_group_sizes, + "Gets info::device::sub_group_sizes property of given device"); } From 99f8a91bd28a3e7391668b9a6d3924a23713e09b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 9 Nov 2022 15:21:06 -0600 Subject: [PATCH 4/4] Added entry about this PR to change-log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc4a2d9ec8..fce096ab4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Added C-API to `dpctl.program.SyclKernel` and `dpctl.program.SyclProgram`. Added type casters for new types to "dpctl4pybind11" and added an example demonstrating its use [#970](https://github.com/IntelPython/dpctl/pull/970). * Introduced "dpctl/sycl.pxd" Cython declaration file to streamline use of SYCL functions from Cython, and added an example demonstrating its use [#981](https://github.com/IntelPython/dpctl/pull/981). * Added experimental support for sharing data allocated on sub-devices via dlpack [#984](https://github.com/IntelPython/dpctl/pull/984). +* Added `dpctl.SyclDevice.sub_group_sizes` property to retrieve supported sizes of sub-group by the device [#985](https://github.com/IntelPython/dpctl/pull/985). ### Changed * Improved queue compatibility testing in `dpctl.tensor`'s implementation module [#900](https://github.com/IntelPython/dpctl/pull/900).