Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sycl device sub group sizes property #985

Merged
merged 4 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Added C-API to `dpctl.program.SyclKernel` and `dpctl.program.SyclProgram`. Added type casters for new types to "dpctl4pybind11" and added an example demonstrating its use [#970](https://github.com/IntelPython/dpctl/pull/970).
* Introduced "dpctl/sycl.pxd" Cython declaration file to streamline use of SYCL functions from Cython, and added an example demonstrating its use [#981](https://github.com/IntelPython/dpctl/pull/981).
* Added experimental support for sharing data allocated on sub-devices via dlpack [#984](https://github.com/IntelPython/dpctl/pull/984).
* Added `dpctl.SyclDevice.sub_group_sizes` property to retrieve supported sizes of sub-group by the device [#985](https://github.com/IntelPython/dpctl/pull/985).

### Changed
* Improved queue compatibility testing in `dpctl.tensor`'s implementation module [#900](https://github.com/IntelPython/dpctl/pull/900).
Expand Down
2 changes: 2 additions & 0 deletions dpctl/_backend.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h":
cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef)
cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType(
const DPCTLSyclDeviceRef DRef)
cdef size_t *DPCTLDevice_GetSubGroupSizes(const DPCTLSyclDeviceRef DRef,
size_t *res_len)


cdef extern from "syclinterface/dpctl_sycl_device_manager.h":
Expand Down
23 changes: 23 additions & 0 deletions dpctl/_sycl_device.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ from ._backend cimport ( # noqa: E211
DPCTLDevice_GetPreferredVectorWidthShort,
DPCTLDevice_GetProfilingTimerResolution,
DPCTLDevice_GetSubGroupIndependentForwardProgress,
DPCTLDevice_GetSubGroupSizes,
DPCTLDevice_GetVendor,
DPCTLDevice_HasAspect,
DPCTLDevice_Hash,
Expand Down Expand Up @@ -884,6 +885,28 @@ cdef class SyclDevice(_SyclDevice):
self._device_ref
)

@property
def sub_group_sizes(self):
""" Returns list of supported sub-group sizes for this device.

Returns:
List[int]: List of supported sub-group sizes.
"""
cdef size_t *sg_sizes = NULL
cdef size_t sg_sizes_len = 0
cdef size_t i

sg_sizes = DPCTLDevice_GetSubGroupSizes(
self._device_ref, &sg_sizes_len)
if (sg_sizes is not NULL and sg_sizes_len > 0):
res = list()
for i in range(sg_sizes_len):
res.append(sg_sizes[i])
DPCTLSize_t_Array_Delete(sg_sizes)
return res
else:
return []

@property
def sycl_platform(self):
""" Returns the platform associated with this device.
Expand Down
6 changes: 6 additions & 0 deletions dpctl/tests/_device_attributes_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ def check_max_num_sub_groups(device):
assert max_num_sub_groups > 0


def check_sub_group_sizes(device):
sg_sizes = device.sub_group_sizes
assert all(el > 0 for el in sg_sizes)


def check_has_aspect_host(device):
try:
device.has_aspect_host
Expand Down Expand Up @@ -605,6 +610,7 @@ def check_global_mem_cache_line_size(device):
check_max_work_item_sizes,
check_max_work_group_size,
check_max_num_sub_groups,
check_sub_group_sizes,
check_is_accelerator,
check_is_cpu,
check_is_gpu,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,11 @@ def test_offload_array_mod():
Ynp = X % modulus_p

assert np.array_equal(Y, Ynp)


def test_get_sub_group_sizes():
d = dpctl.SyclDevice()
szs = uqd.get_sub_group_sizes(d)
assert type(szs) is list
assert all(type(el) is int for el in szs)
szs == d.sub_group_sizes
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
get_device_global_mem_size,
get_device_local_mem_size,
get_max_compute_units,
get_sub_group_sizes,
offloaded_array_mod,
)

Expand All @@ -28,6 +29,7 @@
"get_device_global_mem_size",
"get_device_local_mem_size",
"offloaded_array_mod",
"get_sub_group_sizes",
]

__doc__ = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <cstdint>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace py = pybind11;

Expand Down Expand Up @@ -84,6 +85,11 @@ offloaded_array_mod(sycl::queue q,
return res;
}

std::vector<std::size_t> get_sub_group_sizes(const sycl::device &d)
{
return d.get_info<sycl::info::device::sub_group_sizes>();
}

PYBIND11_MODULE(_use_queue_device, m)
{
m.def(
Expand All @@ -100,4 +106,6 @@ PYBIND11_MODULE(_use_queue_device, m)
"Computes amount of local memory of the given dpctl.SyclDevice");
m.def("offloaded_array_mod", &offloaded_array_mod,
"Compute offloaded modular reduction of integer-valued NumPy array");
m.def("get_sub_group_sizes", &get_sub_group_sizes,
"Gets info::device::sub_group_sizes property of given device");
}
13 changes: 13 additions & 0 deletions libsyclinterface/include/dpctl_sycl_device_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -651,4 +651,17 @@ DPCTL_API
DPCTLGlobalMemCacheType
DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef);

/*!
* @brief Wrapper for get_info<info::device::sub_group_sizes>().
*
* @param DRef Opaque pointer to a ``sycl::device``
* @param res_len Populated with size of the returned array
* @return Returns the valid result if device exists else returns NULL.
* @ingroup DeviceInterface
*/
DPCTL_API
__dpctl_keep size_t *
DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef,
size_t *res_len);

DPCTL_C_EXTERN_C_END
27 changes: 27 additions & 0 deletions libsyclinterface/source/dpctl_sycl_device_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -743,3 +743,30 @@ DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef)
return DPCTL_MEM_CACHE_TYPE_INDETERMINATE;
}
}

__dpctl_keep size_t *
DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef,
size_t *res_len)
{
size_t *sizes = nullptr;
std::vector<size_t> sg_sizes;
*res_len = 0;
auto D = unwrap<device>(DRef);
if (D) {
try {
sg_sizes = D->get_info<info::device::sub_group_sizes>();
*res_len = sg_sizes.size();
} catch (std::exception const &e) {
error_handler(e, __FILE__, __func__, __LINE__);
}
try {
sizes = new size_t[sg_sizes.size()];
} catch (std::exception const &e) {
error_handler(e, __FILE__, __func__, __LINE__);
}
for (auto i = 0ul; (sizes != nullptr) && i < sg_sizes.size(); ++i) {
sizes[i] = sg_sizes[i];
}
}
return sizes;
}
26 changes: 26 additions & 0 deletions libsyclinterface/tests/test_sycl_device_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,22 @@ TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxNumSubGroups)
EXPECT_TRUE(n > 0);
}

TEST_P(TestDPCTLSyclDeviceInterface, ChkGetSubGroupSizes)
{
size_t sg_sizes_len = 0;
size_t *sg_sizes = nullptr;
EXPECT_NO_FATAL_FAILURE(
sg_sizes = DPCTLDevice_GetSubGroupSizes(DRef, &sg_sizes_len));
if (DPCTLDevice_IsAccelerator(DRef))
EXPECT_TRUE(sg_sizes_len >= 0);
else
EXPECT_TRUE(sg_sizes_len > 0);
for (size_t i = 0; i < sg_sizes_len; ++i) {
EXPECT_TRUE(sg_sizes > 0);
}
EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sg_sizes));
}

TEST_P(TestDPCTLSyclDeviceInterface, ChkGetPlatform)
{
DPCTLSyclPlatformRef PRef = nullptr;
Expand Down Expand Up @@ -751,3 +767,13 @@ TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheType)
EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheType(Null_DRef));
ASSERT_TRUE(res == DPCTL_MEM_CACHE_TYPE_INDETERMINATE);
}

TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetSubGroupSizes)
{
size_t *sg_sizes = nullptr;
size_t sg_sizes_len = 0;
EXPECT_NO_FATAL_FAILURE(
sg_sizes = DPCTLDevice_GetSubGroupSizes(Null_DRef, &sg_sizes_len));
ASSERT_TRUE(sg_sizes == nullptr);
ASSERT_TRUE(sg_sizes_len == 0);
}