Skip to content

[SYCL] Implement SYCL_INTEL_mem_channel_property extension #2762

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 22, 2020
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/aspects.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ enum class aspect {
ext_intel_gpu_slices,
ext_intel_gpu_subslices_per_slice,
ext_intel_gpu_eu_count_per_subslice,
ext_intel_max_mem_bandwidth
ext_intel_max_mem_bandwidth,
ext_intel_mem_channel
};

} // namespace sycl
Expand Down
3 changes: 1 addition & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -517,9 +517,8 @@ constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION =

// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to
// make the translation to OpenCL transparent.
// TODO: populate
//
using pi_mem_properties = pi_bitfield;
constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = CL_MEM_CHANNEL_INTEL;

// NOTE: queue properties are implemented this way to better support bit
// manipulations
Expand Down
23 changes: 12 additions & 11 deletions sycl/include/CL/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,23 @@ namespace detail {
// List of all dataless properties' IDs
enum DataLessPropKind {
BufferUseHostPtr = 0,
ImageUseHostPtr,
QueueEnableProfiling,
InOrder,
NoInit,
BufferUsePinnedHostMemory,
UsePrimaryContext,
DataLessPropKindSize
ImageUseHostPtr = 1,
QueueEnableProfiling = 2,
InOrder = 3,
NoInit = 4,
BufferUsePinnedHostMemory = 5,
UsePrimaryContext = 6,
DataLessPropKindSize = 7
};

// List of all properties with data IDs
enum PropWithDataKind {
BufferUseMutex = 0,
BufferContextBound,
ImageUseMutex,
ImageContextBound,
PropWithDataKindSize
BufferContextBound = 1,
ImageUseMutex = 2,
ImageContextBound = 3,
BufferMemChannel = 4,
PropWithDataKindSize = 5
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/info/device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,4 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, pi_uint64)
__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool)
4 changes: 3 additions & 1 deletion sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ enum class device : cl_device_info {
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT,

// intel extensions
ext_intel_pci_address = PI_DEVICE_INFO_PCI_ADDRESS,
ext_intel_gpu_eu_count = PI_DEVICE_INFO_GPU_EU_COUNT,
Expand All @@ -139,7 +140,8 @@ enum class device : cl_device_info {
ext_intel_gpu_subslices_per_slice = PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE,
ext_intel_gpu_eu_count_per_subslice =
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE,
ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH
ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH,
ext_intel_mem_channel = PI_MEM_PROPERTIES_CHANNEL
};

enum class device_type : pi_uint64 {
Expand Down
11 changes: 11 additions & 0 deletions sycl/include/CL/sycl/properties/buffer_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ class context_bound
private:
sycl::context MCtx;
};

class mem_channel : public detail::PropertyWithData<
detail::PropWithDataKind::BufferMemChannel> {
public:
mem_channel(uint32_t Channel) : MChannel(Channel) {}
uint32_t get_channel() const { return MChannel; }

private:
uint32_t MChannel;
};

} // namespace buffer
} // namespace property

Expand Down
27 changes: 16 additions & 11 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
#include <CL/sycl/detail/cl.h>
#include <CL/sycl/detail/pi.h>

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>

Expand Down Expand Up @@ -546,22 +548,25 @@ pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
void *host_ptr, pi_mem *ret_mem,
const pi_mem_properties *properties) {
pi_result ret_err = PI_INVALID_OPERATION;
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;

if (properties)
if (properties) {
// TODO: need to check if all properties are supported by OpenCL RT and
// ignore unsupported
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
// First we need to look up the function pointer
ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesName,
clCreateBufferWithPropertiesINTEL_fn>(
context, &FuncPtr);
if (FuncPtr) {
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
cast<cl_mem_flags>(flags), size, host_ptr,
cast<cl_int *>(&ret_err)));
return ret_err;
}
}

if (FuncPtr)
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
cast<cl_mem_flags>(flags), size, host_ptr,
cast<cl_int *>(&ret_err)));
else
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
cast<cl_mem_flags>(flags), size,
host_ptr, cast<cl_int *>(&ret_err)));
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
cast<cl_mem_flags>(flags), size,
host_ptr, cast<cl_int *>(&ret_err)));
return ret_err;
}

Expand Down
16 changes: 16 additions & 0 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,11 @@ inline bool get_device_info_host<info::device::usm_system_allocator>() {
return true;
}

template <>
inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
return false;
}

cl_uint get_native_vector_width(size_t idx);

// USM
Expand Down Expand Up @@ -1003,6 +1008,17 @@ template <> struct get_device_info<bool, info::device::usm_system_allocator> {
}
};

// Specialization for memory channel query
template <> struct get_device_info<bool, info::device::ext_intel_mem_channel> {
static bool get(RT::PiDevice dev, const plugin &Plugin) {
pi_mem_properties caps;
pi_result Err = Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
dev, pi::cast<RT::PiDeviceInfo>(info::device::ext_intel_mem_channel),
sizeof(pi_mem_properties), &caps, nullptr);
return (Err != PI_SUCCESS) ? false : (caps & PI_MEM_PROPERTIES_CHANNEL);
}
};

// Specializations for intel extensions for Level Zero low-level
// detail device descriptors (not support on host).
template <>
Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -4036,6 +4036,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4168EEENS3_12param_traitsIS4_XT_E
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4169EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4188EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE4189EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16915EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65568EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65569EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65570EEENS3_12param_traitsIS4_XT_EE11return_typeEv
Expand Down
17 changes: 17 additions & 0 deletions sycl/test/basic_tests/property_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ int main() {
}
}

{
cl::sycl::property_list MemChannelProp{
sycl_property::buffer::mem_channel(2)};
if (!MemChannelProp.has_property<sycl_property::buffer::mem_channel>()) {
std::cerr << "Error: property list has no property while should have."
<< std::endl;
Failed = true;
}
auto Prop =
MemChannelProp.get_property<sycl_property::buffer::mem_channel>();
if (Prop.get_channel() != 2) {
std::cerr << "Error: mem_channel property is not equal to 2."
<< std::endl;
Failed = true;
}
}

std::cerr << "Test status : " << (Failed ? "FAILED" : "PASSED") << std::endl;

return Failed;
Expand Down
19 changes: 19 additions & 0 deletions sycl/test/on-device/basic_tests/buffer/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,25 @@ int main() {
assert(data1[i] == 0);
}

{
int data1[10] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
{
buffer<int, 1> b(data1, range<1>(10), {property::buffer::mem_channel{3}});
queue myQueue;
myQueue.submit([&](handler &cgh) {
auto B = b.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class init_a_2>(range<1>{10},
[=](id<1> index) { B[index] = 0; });
});
assert(b.has_property<property::buffer::mem_channel>());
auto prop = b.get_property<property::buffer::mem_channel>();
assert(prop.get_channel() == 3 && "oops it's not 3");

} // Data is copied back because there is a user side shared_ptr
for (int i = 0; i < 10; i++)
assert(data1[i] == 0);
}

{
std::vector<int> data1(10, -1);
{
Expand Down