diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 9ae07e72c4d5e..cd179afc4dc3a 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -92,11 +92,13 @@ // 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query. // 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device // info query. -// 12.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and +// 13.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and // piextQueueGetNativeHandle +// 14.33 Added new parameter (memory object properties) to +// piextKernelSetArgMemObj -#define _PI_H_VERSION_MAJOR 13 -#define _PI_H_VERSION_MINOR 32 +#define _PI_H_VERSION_MAJOR 14 +#define _PI_H_VERSION_MINOR 33 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -1709,13 +1711,38 @@ __SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, const pi_event *event_wait_list, pi_event *event); +#ifndef PI_BIT +#define PI_BIT(_i) (1 << _i) +#endif // PI_BIT + +typedef enum { + PI_ACCESS_READ_WRITE = PI_BIT(0), + PI_ACCESS_WRITE_ONLY = PI_BIT(1), + PI_ACCESS_READ_ONLY = PI_BIT(2) +} _pi_mem_obj_access; +using pi_mem_obj_access = _pi_mem_obj_access; +typedef uint32_t pi_mem_access_flag; + +typedef enum { + PI_KERNEL_ARG_MEM_OBJ_ACCESS = 27, + PI_ENUM_FORCE_UINT32 = 0x7fffffff +} _pi_mem_obj_property_type; +using pi_mem_obj_property_type = _pi_mem_obj_property_type; + +typedef struct { + pi_mem_obj_property_type type; + void *pNext; + pi_mem_access_flag mem_access; +} _pi_mem_obj_property; +using pi_mem_obj_property = _pi_mem_obj_property; + // Extension to allow backends to process a PI memory object before adding it // as an argument for a kernel. // Note: This is needed by the CUDA backend to extract the device pointer to // the memory as the kernels uses it rather than the PI object itself. -__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel, - pi_uint32 arg_index, - const pi_mem *arg_value); +__SYCL_EXPORT pi_result piextKernelSetArgMemObj( + pi_kernel kernel, pi_uint32 arg_index, + const pi_mem_obj_property *arg_properties, const pi_mem *arg_value); // Extension to allow backends to process a PI sampler object before adding it // as an argument for a kernel. diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 574852b103ae0..1248f0ac3f402 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1409,7 +1409,8 @@ pi_result piKernelSetArg(pi_kernel, pi_uint32, size_t, const void *) { DIE_NO_IMPLEMENTATION; } -pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32, const pi_mem *) { +pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32, + const pi_mem_obj_property *, const pi_mem *) { DIE_NO_IMPLEMENTATION; } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 5ad0279b217f6..ded91d264f46b 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -2007,9 +2007,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, sycl::detail::pi::assertion( hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, device->get()) == hipSuccess); - // A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API is not - // guaranteed to use this format. In practice, it uses this format, at least - // in 5.3-5.5. To be on the safe side, we make sure the terminating \0 is set. + // A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API + // is not guaranteed to use this format. In practice, it uses this format, + // at least in 5.3-5.5. To be on the safe side, we make sure the terminating + // \0 is set. AddressBuffer[AddressBufferSize - 1] = '\0'; sycl::detail::pi::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0); return getInfoArray(strnlen(AddressBuffer, AddressBufferSize - 1) + 1, @@ -2961,7 +2962,9 @@ pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, } pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem_obj_property *arg_properties, const pi_mem *arg_value) { + std::ignore = arg_properties; assert(kernel != nullptr); assert(arg_value != nullptr); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index e7fd7128c319e..7df0ff7d21eab 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -370,9 +370,10 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, // Special version of piKernelSetArg to accept pi_mem. pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) { - - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue); + return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, + ArgValue); } // Special version of piKernelSetArg to accept pi_sampler. diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 13193318f7ef2..9f04858b472e4 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1116,7 +1116,9 @@ pi_result piSamplerCreate(pi_context context, } pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem_obj_property *arg_properties, const pi_mem *arg_value) { + std::ignore = arg_properties; return cast( clSetKernelArg(cast(kernel), cast(arg_index), sizeof(arg_value), cast(arg_value))); diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 346fa1dcf9923..077f5cb2eb155 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -2003,8 +2003,10 @@ inline pi_result piextGetDeviceFunctionPointer(pi_device Device, } // Special version of piKernelSetArg to accept pi_mem. -inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, - const pi_mem *ArgValue) { +inline pi_result +piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, + const pi_mem *ArgValue) { // TODO: the better way would probably be to add a new PI API for // extracting native PI object from PI handle, and have SYCL @@ -2017,21 +2019,43 @@ inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, if (ArgValue) UrMemory = reinterpret_cast(*ArgValue); - ur_kernel_arg_mem_obj_properties_t Properties{}; - // We don't yet know the device where this kernel will next be run on. // Thus we can't know the actual memory allocation that needs to be used. // Remember the memory object being used as an argument for this kernel // to process it later when the device is known (at the kernel enqueue). // - // TODO: for now we have to conservatively assume the access as read-write. - // Improve that by passing SYCL buffer accessor type into - // piextKernelSetArgMemObj. - // - ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS( - urKernelSetArgMemObj(UrKernel, ArgIndex, &Properties, UrMemory)); + // the only applicable type, just ignore anything else + if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) { + // following structure layout checks to be replaced with + // std::is_layout_compatible after move to C++20 + static_assert(sizeof(pi_mem_obj_property) == + sizeof(ur_kernel_arg_mem_obj_properties_t)); + static_assert(sizeof(pi_mem_obj_property::type) == + sizeof(ur_kernel_arg_mem_obj_properties_t::stype)); + static_assert(sizeof(pi_mem_obj_property::pNext) == + sizeof(ur_kernel_arg_mem_obj_properties_t::pNext)); + static_assert(sizeof(pi_mem_obj_property::mem_access) == + sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess)); + + static_assert(uint32_t(PI_ACCESS_READ_WRITE) == + uint32_t(UR_MEM_FLAG_READ_WRITE)); + static_assert(uint32_t(PI_ACCESS_READ_ONLY) == + uint32_t(UR_MEM_FLAG_READ_ONLY)); + static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) == + uint32_t(UR_MEM_FLAG_WRITE_ONLY)); + static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) == + uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES)); + + const ur_kernel_arg_mem_obj_properties_t *UrMemProperties = + reinterpret_cast( + ArgProperties); + HANDLE_ERRORS( + urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory)); + } else { + HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory)); + } + return PI_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 20fe7384a9c63..d89be52061a0d 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -150,11 +150,12 @@ __SYCL_EXPORT pi_result piKernelCreate(pi_program Program, } // Special version of piKernelSetArg to accept pi_mem. -__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel Kernel, - pi_uint32 ArgIndex, - const pi_mem *ArgValue) { +__SYCL_EXPORT pi_result piextKernelSetArgMemObj( + pi_kernel Kernel, pi_uint32 ArgIndex, + const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) { - return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue); + return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties, + ArgValue); } __SYCL_EXPORT pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp index 04bef4242b1b7..367008fcc6f23 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp @@ -699,9 +699,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_mem_handle_t_ *UrMem = ur_cast(ArgValue); + ur_mem_handle_t_::access_mode_t UrAccessMode = ur_mem_handle_t_::read_write; + if (Properties) { + switch (Properties->memoryAccess) { + case UR_MEM_FLAG_READ_WRITE: + UrAccessMode = ur_mem_handle_t_::read_write; + break; + case UR_MEM_FLAG_WRITE_ONLY: + UrAccessMode = ur_mem_handle_t_::write_only; + break; + case UR_MEM_FLAG_READ_ONLY: + UrAccessMode = ur_mem_handle_t_::read_only; + break; + default: + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + } auto Arg = UrMem ? UrMem : nullptr; Kernel->PendingArguments.push_back( - {ArgIndex, sizeof(void *), Arg, ur_mem_handle_t_::read_write}); + {ArgIndex, sizeof(void *), Arg, UrAccessMode}); return UR_RESULT_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index dfc9e136df8e3..f4cc72e3310d5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2178,6 +2178,18 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { } } +pi_mem_obj_access AccessModeToPi(access::mode AccessorMode) { + switch (AccessorMode) { + case access::mode::read: + return PI_ACCESS_READ_ONLY; + case access::mode::write: + case access::mode::discard_write: + return PI_ACCESS_WRITE_ONLY; + default: + return PI_ACCESS_READ_WRITE; + } +} + static pi_result SetKernelParamsAndLaunch( const QueueImplPtr &Queue, std::vector &Args, const std::shared_ptr &DeviceImageImpl, @@ -2212,8 +2224,11 @@ static pi_result SetKernelParamsAndLaunch( Plugin->call( Kernel, NextTrueIndex, sizeof(sycl::detail::pi::PiMem), &MemArg); } else { + pi_mem_obj_property MemObjData{}; + MemObjData.mem_access = AccessModeToPi(Req->MAccessMode); + MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS; Plugin->call(Kernel, NextTrueIndex, - &MemArg); + &MemObjData, &MemArg); } break; } @@ -2250,8 +2265,12 @@ static pi_result SetKernelParamsAndLaunch( // Avoid taking an address of nullptr sycl::detail::pi::PiMem *SpecConstsBufferArg = SpecConstsBuffer ? &SpecConstsBuffer : nullptr; - Plugin->call(Kernel, NextTrueIndex, - SpecConstsBufferArg); + + pi_mem_obj_property MemObjData{}; + MemObjData.mem_access = PI_ACCESS_READ_ONLY; + MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS; + Plugin->call( + Kernel, NextTrueIndex, &MemObjData, SpecConstsBufferArg); break; } case kernel_param_kind_t::kind_invalid: diff --git a/sycl/unittests/buffer/CMakeLists.txt b/sycl/unittests/buffer/CMakeLists.txt index 2d1585458a6fb..9d01b5750ff07 100644 --- a/sycl/unittests/buffer/CMakeLists.txt +++ b/sycl/unittests/buffer/CMakeLists.txt @@ -3,4 +3,5 @@ add_sycl_unittest(BufferTests OBJECT Image.cpp BufferDestructionCheck.cpp MemChannel.cpp + KernelArgMemObj.cpp ) diff --git a/sycl/unittests/buffer/KernelArgMemObj.cpp b/sycl/unittests/buffer/KernelArgMemObj.cpp new file mode 100644 index 0000000000000..811995bc92e73 --- /dev/null +++ b/sycl/unittests/buffer/KernelArgMemObj.cpp @@ -0,0 +1,129 @@ +//==----------- KernelArgMemObj.cpp ---- Scheduler unit tests ---------- ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include + +class TestKernelWithMemObj; + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithMemObj"; } + static constexpr unsigned getNumParams() { return 1; } + static const detail::kernel_param_desc_t &getParamDesc(int) { + static detail::kernel_param_desc_t desc{ + detail::kernel_param_kind_t::kind_accessor, + int(access::target::device) /*info*/, 0 /*offset*/}; + return desc; + } + static constexpr uint32_t getKernelSize() { return 32; } +}; +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +static sycl::unittest::PiImage generateImage() { + using namespace sycl::unittest; + + PiPropertySet PropSet; + + std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data + + PiArray Entries = makeEmptyKernels({"TestKernelWithMemObj"}); + + PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +static sycl::unittest::PiImage Img = generateImage(); +static sycl::unittest::PiImageArray<1> ImgArray{&Img}; + +using namespace sycl; + +bool PropertyPresent = false; +pi_mem_obj_property PropsCopy{}; + +pi_result redefinedKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem_obj_property *arg_properties, + const pi_mem *arg_value) { + PropertyPresent = arg_properties != nullptr; + if (PropertyPresent) + PropsCopy = *arg_properties; + return PI_SUCCESS; +} + +class BufferTestPiArgs : public ::testing::Test { +public: + BufferTestPiArgs() + : Mock(sycl::backend::ext_oneapi_level_zero), Plt{Mock.getPlatform()} {} + +protected: + void SetUp() override { + PropertyPresent = false; + PropsCopy = {}; + Mock.redefineBefore( + redefinedKernelSetArgMemObj); + } + + template + void TestFunc(pi_mem_obj_access ExpectedAccessMode) { + queue Queue(context(Plt), default_selector_v); + sycl::buffer Buf(3); + Queue + .submit([&](sycl::handler &cgh) { + auto acc = Buf.get_access(cgh); + cgh.single_task([=]() { + if constexpr (AccessMode != sycl::access::mode::read) + acc[0] = 4; + else + std::ignore = acc[0]; + }); + }) + .wait(); + ASSERT_TRUE(PropertyPresent); + EXPECT_EQ(PropsCopy.type, PI_KERNEL_ARG_MEM_OBJ_ACCESS); + EXPECT_EQ(PropsCopy.mem_access, ExpectedAccessMode); + } + +protected: + sycl::unittest::PiMock Mock; + sycl::platform Plt; +}; + +TEST_F(BufferTestPiArgs, KernelSetArgMemObjReadWrite) { + TestFunc(PI_ACCESS_READ_WRITE); +} + +TEST_F(BufferTestPiArgs, KernelSetArgMemObjDiscardReadWrite) { + TestFunc(PI_ACCESS_READ_WRITE); +} + +TEST_F(BufferTestPiArgs, KernelSetArgMemObjRead) { + TestFunc(PI_ACCESS_READ_ONLY); +} + +TEST_F(BufferTestPiArgs, KernelSetArgMemObjWrite) { + TestFunc(PI_ACCESS_WRITE_ONLY); +} + +TEST_F(BufferTestPiArgs, KernelSetArgMemObjDiscardWrite) { + TestFunc(PI_ACCESS_WRITE_ONLY); +} diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index b9f5fcc1778dc..73d0f03dca4fd 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -998,9 +998,10 @@ inline pi_result mock_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, return PI_SUCCESS; } -inline pi_result mock_piextKernelSetArgMemObj(pi_kernel kernel, - pi_uint32 arg_index, - const pi_mem *arg_value) { +inline pi_result +mock_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem_obj_property *arg_properties, + const pi_mem *arg_value) { return PI_SUCCESS; } diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index af681ae9bf74f..736e266b6566e 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -347,7 +347,7 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) { PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( - kern, 0, &memObj)), + kern, 0, nullptr, &memObj)), PI_SUCCESS); size_t workDim = 1; @@ -397,11 +397,11 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) { PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( - kern, 0, &memObj)), + kern, 0, nullptr, &memObj)), PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( - kern, 1, &memObj2)), + kern, 1, nullptr, &memObj2)), PI_SUCCESS); size_t workDim = 1;