Skip to content

Commit

Permalink
[SYCL] atomic_memory_order_capabilities query for device and context (#…
Browse files Browse the repository at this point in the history
…8517)

This patch implements the `atomic_memory_order_capabilities` query in
the OpenCL and Level Zero backends/plugins for `device` and `context`

Specifically: 
- OpenCL <2.0 returns the minimum required capability set (`relaxed`)
defined in [Section 4.2 of the OpenCL 3.0
specification](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES).
- OpenCL <3.0 and Level Zero backends return all memory order
capabilities.
- OpenCL >=3.0 queries the actual device to get the supported memory
order capabilities.

E2E test have also been updated to reflect these changes:
intel/llvm-test-suite#1627
  • Loading branch information
Andrew Lamzed-Short authored Mar 23, 2023
1 parent 7663dc2 commit b18e6ea
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 22 deletions.
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <set>
#include <sstream>
#include <string>
#include <sycl/detail/pi.h>
#include <sycl/detail/spinlock.hpp>
#include <thread>
#include <utility>
Expand Down Expand Up @@ -2308,6 +2309,13 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
// 2D USM fill and memset is not supported.
return ReturnValue(pi_bool{false});
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
pi_memory_order_capabilities capabilities =
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
PI_MEMORY_ORDER_SEQ_CST;
return ReturnValue(capabilities);
}
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
// TODO: implement other parameters
Expand Down
59 changes: 58 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,65 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
// For details about Intel UUID extension, see
// sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
case PI_DEVICE_INFO_UUID:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
return PI_ERROR_INVALID_VALUE;
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
// This query is missing beore OpenCL 3.0
// Check version and handle appropriately
OCLV::OpenCLVersion devVer;
cl_device_id deviceID = cast<cl_device_id>(device);
cl_int ret_err = getDeviceVersion(deviceID, devVer);
if (ret_err != CL_SUCCESS) {
return cast<pi_result>(ret_err);
}

// Minimum required capability to be returned
// For OpenCL 1.2, this is all that is required
pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED;

if (devVer >= OCLV::V3_0) {
// For OpenCL >=3.0, the query should be implemented
cl_device_atomic_capabilities cl_capabilities = 0;
cl_int ret_err = clGetDeviceInfo(
deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr);
if (ret_err != CL_SUCCESS)
return cast<pi_result>(ret_err);

// Mask operation to only consider atomic_memory_order* capabilities
cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
cl_capabilities &= mask;

// The memory order capabilities are hierarchical, if one is implied, all
// preceding capbilities are implied as well. Especially in the case of
// ACQ_REL.
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
capabilities |= PI_MEMORY_ORDER_SEQ_CST;
}
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE;
}
} else if (devVer >= OCLV::V2_0) {
// For OpenCL 2.x, return all capabilities
// (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model)
capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE |
PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST;
}

if (paramValue) {
if (paramValueSize < sizeof(pi_memory_order_capabilities))
return static_cast<pi_result>(CL_INVALID_VALUE);

std::memcpy(paramValue, &capabilities, sizeof(capabilities));
}

if (paramValueSizeRet)
*paramValueSizeRet = sizeof(capabilities);

return static_cast<pi_result>(CL_SUCCESS);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
// Initialize result to minimum mandated capabilities according to
// SYCL2020 4.6.3.2
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
(ur_device_info_t)UR_DEVICE_INFO_BFLOAT16},
{PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES},
{PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES},
};

auto InfoType = InfoMapping.find(ParamName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
return ReturnValue(result);
}

case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
ur_memory_order_capability_flags_t capabilities =
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL |
UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
return ReturnValue(capabilities);
}

// TODO: Implement.
default:
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
Expand Down Expand Up @@ -1716,7 +1727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition(
// Currently supported partitioning (by affinity domain/numa) would always
// partition to all sub-devices.
//
if (NumDevices !=0)
if (NumDevices != 0)
PI_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE);

for (uint32_t I = 0; I < NumDevices; I++) {
Expand Down
29 changes: 20 additions & 9 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <sycl/property_list.hpp>
#include <sycl/stl.hpp>

#include <algorithm>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {
Expand Down Expand Up @@ -166,17 +168,26 @@ template <>
std::vector<sycl::memory_order>
context_impl::get_info<info::context::atomic_memory_order_capabilities>()
const {
std::vector<sycl::memory_order> CapabilityList{
sycl::memory_order::relaxed, sycl::memory_order::acquire,
sycl::memory_order::release, sycl::memory_order::acq_rel,
sycl::memory_order::seq_cst};
if (is_host())
return {sycl::memory_order::relaxed, sycl::memory_order::acquire,
sycl::memory_order::release, sycl::memory_order::acq_rel,
sycl::memory_order::seq_cst};
return CapabilityList;

for (const sycl::device &Device : MDevices) {
std::vector<sycl::memory_order> NewCapabilityList(CapabilityList.size());
std::vector<sycl::memory_order> DeviceCapabilities =
Device.get_info<info::device::atomic_memory_order_capabilities>();
std::set_intersection(
CapabilityList.begin(), CapabilityList.end(),
DeviceCapabilities.begin(), DeviceCapabilities.end(),
std::inserter(NewCapabilityList, NewCapabilityList.begin()));
CapabilityList = NewCapabilityList;
}
CapabilityList.shrink_to_fit();

pi_memory_order_capabilities Result;
getPlugin().call<PiApiKind::piContextGetInfo>(
MContext,
PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
return CapabilityList;
}
template <>
std::vector<sycl::memory_scope>
Expand Down
11 changes: 0 additions & 11 deletions sycl/source/detail/context_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,6 @@ typename Param::return_type get_context_info(RT::PiContext Ctx,
return Result;
}

// Specialization for atomic_memory_order_capabilities, PI returns a bitfield
template <>
std::vector<sycl::memory_order>
get_context_info<info::context::atomic_memory_order_capabilities>(
RT::PiContext Ctx, const plugin &Plugin) {
pi_memory_order_capabilities Result;
Plugin.call<PiApiKind::piContextGetInfo>(
Ctx, PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
}
} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
65 changes: 65 additions & 0 deletions sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//==---- AtomicMemoryOrderCapabilities.cpp --- memory order query test -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <gtest/gtest.h>
#include <helpers/PiMock.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;

namespace {

static constexpr size_t expectedCapabilityVecSize = 5;
static thread_local bool deviceGetInfoCalled = false;

static bool has_capability(const std::vector<memory_order> &deviceCapabilities,
memory_order capabilityToFind) {
return std::find(deviceCapabilities.begin(), deviceCapabilities.end(),
capabilityToFind) != deviceCapabilities.end();
}

pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) {
deviceGetInfoCalled = true;
if (param_value) {
pi_memory_order_capabilities *Capabilities =
reinterpret_cast<pi_memory_order_capabilities *>(param_value);
*Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
PI_MEMORY_ORDER_SEQ_CST;
}
}
return PI_SUCCESS;
}

TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) {
unittest::PiMock Mock;
platform Plt = Mock.getPlatform();

Mock.redefineAfter<detail::PiApiKind::piDeviceGetInfo>(
redefinedDeviceGetInfo);

const device Dev = Plt.get_devices()[0];
context Ctx{Dev};

auto Capabilities =
Dev.get_info<info::device::atomic_memory_order_capabilities>();
EXPECT_TRUE(deviceGetInfoCalled);
EXPECT_EQ(Capabilities.size(), expectedCapabilityVecSize);

EXPECT_TRUE(has_capability(Capabilities, memory_order::relaxed));
EXPECT_TRUE(has_capability(Capabilities, memory_order::acquire));
EXPECT_TRUE(has_capability(Capabilities, memory_order::release));
EXPECT_TRUE(has_capability(Capabilities, memory_order::acq_rel));
EXPECT_TRUE(has_capability(Capabilities, memory_order::seq_cst));
}

} // namespace
1 change: 1 addition & 0 deletions sycl/unittests/SYCL2020/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_sycl_unittest(SYCL2020Tests OBJECT
IsCompatible.cpp
DeviceGetInfoAspects.cpp
DeviceAspectTraits.cpp
AtomicMemoryOrderCapabilities.cpp
AtomicMemoryScopeCapabilities.cpp
)

0 comments on commit b18e6ea

Please sign in to comment.