diff --git a/sycl/include/sycl/ext/intel/esimd.hpp b/sycl/include/sycl/ext/intel/esimd.hpp
index 8bed5092f571e..07b5ace4a5ab8 100644
--- a/sycl/include/sycl/ext/intel/esimd.hpp
+++ b/sycl/include/sycl/ext/intel/esimd.hpp
@@ -81,8 +81,6 @@
 #include
 #include
-#include
-#include
 #include
 #include
 #include
diff --git a/sycl/include/sycl/ext/intel/esimd/common.hpp b/sycl/include/sycl/ext/intel/esimd/common.hpp
index 3af8e419c9f3d..2f144bced5a03 100644
--- a/sycl/include/sycl/ext/intel/esimd/common.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/common.hpp
@@ -10,6 +10,10 @@
 
 #pragma once
 
+#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
+#include <sycl/ext/intel/esimd/native/common.hpp>
+#include
+
 #include
 #include <cstdint> // for uint* types
 
@@ -18,59 +22,13 @@
 /// @cond ESIMD_DETAIL
 
 #ifdef __SYCL_DEVICE_ONLY__
-#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
-#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
-
-// Mark a function being nodebug.
-#define ESIMD_NODEBUG __attribute__((nodebug))
-// Mark a "ESIMD global": accessible from all functions in current translation
-// unit, separate copy per subgroup (work-item), mapped to SPIR-V private
-// storage class.
-#define ESIMD_PRIVATE \
-  __attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
-// Bind a ESIMD global variable to a specific register.
-#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
-
-#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
-
 #define __ESIMD_UNSUPPORTED_ON_HOST
-
 #else // __SYCL_DEVICE_ONLY__
-#define SYCL_ESIMD_KERNEL
-#define SYCL_ESIMD_FUNCTION
-
-// TODO ESIMD define what this means on Windows host
-#define ESIMD_NODEBUG
-// On host device ESIMD global is a thread local static var. This assumes that
-// each work-item is mapped to a separate OS thread on host device.
-#define ESIMD_PRIVATE thread_local
-#define ESIMD_REGISTER(n)
-
-#define __ESIMD_API ESIMD_INLINE
-
 #define __ESIMD_UNSUPPORTED_ON_HOST \
   throw sycl::exception(sycl::errc::feature_not_supported, \
                         "This ESIMD feature is not supported on HOST")
-
 #endif // __SYCL_DEVICE_ONLY__
 
-// Mark a function being noinline
-#define ESIMD_NOINLINE __attribute__((noinline))
-// Force a function to be inlined. 'inline' is used to preserve ODR for
-// functions defined in a header.
-#define ESIMD_INLINE inline __attribute__((always_inline))
-
-// Macros for internal use
-#define __ESIMD_NS sycl::ext::intel::esimd
-#define __ESIMD_DNS sycl::ext::intel::esimd::detail
-#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail
-
-#define __ESIMD_QUOTE1(m) #m
-#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m)
-#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS)
-#define __ESIMD_DEPRECATED(new_api) \
-  __SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api))
-
 /// @endcond ESIMD_DETAIL
 
 namespace sycl {
@@ -106,6 +64,19 @@ enum class rgba_channel : uint8_t { R, G, B, A };
 using SurfaceIndex = unsigned int;
 
 namespace detail {
+
+/// Check if a given 32 bit positive integer is a power of 2 at compile time.
+ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
+  return (n & (n - 1)) == 0;
+}
+
+/// Check at compile time if a given 32 bit positive integer is both:
+/// - a power of 2
+/// - less than or equal to the given limit
+ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
+  return (n & (n - 1)) == 0 && n <= limit;
+}
+
 template <rgba_channel Ch>
 static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
 static inline constexpr uint8_t chR = ch<rgba_channel::R>;
@@ -151,6 +122,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
          is_channel_enabled(M, rgba_channel::A);
 }
 
+#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC \
+  " is supported only on ACM, PVC. USM-based atomic will be auto-converted " \
+  "to LSC version."
+
 /// Represents an atomic operation. Operations always return the old value(s) of
 /// the target memory location(s) as it was before the operation was applied.
 /// Each operation is annotated with a pseudocode illustrating its semantics,
@@ -167,9 +142,11 @@ enum class atomic_op : uint8_t {
   /// Decrement: *addr = *addr - 1.
   dec = 0x3,
   /// Minimum: *addr = min(*addr, src0).
-  min = 0x4,
+  umin = 0x4,
+  min __SYCL_DEPRECATED("use umin") = umin,
   /// Maximum: *addr = max(*addr, src0).
-  max = 0x5,
+  umax = 0x5,
+  max __SYCL_DEPRECATED("use umax") = umax,
   /// Exchange: *addr = src0;
   xchg = 0x6,
   /// Compare and exchange. if (*addr == src0) *addr = src1;
@@ -181,18 +158,21 @@ enum class atomic_op : uint8_t {
   /// Bit \c xor: *addr = *addr ^ src0.
   bit_xor = 0xa,
   /// Minimum (signed integer): *addr = min(*addr, src0).
-  minsint = 0xb,
+  smin = 0xb,
+  minsint __SYCL_DEPRECATED("use smin") = smin,
   /// Maximum (signed integer): *addr = max(*addr, src0).
-  maxsint = 0xc,
+  smax = 0xc,
+  maxsint __SYCL_DEPRECATED("use smax") = smax,
   /// Maximum (floating point): *addr = max(*addr, src0).
-  fmax = 0x10,
+  fmax __SYCL_DEPRECATED("fmax" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10,
   /// Minimum (floating point): *addr = min(*addr, src0).
-  fmin = 0x11,
+  fmin __SYCL_DEPRECATED("fmin" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11,
   /// Compare and exchange (floating point).
   /// if (*addr == src0) *addr = src1;
-  fcmpwr = 0x12,
-  fadd = 0x13,
-  fsub = 0x14,
+  fcmpxchg = 0x12,
+  fcmpwr __SYCL_DEPRECATED("fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = fcmpxchg,
+  fadd __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13,
+  fsub __SYCL_DEPRECATED("fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14,
   load = 0x15,
   store = 0x16,
   /// Decrement: *addr = *addr - 1. The only operation which
@@ -200,8 +180,155 @@ enum class atomic_op : uint8_t {
   predec = 0xff,
 };
 
+#undef __ESIMD_USM_DWORD_ATOMIC_TO_LSC
+
 /// @} sycl_esimd_core
 
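For reference, here is a small migration sketch for the renamed enumerators. It is illustrative only (not part of the patch) and assumes a USM pointer `p` plus the usual ESIMD vector types:

    #include <sycl/ext/intel/esimd.hpp>
    using namespace sycl::ext::intel::esimd;

    SYCL_ESIMD_FUNCTION void update(unsigned *p, simd<unsigned, 8> byte_offsets,
                                    simd<unsigned, 8> vals) {
      simd_mask<8> pred = 1;
      // Old spelling, still compiles but now warns: atomic_op::min always
      // meant *unsigned* min.
      //   atomic_update<atomic_op::min>(p, byte_offsets, vals, pred);
      // New spelling makes the signedness explicit:
      atomic_update<atomic_op::umin>(p, byte_offsets, vals, pred);
    }

Existing code keeps compiling because the deprecated names alias the new enumerator values; only a deprecation warning is emitted.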
+namespace detail {
+template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
+  if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc ||
+                Op == __ESIMD_NS::native::lsc::atomic_op::dec ||
+                Op == __ESIMD_NS::native::lsc::atomic_op::load) {
+    return 0;
+  } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::add ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::sub ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::smin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::smax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::umin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::umax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fadd ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fsub ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fmin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fmax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_and ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_or ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
+    return 1;
+  } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
+    return 2;
+  } else {
+    return -1; // error
+  }
+}
+
+template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
+  switch (Op) {
+  case __ESIMD_NS::atomic_op::xchg:
+  case __ESIMD_NS::atomic_op::predec:
+    return false;
+  default:
+    return true;
+  }
+}
+
+template <__ESIMD_NS::atomic_op Op>
+constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
+  switch (Op) {
+  case __ESIMD_NS::atomic_op::add:
+    return __ESIMD_NS::native::lsc::atomic_op::add;
+  case __ESIMD_NS::atomic_op::sub:
+    return __ESIMD_NS::native::lsc::atomic_op::sub;
+  case __ESIMD_NS::atomic_op::inc:
+    return __ESIMD_NS::native::lsc::atomic_op::inc;
+  case __ESIMD_NS::atomic_op::dec:
+    return __ESIMD_NS::native::lsc::atomic_op::dec;
+  case __ESIMD_NS::atomic_op::min:
+    return __ESIMD_NS::native::lsc::atomic_op::umin;
+  case __ESIMD_NS::atomic_op::max:
+    return __ESIMD_NS::native::lsc::atomic_op::umax;
+  case __ESIMD_NS::atomic_op::cmpxchg:
+    return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
+  case __ESIMD_NS::atomic_op::bit_and:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_and;
+  case __ESIMD_NS::atomic_op::bit_or:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_or;
+  case __ESIMD_NS::atomic_op::bit_xor:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
+  case __ESIMD_NS::atomic_op::minsint:
+    return __ESIMD_NS::native::lsc::atomic_op::smin;
+  case __ESIMD_NS::atomic_op::maxsint:
+    return __ESIMD_NS::native::lsc::atomic_op::smax;
+  case __ESIMD_NS::atomic_op::fmax:
+    return __ESIMD_NS::native::lsc::atomic_op::fmax;
+  case __ESIMD_NS::atomic_op::fmin:
+    return __ESIMD_NS::native::lsc::atomic_op::fmin;
+  case __ESIMD_NS::atomic_op::fcmpwr:
+    return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
+  case __ESIMD_NS::atomic_op::fadd:
+    return __ESIMD_NS::native::lsc::atomic_op::fadd;
+  case __ESIMD_NS::atomic_op::fsub:
+    return __ESIMD_NS::native::lsc::atomic_op::fsub;
+  case __ESIMD_NS::atomic_op::load:
+    return __ESIMD_NS::native::lsc::atomic_op::load;
+  case __ESIMD_NS::atomic_op::store:
+    return __ESIMD_NS::native::lsc::atomic_op::store;
+  default:
+    static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
+  }
+}
+
+template <__ESIMD_NS::native::lsc::atomic_op Op>
+constexpr __ESIMD_NS::atomic_op to_atomic_op() {
+  switch (Op) {
+  case __ESIMD_NS::native::lsc::atomic_op::add:
+    return __ESIMD_NS::atomic_op::add;
+  case __ESIMD_NS::native::lsc::atomic_op::sub:
+    return __ESIMD_NS::atomic_op::sub;
+  case __ESIMD_NS::native::lsc::atomic_op::inc:
+    return __ESIMD_NS::atomic_op::inc;
+  case __ESIMD_NS::native::lsc::atomic_op::dec:
+    return __ESIMD_NS::atomic_op::dec;
+  case __ESIMD_NS::native::lsc::atomic_op::umin:
+    return __ESIMD_NS::atomic_op::min;
+  case __ESIMD_NS::native::lsc::atomic_op::umax:
+    return __ESIMD_NS::atomic_op::max;
+  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
+    return __ESIMD_NS::atomic_op::cmpxchg;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_and:
+    return __ESIMD_NS::atomic_op::bit_and;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_or:
+    return __ESIMD_NS::atomic_op::bit_or;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
+    return __ESIMD_NS::atomic_op::bit_xor;
+  case __ESIMD_NS::native::lsc::atomic_op::smin:
+    return __ESIMD_NS::atomic_op::minsint;
+  case __ESIMD_NS::native::lsc::atomic_op::smax:
+    return __ESIMD_NS::atomic_op::maxsint;
+  case __ESIMD_NS::native::lsc::atomic_op::fmax:
+    return __ESIMD_NS::atomic_op::fmax;
+  case __ESIMD_NS::native::lsc::atomic_op::fmin:
+    return __ESIMD_NS::atomic_op::fmin;
+  case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
+    return __ESIMD_NS::atomic_op::fcmpwr;
+  case __ESIMD_NS::native::lsc::atomic_op::fadd:
+    return __ESIMD_NS::atomic_op::fadd;
+  case __ESIMD_NS::native::lsc::atomic_op::fsub:
+    return __ESIMD_NS::atomic_op::fsub;
+  case __ESIMD_NS::native::lsc::atomic_op::load:
+    return __ESIMD_NS::atomic_op::load;
+  case __ESIMD_NS::native::lsc::atomic_op::store:
+    return __ESIMD_NS::atomic_op::store;
+  }
+}
+
+template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
+  if constexpr (has_lsc_equivalent<Op>()) {
+    return get_num_args<to_lsc_atomic_op<Op>()>();
+  } else {
+    switch (Op) {
+    case __ESIMD_NS::atomic_op::xchg:
+    case __ESIMD_NS::atomic_op::predec:
+      return 1;
+    default:
+      return -1; // error
+    }
+  }
+}
+
+} // namespace detail
+
 } // namespace ext::intel::esimd
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
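Because these helpers are all constexpr, the op mapping and operand arity can be verified entirely at compile time. A hypothetical sanity check (not part of the patch):

    namespace ns = sycl::ext::intel::esimd;

    // Round trip: legacy fcmpwr <-> native LSC fcmpxchg.
    static_assert(ns::detail::to_lsc_atomic_op<ns::atomic_op::fcmpwr>() ==
                  ns::native::lsc::atomic_op::fcmpxchg);
    static_assert(ns::detail::to_atomic_op<
                      ns::native::lsc::atomic_op::fcmpxchg>() ==
                  ns::atomic_op::fcmpwr);

    // Operand arity: inc takes 0 extra operands, fadd 1, cmpxchg 2.
    static_assert(ns::detail::get_num_args<ns::atomic_op::inc>() == 0);
    static_assert(ns::detail::get_num_args<ns::atomic_op::fadd>() == 1);
    static_assert(ns::detail::get_num_args<ns::atomic_op::cmpxchg>() == 2);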
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp
index 6c4e9b2fd4b2d..283048a5c5d05 100644
--- a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp
@@ -84,13 +84,18 @@ template <typename Ty> Ty atomic_min(Ty *ptr, Ty val) {
   // TODO: Windows will be supported soon
   __ESIMD_UNSUPPORTED_ON_HOST;
 #else
-  Ty _old, _new;
-  do {
-    _old = *ptr;
-    _new = std::min(_old, val);
-  } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
-                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
-  return _new;
+  // TODO FIXME: fix implementation for FP types.
+  if constexpr (std::is_integral_v<Ty>) {
+    Ty _old, _new;
+    do {
+      _old = *ptr;
+      _new = std::min(_old, val);
+    } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
+                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
+    return _new;
+  } else {
+    __ESIMD_UNSUPPORTED_ON_HOST;
+  }
 #endif
 }
 
@@ -99,13 +104,18 @@ template <typename Ty> Ty atomic_max(Ty *ptr, Ty val) {
   // TODO: Windows will be supported soon
   __ESIMD_UNSUPPORTED_ON_HOST;
 #else
-  Ty _old, _new;
-  do {
-    _old = *ptr;
-    _new = std::max(_old, val);
-  } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
-                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
-  return _new;
+  // TODO FIXME: fix implementation for FP types.
+  if constexpr (std::is_integral_v<Ty>) {
+    Ty _old, _new;
+    do {
+      _old = *ptr;
+      _new = std::max(_old, val);
+    } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false,
+                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
+    return _new;
+  } else {
+    __ESIMD_UNSUPPORTED_ON_HOST;
+  }
 #endif
 }
 
@@ -114,10 +124,15 @@ template <typename Ty> Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
   // TODO: Windows will be supported soon
   __ESIMD_UNSUPPORTED_ON_HOST;
 #else
-  Ty _old = expected;
-  __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
-                              __ATOMIC_SEQ_CST);
-  return *ptr;
+  // TODO FIXME: fix implementation for FP types.
+  if constexpr (std::is_integral_v<Ty>) {
+    Ty _old = expected;
+    __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST,
+                                __ATOMIC_SEQ_CST);
+    return *ptr;
+  } else {
+    __ESIMD_UNSUPPORTED_ON_HOST;
+  }
 #endif
 }
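The TODOs exist because the GCC/Clang `__atomic_*` builtins used here require integral types. One possible host-side shape for the FP case, shown only as an illustration of the usual workaround (a CAS loop over a same-width integer view); the function name and everything else in it is hypothetical and not part of the patch:

    #include <algorithm>
    #include <cstdint>
    #include <cstring>

    inline float atomic_min_float(float *ptr, float val) {
      static_assert(sizeof(float) == sizeof(uint32_t));
      auto *iptr = reinterpret_cast<uint32_t *>(ptr);
      uint32_t old_bits = __atomic_load_n(iptr, __ATOMIC_SEQ_CST);
      for (;;) {
        float old_val;
        std::memcpy(&old_val, &old_bits, sizeof(float)); // reinterpret bits
        float new_val = std::min(old_val, val);
        uint32_t new_bits;
        std::memcpy(&new_bits, &new_val, sizeof(float));
        if (__atomic_compare_exchange_n(iptr, &old_bits, new_bits, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
          return new_val;
        // On failure old_bits was refreshed by the CAS; retry.
      }
    }

Note this inherits the usual caveats of bitwise FP comparison (NaN and -0.0 handling), which is presumably part of why the proper fix is deferred.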
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/defines_elementary.hpp b/sycl/include/sycl/ext/intel/esimd/detail/defines_elementary.hpp
new file mode 100644
index 0000000000000..cf8d0bfd69d3a
--- /dev/null
+++ b/sycl/include/sycl/ext/intel/esimd/detail/defines_elementary.hpp
@@ -0,0 +1,63 @@
+//==---------------- defines_elementary.hpp - DPC++ Explicit SIMD API ------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Elementary definitions used in Explicit SIMD APIs.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+/// @cond ESIMD_DETAIL
+
+#ifdef __SYCL_DEVICE_ONLY__
+#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
+#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
+
+// Mark a function as nodebug.
+#define ESIMD_NODEBUG __attribute__((nodebug))
+// Mark an "ESIMD global": accessible from all functions in the current
+// translation unit, separate copy per subgroup (work-item), mapped to SPIR-V
+// private storage class.
+#define ESIMD_PRIVATE \
+  __attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
+// Bind an ESIMD global variable to a specific register.
+#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
+
+#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
+#else // __SYCL_DEVICE_ONLY__
+#define SYCL_ESIMD_KERNEL
+#define SYCL_ESIMD_FUNCTION
+
+// TODO ESIMD define what this means on Windows host
+#define ESIMD_NODEBUG
+// On the host device an ESIMD global is a thread-local static variable. This
+// assumes that each work-item is mapped to a separate OS thread on the host
+// device.
+#define ESIMD_PRIVATE thread_local
+#define ESIMD_REGISTER(n)
+
+#define __ESIMD_API ESIMD_INLINE
+#endif // __SYCL_DEVICE_ONLY__
+
+// Mark a function as noinline.
+#define ESIMD_NOINLINE __attribute__((noinline))
+// Force a function to be inlined. 'inline' is used to preserve ODR for
+// functions defined in a header.
+#define ESIMD_INLINE inline __attribute__((always_inline))
+
+// Macros for internal use
+#define __ESIMD_NS sycl::ext::intel::esimd
+#define __ESIMD_DNS sycl::ext::intel::esimd::detail
+#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail
+#define __ESIMD_ENS sycl::ext::intel::experimental::esimd
+#define __ESIMD_EDNS sycl::ext::intel::experimental::esimd::detail
+
+#define __ESIMD_QUOTE1(m) #m
+#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m)
+#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS)
+#define __ESIMD_DEPRECATED(new_api) \
+  __SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api))
+
+/// @endcond ESIMD_DETAIL
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/types.hpp b/sycl/include/sycl/ext/intel/esimd/detail/types.hpp
index ab1af7f9bbe95..55cef46f292eb 100644
--- a/sycl/include/sycl/ext/intel/esimd/detail/types.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/detail/types.hpp
@@ -36,8 +36,6 @@ template <typename BaseTy, typename RegionTy> class simd_view;
 
 namespace detail {
 
-namespace sd = sycl::detail;
-
 template <int N>
 using uint_type_t = std::conditional_t<
     N == 1, uint8_t,
@@ -82,7 +80,8 @@ static inline constexpr bool is_clang_vector_type_v =
 // @}
 
 template <typename T>
-using remove_cvref_t = sd::remove_cv_t<sd::remove_reference_t<T>>;
+using remove_cvref_t =
+    sycl::detail::remove_cv_t<sycl::detail::remove_reference_t<T>>;
 
 // is_esimd_arithmetic_type
 template <class...> struct make_esimd_void {
@@ -357,14 +356,14 @@ std::enable_if_t<is_clang_vector_type_v<To> && is_clang_vector_type_v<From>, To>
 template <typename T> constexpr bool is_type() { return false; }
 
 template <typename T, typename U, typename... Us> constexpr bool is_type() {
-  using UU = typename sd::remove_const_t<U>;
-  using TT = typename sd::remove_const_t<T>;
+  using UU = typename std::remove_const_t<U>;
+  using TT = typename std::remove_const_t<T>;
   return std::is_same<UU, TT>::value || is_type<T, Us...>();
 }
 
 // calculates the number of elements in "To" type
 template <typename ToEltTy, typename FromEltTy, int FromN,
-          typename = sd::enable_if_t<is_vectorizable<ToEltTy>::value>>
+          typename = std::enable_if_t<is_vectorizable<ToEltTy>::value>>
 struct bitcast_helper {
   static inline constexpr int nToElems() {
     constexpr int R1 = sizeof(ToEltTy) / sizeof(FromEltTy);
@@ -376,8 +375,8 @@ struct bitcast_helper {
 // Change the element type of a simd vector.
 template <typename ToEltTy, typename FromEltTy, int FromN,
-          typename = sd::enable_if_t<is_vectorizable<ToEltTy>::value>>
-ESIMD_INLINE typename sd::conditional_t<
+          typename = std::enable_if_t<is_vectorizable<ToEltTy>::value>>
+ESIMD_INLINE typename std::conditional_t<
     std::is_same<FromEltTy, ToEltTy>::value, vector_type_t<FromEltTy, FromN>,
     vector_type_t<ToEltTy,
                   bitcast_helper<ToEltTy, FromEltTy, FromN>::nToElems()>>
diff --git a/sycl/include/sycl/ext/intel/esimd/detail/util.hpp b/sycl/include/sycl/ext/intel/esimd/detail/util.hpp
index 040bddb41b96d..36bea945a42ca 100644
--- a/sycl/include/sycl/ext/intel/esimd/detail/util.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/detail/util.hpp
@@ -70,16 +70,6 @@ template <unsigned int N> constexpr unsigned int log2() {
   return Log2<N, (N > 1)>::get();
 }
 
-/// Check if a given 32 bit positive integer is a power of 2 at compile time.
-static ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
-  return (n & (n - 1)) == 0;
-}
-
-static ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n,
-                                              unsigned int limit) {
-  return (n & (n - 1)) == 0 && n <= limit;
-}
-
 /// type traits
 template <typename T> struct is_esimd_vector : public std::false_type {};
diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp
index d34b9f36e0e42..f10f832aa721a 100644
--- a/sycl/include/sycl/ext/intel/esimd/math.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/math.hpp
@@ -679,10 +679,10 @@ pack_mask(simd_mask<N> src0) {
 /// @return an \c uint, where each bit is set if the corresponding element of
 /// the source operand is non-zero and unset otherwise.
 template <typename T, int N>
-__ESIMD_API
-    std::enable_if_t<detail::is_type<T, ushort, uint>() && (N > 0 && N <= 32),
-                     uint>
-    ballot(simd<T, N> mask) {
+__ESIMD_API std::enable_if_t<(std::is_same_v<T, ushort> ||
+                              std::is_same_v<T, uint>)&&(N > 0 && N <= 32),
+                             uint>
+ballot(simd<T, N> mask) {
   simd_mask<N> cmp = (mask != 0);
   if constexpr (N == 8 || N == 16 || N == 32) {
     return __esimd_pack_mask<N>(cmp.data());
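A hypothetical call site for the reworked ballot (not part of the patch; assumes the `ushort`/`uint` aliases used by the header are visible):

    using namespace sycl::ext::intel::esimd;

    // Packs per-lane flags (0/1) into a 32-bit mask: bit i of the result is
    // set iff flags[i] != 0. With N == 16 this lowers directly onto the
    // __esimd_pack_mask intrinsic in the implementation above.
    SYCL_ESIMD_FUNCTION uint lane_mask(simd<ushort, 16> flags) {
      return ballot(flags);
    }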
diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp
index 36f06d91f2a91..81773fe78dc45 100644
--- a/sycl/include/sycl/ext/intel/esimd/memory.hpp
+++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp
@@ -67,8 +67,6 @@ __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc) {
   }
 }
 
-#define __ESIMD_GET_SURF_HANDLE(acc) get_surface_index(acc)
-
 // TODO @Pennycook
 // {quote}
 // ...I'd like us to think more about what we can do to make these interfaces
@@ -345,7 +343,7 @@ ESIMD_INLINE
   constexpr int TypeSizeLog2 = detail::ElemsPerAddrEncoding<sizeof(T)>();
   // TODO (performance) use hardware-supported scale once BE supports it
   constexpr int16_t scale = 0;
-  const auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  const auto si = __ESIMD_NS::get_surface_index(acc);
 
   if constexpr (sizeof(T) < 4) {
     using Tint = std::conditional_t<std::is_integral_v<T>, T,
@@ -690,103 +688,65 @@ scatter_rgba(AccessorT acc, simd<uint32_t, N> offsets,
 
 /// @} sycl_esimd_memory
 
-/// @cond ESIMD_DETAIL
-
 namespace detail {
 /// Check the legality of an atomic call in terms of size and type.
 ///
-template <atomic_op Op, typename T, int N, unsigned NumSrc>
-constexpr bool check_atomic() {
-  if constexpr (!detail::isPowerOf2(N, 32)) {
-    static_assert((detail::isPowerOf2(N, 32)),
-                  "Execution size 1, 2, 4, 8, 16, 32 are supported");
-    return false;
-  }
-
-  // No source operands.
-  if constexpr (Op == atomic_op::inc || Op == atomic_op::dec) {
-    if constexpr (NumSrc != 0) {
-      static_assert(NumSrc == 0, "No source operands are expected");
-      return false;
-    }
-    if constexpr (!is_type<T, uint16_t, uint32_t, uint64_t>()) {
-      static_assert((is_type<T, uint16_t, uint32_t, uint64_t>()),
-                    "Type UW, UD or UQ is expected");
-      return false;
-    }
-    return true;
-  }
-
-  // One source integer operand.
-  if constexpr (Op == atomic_op::add || Op == atomic_op::sub ||
-                Op == atomic_op::min || Op == atomic_op::max ||
-                Op == atomic_op::xchg || Op == atomic_op::bit_and ||
-                Op == atomic_op::bit_or || Op == atomic_op::bit_xor ||
-                Op == atomic_op::minsint || Op == atomic_op::maxsint) {
-    if constexpr (NumSrc != 1) {
-      static_assert(NumSrc == 1, "One source operand is expected");
-      return false;
-    }
-    if constexpr ((Op != atomic_op::minsint && Op != atomic_op::maxsint) &&
-                  !is_type<T, uint16_t, uint32_t, uint64_t>()) {
-      static_assert((is_type<T, uint16_t, uint32_t, uint64_t>()),
-                    "Type UW, UD or UQ is expected");
-      return false;
-    }
-    if constexpr ((Op == atomic_op::minsint || Op == atomic_op::maxsint) &&
-                  !is_type<T, int16_t, int32_t, int64_t>()) {
-      static_assert((is_type<T, int16_t, int32_t, int64_t>()),
-                    "Type W, D or Q is expected");
-      return false;
-    }
-    return true;
-  }
-
+template <__ESIMD_NS::atomic_op Op, typename T, int N, unsigned NumSrc>
+constexpr void check_atomic() {
+  static_assert((detail::isPowerOf2(N, 32)),
+                "Execution size 1, 2, 4, 8, 16, 32 are supported");
+  static_assert(NumSrc == __ESIMD_DNS::get_num_args<Op>(),
+                "wrong number of operands");
+  constexpr bool IsInt2BytePlus =
+      std::is_integral_v<T> && (sizeof(T) >= sizeof(uint16_t));
+
+  if constexpr (Op == __ESIMD_NS::atomic_op::xchg ||
+                Op == __ESIMD_NS::atomic_op::cmpxchg ||
+                Op == __ESIMD_NS::atomic_op::predec ||
+                Op == __ESIMD_NS::atomic_op::inc ||
+                Op == __ESIMD_NS::atomic_op::dec ||
+                Op == __ESIMD_NS::atomic_op::load) {
+
+    static_assert(IsInt2BytePlus, "Integral 16-bit or wider type is expected");
+  }
-  // One source float operand.
-  if constexpr (Op == atomic_op::fmax || Op == atomic_op::fmin ||
-                Op == atomic_op::fadd || Op == atomic_op::fsub) {
-    if constexpr (NumSrc != 1) {
-      static_assert(NumSrc == 1, "One source operand is expected");
-      return false;
-    }
-    if constexpr (!is_type<T, float, sycl::half>()) {
-      static_assert((is_type<T, float, sycl::half>()),
-                    "Type F or HF is expected");
-      return false;
-    }
-    return true;
-  }
+  // FP ops (always delegated to the native::lsc:: versions).
+  if constexpr (Op == __ESIMD_NS::atomic_op::fmax ||
+                Op == __ESIMD_NS::atomic_op::fmin ||
+                Op == __ESIMD_NS::atomic_op::fadd ||
+                Op == __ESIMD_NS::atomic_op::fsub) {
+    static_assert((is_type<T, float, sycl::half>()),
+                  "Type F or HF is expected");
+  }
-
-  // Two source operands.
-  if constexpr (Op == atomic_op::cmpxchg || Op == atomic_op::fcmpwr) {
-    if constexpr (NumSrc != 2) {
-      static_assert(NumSrc == 2, "Two source operands are expected");
-      return false;
-    }
-    if constexpr (Op == atomic_op::cmpxchg &&
-                  !is_type<T, uint16_t, uint32_t, uint64_t>()) {
-      static_assert((is_type<T, uint16_t, uint32_t, uint64_t>()),
-                    "Type UW, UD or UQ is expected");
-      return false;
-    }
-    if constexpr (Op == atomic_op::fcmpwr && !is_type<T, float, sycl::half>()) {
-      static_assert((is_type<T, float, sycl::half>()),
-                    "Type F or HF is expected");
-      return false;
-    }
-    return true;
-  }
+  if constexpr (Op == __ESIMD_NS::atomic_op::add ||
+                Op == __ESIMD_NS::atomic_op::sub ||
+                Op == __ESIMD_NS::atomic_op::min ||
+                Op == __ESIMD_NS::atomic_op::max ||
+                Op == __ESIMD_NS::atomic_op::bit_and ||
+                Op == __ESIMD_NS::atomic_op::bit_or ||
+                Op == __ESIMD_NS::atomic_op::bit_xor ||
+                Op == __ESIMD_NS::atomic_op::minsint ||
+                Op == __ESIMD_NS::atomic_op::maxsint) {
+    static_assert(IsInt2BytePlus, "Integral 16-bit or wider type is expected");
+    constexpr bool IsSignedMinmax = (Op == __ESIMD_NS::atomic_op::minsint) ||
+                                    (Op == __ESIMD_NS::atomic_op::maxsint);
+    constexpr bool IsUnsignedMinmax = (Op == __ESIMD_NS::atomic_op::min) ||
+                                      (Op == __ESIMD_NS::atomic_op::max);
+
+    if constexpr (IsSignedMinmax || IsUnsignedMinmax) {
+      constexpr bool SignOK = std::is_signed_v<T> == IsSignedMinmax;
+      static_assert(SignOK, "Signed/unsigned integer type expected for "
+                            "signed/unsigned min/max operation");
+    }
+  }
-  // Unsupported svm atomic Op.
-  return false;
-}
+}
 } // namespace detail
 
-/// @endcond ESIMD_DETAIL
-
 /// @addtogroup sycl_esimd_memory_atomics
 /// @{
 
 /// @anchor usm_atomic_update0
+/// @brief No-argument variant of the atomic update operation.
+///
 /// Atomically updates \c N memory locations represented by a USM pointer and
 /// a vector of offsets relative to the pointer, and returns a vector of old
 /// values found at the memory locations before update. The update operation
@@ -803,16 +763,20 @@ constexpr bool check_atomic() {
 /// @return A vector of the old values at the memory locations before the
 /// update.
 ///
-template <atomic_op Op, typename Tx, int N, class T = detail::__raw_t<Tx>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T, N, 0>(), simd<Tx, N>>
-atomic_update(Tx *p, simd<unsigned, N> offset, simd_mask<N> mask) {
+template <atomic_op Op, typename Tx, int N>
+__ESIMD_API simd<Tx, N> atomic_update(Tx *p, simd<unsigned, N> offset,
+                                      simd_mask<N> mask) {
+  detail::check_atomic<Op, Tx, N, 0>();
   simd<uintptr_t, N> vAddr(reinterpret_cast<uintptr_t>(p));
   simd<uintptr_t, N> offset_i1 = convert<uintptr_t>(offset);
   vAddr += offset_i1;
+  using T = typename detail::__raw_t<Tx>;
   return __esimd_svm_atomic0<Op, T, N>(vAddr.data(), mask.data());
 }
 
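Hypothetical device-side usage of the no-argument form (not from the patch):

    using namespace sycl::ext::intel::esimd;

    SYCL_ESIMD_FUNCTION void bump(uint32_t *counters,
                                  simd<unsigned, 8> byte_offsets) {
      simd_mask<8> pred = 1; // update all 8 lanes
      // check_atomic<inc, uint32_t, 8, 0>() runs at compile time; the
      // intrinsic returns the previous value at each location.
      simd<uint32_t, 8> old =
          atomic_update<atomic_op::inc>(counters, byte_offsets, pred);
      (void)old;
    }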
 /// @anchor usm_atomic_update1
+/// @brief Single-argument variant of the atomic update operation.
+///
 /// Atomically updates \c N memory locations represented by a USM pointer and
 /// a vector of offsets relative to the pointer, and returns a vector of old
 /// values found at the memory locations before update. The update operation
@@ -833,14 +797,23 @@ atomic_update(Tx *p, simd<unsigned, N> offset, simd_mask<N> mask) {
 /// @return A vector of the old values at the memory locations before the
 /// update.
 ///
-template <atomic_op Op, typename Tx, int N, class T = detail::__raw_t<Tx>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T, N, 1>(), simd<Tx, N>>
-atomic_update(Tx *p, simd<unsigned, N> offset, simd<Tx, N> src0,
-              simd_mask<N> mask) {
-  simd<uintptr_t, N> vAddr(reinterpret_cast<uintptr_t>(p));
-  simd<uintptr_t, N> offset_i1 = convert<uintptr_t>(offset);
-  vAddr += offset_i1;
-  return __esimd_svm_atomic1<Op, T, N>(vAddr.data(), src0.data(), mask.data());
+template <atomic_op Op, typename Tx, int N>
+__ESIMD_API simd<Tx, N> atomic_update(Tx *p, simd<unsigned, N> offset,
+                                      simd<Tx, N> src0, simd_mask<N> mask) {
+  if constexpr ((Op == atomic_op::fmin) || (Op == atomic_op::fmax) ||
+                (Op == atomic_op::fadd) || (Op == atomic_op::fsub)) {
+    // Auto-convert FP atomics to LSC version. Warning is given - see enum.
+    return atomic_update<detail::to_lsc_atomic_op<Op>(), Tx, N>(p, offset,
+                                                                src0, mask);
+  } else {
+    detail::check_atomic<Op, Tx, N, 1>();
+    simd<uintptr_t, N> vAddr(reinterpret_cast<uintptr_t>(p));
+    simd<uintptr_t, N> offset_i1 = convert<uintptr_t>(offset);
+    vAddr += offset_i1;
+    using T = typename detail::__raw_t<Tx>;
+    return __esimd_svm_atomic1<Op, T, N>(vAddr.data(), src0.data(),
+                                         mask.data());
+  }
 }
 
 /// @anchor usm_atomic_update2
@@ -862,15 +835,23 @@ atomic_update(Tx *p, simd<unsigned, N> offset, simd<Tx, N> src0,
 /// @return A vector of the old values at the memory locations before the
 /// update.
 ///
-template <atomic_op Op, typename Tx, int N, class T = detail::__raw_t<Tx>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T, N, 2>(), simd<Tx, N>>
-atomic_update(Tx *p, simd<unsigned, N> offset, simd<Tx, N> src0,
-              simd<Tx, N> src1, simd_mask<N> mask) {
-  simd<uintptr_t, N> vAddr(reinterpret_cast<uintptr_t>(p));
-  simd<uintptr_t, N> offset_i1 = convert<uintptr_t>(offset);
-  vAddr += offset_i1;
-  return __esimd_svm_atomic2<Op, T, N>(vAddr.data(), src0.data(), src1.data(),
-                                       mask.data());
+template <atomic_op Op, typename Tx, int N>
+__ESIMD_API simd<Tx, N> atomic_update(Tx *p, simd<unsigned, N> offset,
+                                      simd<Tx, N> src0, simd<Tx, N> src1,
+                                      simd_mask<N> mask) {
+  if constexpr (Op == atomic_op::fcmpwr) {
+    // Auto-convert FP atomics to LSC version. Warning is given - see enum.
+    return atomic_update<detail::to_lsc_atomic_op<Op>(), Tx, N>(p, offset,
+                                                                src0, src1,
+                                                                mask);
+  } else {
+    detail::check_atomic<Op, Tx, N, 2>();
+    simd<uintptr_t, N> vAddr(reinterpret_cast<uintptr_t>(p));
+    simd<uintptr_t, N> offset_i1 = convert<uintptr_t>(offset);
+    vAddr += offset_i1;
+    using T = typename detail::__raw_t<Tx>;
+    return __esimd_svm_atomic2<Op, T, N>(vAddr.data(), src0.data(),
+                                         src1.data(), mask.data());
+  }
 }
 
 /// @} sycl_esimd_memory_atomics
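And a hypothetical two-operand call matching the cmpxchg semantics documented in the enum, i.e. if (*addr == src0) *addr = src1 (not part of the patch):

    using namespace sycl::ext::intel::esimd;

    SYCL_ESIMD_FUNCTION simd_mask<8> try_claim(uint32_t *slots,
                                               simd<unsigned, 8> byte_offsets,
                                               uint32_t expected,
                                               uint32_t desired) {
      simd<uint32_t, 8> exp(expected), des(desired);
      simd_mask<8> pred = 1;
      simd<uint32_t, 8> old =
          atomic_update<atomic_op::cmpxchg>(slots, byte_offsets, exp, des, pred);
      return old == exp; // lanes whose CAS succeeded
    }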
@@ -999,7 +980,7 @@ __ESIMD_API
     std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4),
                      simd<T, N * get_num_channels_enabled(RGBAMask)>>
     slm_gather_rgba(simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
-  const auto SI = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+  const auto SI = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   return __esimd_gather4_masked_scaled2<T, N, RGBAMask>(
       SI, 0 /*global_offset*/, offsets.data(), mask.data());
 }
@@ -1020,7 +1001,7 @@ slm_scatter_rgba(simd<uint32_t, N> offsets,
                  simd<T, N * get_num_channels_enabled(RGBAMask)> vals,
                  simd_mask<N> mask = 1) {
   detail::validate_rgba_write_channel_mask<RGBAMask>();
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   constexpr int16_t Scale = 0;
   constexpr int global_offset = 0;
   __esimd_scatter4_scaled<T, N, decltype(si), RGBAMask, Scale>(
@@ -1047,7 +1028,7 @@ __ESIMD_API simd<T, N> slm_block_load(uint32_t offset) {
   static_assert(Sz <= 16 * detail::OperandSize::OWORD,
                 "block size must be at most 16 owords");
 
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   return __esimd_oword_ld<detail::__raw_t<T>, N>(si, offset >> 4);
 }
@@ -1070,7 +1051,7 @@ __ESIMD_API void slm_block_store(uint32_t offset, simd<T, N> vals) {
                 "block must be 1, 2, 4 or 8 owords long");
   static_assert(Sz <= 8 * detail::OperandSize::OWORD,
                 "block size must be at most 8 owords");
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   // offset in genx.oword.st is in owords
   __esimd_oword_st<detail::__raw_t<T>, N>(si, offset >> 4, vals.data());
 }
@@ -1079,9 +1060,10 @@ __ESIMD_API void slm_block_store(uint32_t offset, simd<T, N> vals) {
 /// See description of template and function parameters in @ref
 /// usm_atomic_update0 "atomic update" operation docs.
 template <atomic_op Op, typename T, int N, class T1 = detail::__raw_t<T>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T1, N, 0>(), simd<T, N>>
-slm_atomic_update(simd<uint32_t, N> offsets, simd_mask<N> mask) {
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+__ESIMD_API simd<T, N> slm_atomic_update(simd<uint32_t, N> offsets,
+                                         simd_mask<N> mask) {
+  detail::check_atomic<Op, T, N, 0>();
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   return __esimd_dword_atomic0<Op, T1, N>(mask.data(), si, offsets.data());
 }
 
@@ -1089,10 +1071,10 @@ slm_atomic_update(simd<uint32_t, N> offsets, simd_mask<N> mask) {
 /// See description of template and function parameters in @ref
 /// usm_atomic_update1 "atomic update" operation docs.
 template <atomic_op Op, typename T, int N, class T1 = detail::__raw_t<T>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T1, N, 1>(), simd<T, N>>
-slm_atomic_update(simd<uint32_t, N> offsets, simd<T, N> src0,
-                  simd_mask<N> mask) {
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+__ESIMD_API simd<T, N> slm_atomic_update(simd<uint32_t, N> offsets,
+                                         simd<T, N> src0, simd_mask<N> mask) {
+  detail::check_atomic<Op, T, N, 1>();
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   return __esimd_dword_atomic1<Op, T1, N>(mask.data(), si, offsets.data(),
                                           src0.data());
 }
 
@@ -1101,10 +1083,11 @@ slm_atomic_update(simd<uint32_t, N> offsets, simd<T, N> src0,
 /// See description of template and function parameters in @ref
 /// usm_atomic_update2 "atomic update" operation docs.
 template <atomic_op Op, typename T, int N, class T1 = detail::__raw_t<T>>
-__ESIMD_API std::enable_if_t<detail::check_atomic<Op, T1, N, 2>(), simd<T, N>>
-slm_atomic_update(simd<uint32_t, N> offsets, simd<T, N> src0, simd<T, N> src1,
-                  simd_mask<N> mask) {
-  const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
+__ESIMD_API simd<T, N> slm_atomic_update(simd<uint32_t, N> offsets,
+                                         simd<T, N> src0, simd<T, N> src1,
+                                         simd_mask<N> mask) {
+  detail::check_atomic<Op, T, N, 2>();
+  const auto si = __ESIMD_NS::get_surface_index(detail::LocalAccessorMarker());
   return __esimd_dword_atomic2<Op, T1, N>(mask.data(), si, offsets.data(),
                                           src0.data(), src1.data());
 }
@@ -1137,7 +1120,7 @@ __ESIMD_API simd<T, m * N> media_block_load(AccessorTy acc, unsigned x,
   static_assert(m <= 64u, "valid block height is in range [1, 64]");
   static_assert(plane <= 3u, "valid plane index is in range [0, 3]");
 
-  const auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  const auto si = __ESIMD_NS::get_surface_index(acc);
   using SurfIndTy = decltype(si);
   constexpr unsigned int RoundedWidth =
       Width < 4 ? 4 : detail::getNextPowerOf2<Width>();
@@ -1177,7 +1160,7 @@ __ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y,
   static_assert(Width <= 64u, "valid block width is in range [1, 64]");
   static_assert(m <= 64u, "valid block height is in range [1, 64]");
   static_assert(plane <= 3u, "valid plane index is in range [0, 3]");
-  const auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  const auto si = __ESIMD_NS::get_surface_index(acc);
   using SurfIndTy = decltype(si);
   constexpr unsigned int RoundedWidth =
       Width < 4 ? 4 : detail::getNextPowerOf2<Width>();
@@ -1201,8 +1184,6 @@ __ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y,
 
 /// @} sycl_esimd_memory
 
-#undef __ESIMD_GET_SURF_HANDLE
-
 /// @cond EXCLUDE
 
 namespace detail {
diff --git a/sycl/include/sycl/ext/intel/esimd/native/common.hpp b/sycl/include/sycl/ext/intel/esimd/native/common.hpp
new file mode 100644
index 0000000000000..dab20a27515ec
--- /dev/null
+++ b/sycl/include/sycl/ext/intel/esimd/native/common.hpp
@@ -0,0 +1,69 @@
+//==-------------- native/common.hpp - DPC++ Explicit SIMD API -------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Explicit SIMD API types used in native ESIMD APIs.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include
+
+#include
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+namespace ext::intel::esimd {
+namespace native {
+namespace lsc {
+
+/// @addtogroup sycl_esimd_memory
+/// @{
+
+/// @defgroup sycl_esimd_memory_lsc LSC-specific memory access APIs.
+/// This group combines types and functions specific to LSC, which is available
+/// in Intel GPUs starting from PVC and ACM.
+
+/// @} sycl_esimd_memory
+
+/// @addtogroup sycl_esimd_memory_lsc
+/// @{
+
+// TODO move all LSC-related "common" APIs here
+
+/// LSC atomic operation codes.
+/// atomic_update<Op>(...); is a short-cut to
+/// lsc_atomic_update<Op>(...); with default cache and data
+/// size controls.
+enum class atomic_op : uint8_t {
+  inc = 0x08,      // atomic integer increment
+  dec = 0x09,      // atomic integer decrement
+  load = 0x0a,     // atomic load
+  store = 0x0b,    // atomic store
+  add = 0x0c,      // atomic integer add
+  sub = 0x0d,      // atomic integer subtract
+  smin = 0x0e,     // atomic signed int min
+  smax = 0x0f,     // atomic signed int max
+  umin = 0x10,     // atomic unsigned int min
+  umax = 0x11,     // atomic unsigned int max
+  cmpxchg = 0x12,  // atomic int compare and swap
+  fadd = 0x13,     // floating-point add
+  fsub = 0x14,     // floating-point subtract
+  fmin = 0x15,     // floating-point min
+  fmax = 0x16,     // floating-point max
+  fcmpxchg = 0x17, // floating-point CAS
+  bit_and = 0x18,  // logical (bitwise) AND
+  bit_or = 0x19,   // logical (bitwise) OR
+  bit_xor = 0x1a,  // logical (bitwise) XOR
+};
+
+/// @} sycl_esimd_memory_lsc
+
+} // namespace lsc
+} // namespace native
+} // namespace ext::intel::esimd
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
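A hypothetical sketch of where these native codes surface for users (not part of the patch; assumes ACM/PVC hardware and the experimental lsc_atomic_update entry point with its default data-size and cache-hint parameters):

    namespace exp = sycl::ext::intel::experimental::esimd;
    using namespace sycl::ext::intel::esimd;

    SYCL_ESIMD_FUNCTION void accumulate(float *acc,
                                        simd<uint32_t, 16> byte_offsets,
                                        simd<float, 16> vals) {
      simd_mask<16> pred = 1;
      // The deprecated atomic_op::fadd path in atomic_update now forwards to
      // this LSC-based implementation (native::lsc::atomic_op::fadd).
      exp::lsc_atomic_update<atomic_op::fadd, float, 16>(acc, byte_offsets,
                                                         vals, pred);
    }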
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/common.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/common.hpp
index 429c37b5ea11b..85a1ad385b918 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/common.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/common.hpp
@@ -5,20 +5,16 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// definitions used in experimental Explicit SIMD APIs.
+// Common definitions used in experimental Explicit SIMD APIs.
 //===----------------------------------------------------------------------===//
 
 #pragma once
 
-#include
+#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
+#include <sycl/ext/intel/esimd/native/common.hpp>
 
-/// @cond ESIMD_DETAIL
-
-// Macros for internal use
-#define __ESIMD_ENS sycl::ext::intel::experimental::esimd
-#define __ESIMD_EDNS sycl::ext::intel::experimental::esimd::detail
-
-/// @endcond ESIMD_DETAIL
+#include
+#include
 
 namespace sycl {
 __SYCL_INLINE_VER_NAMESPACE(_V1) {
@@ -87,28 +83,6 @@ enum class lsc_data_size : uint8_t {
 };
 
 namespace detail {
-/// LSC atomic operations op codes
-enum class lsc_atomic_op : uint8_t {
-  iinc = 0x08,    // atomic integer increment
-  idec = 0x09,    // atomic integer decrement
-  load = 0x0a,    // atomic load
-  store = 0x0b,   // atomic store
-  iadd = 0x0c,    // atomic integer add
-  isub = 0x0d,    // atomic integer subtract
-  smin = 0x0e,    // atomic signed int min
-  smax = 0x0f,    // atomic signed int max
-  umin = 0x10,    // atomic unsigned int min
-  umax = 0x11,    // atomic unsigned int max
-  icas = 0x12,    // atomic int compare and swap
-  fadd = 0x13,    // floating-point add
-  fsub = 0x14,    // floating-point subtract
-  fmin = 0x15,    // floating-point min
-  fmax = 0x16,    // floating-point max
-  fcas = 0x17,    // floating-point CAS
-  bit_and = 0x18, // logical (bitwise) AND
-  bit_or = 0x19,  // logical (bitwise) OR
-  bit_xor = 0x1a, // logical (bitwise) XOR
-};
 
 enum class lsc_vector_size : uint8_t {
   n1 = 1,
@@ -146,106 +120,6 @@ template <typename T> constexpr void check_lsc_data_size() {
                 "Unsupported data type");
 }
 
-template <__ESIMD_NS::atomic_op Op> constexpr void check_lsc_atomic_op() {
-  static_assert(Op == __ESIMD_NS::atomic_op::add ||
-                    Op == __ESIMD_NS::atomic_op::sub ||
-                    Op == __ESIMD_NS::atomic_op::inc ||
-                    Op == __ESIMD_NS::atomic_op::dec ||
-                    Op == __ESIMD_NS::atomic_op::min ||
-                    Op == __ESIMD_NS::atomic_op::max ||
-                    Op == __ESIMD_NS::atomic_op::cmpxchg ||
-                    Op == __ESIMD_NS::atomic_op::bit_and ||
-                    Op == __ESIMD_NS::atomic_op::bit_or ||
-                    Op == __ESIMD_NS::atomic_op::bit_xor ||
-                    Op == __ESIMD_NS::atomic_op::minsint ||
-                    Op == __ESIMD_NS::atomic_op::maxsint ||
-                    Op == __ESIMD_NS::atomic_op::fmax ||
-                    Op == __ESIMD_NS::atomic_op::fmin ||
-                    Op == __ESIMD_NS::atomic_op::fcmpwr ||
-                    Op == __ESIMD_NS::atomic_op::fadd ||
-                    Op == __ESIMD_NS::atomic_op::fsub ||
-                    Op == __ESIMD_NS::atomic_op::load ||
-                    Op == __ESIMD_NS::atomic_op::store,
-                "Unsupported operation for LSC atomics");
-}
-
-/// Check the legality of lsc xatomic call in terms of size and type.
-template <__ESIMD_NS::atomic_op Op, unsigned NumSrc>
-constexpr void check_lsc_atomic() {
-  check_lsc_atomic_op<Op>();
-  if constexpr (Op == __ESIMD_NS::atomic_op::inc ||
-                Op == __ESIMD_NS::atomic_op::dec ||
-                Op == __ESIMD_NS::atomic_op::load) {
-    static_assert(NumSrc == 0, "No source operands are expected");
-  }
-  if constexpr (Op == __ESIMD_NS::atomic_op::store ||
-                Op == __ESIMD_NS::atomic_op::add ||
-                Op == __ESIMD_NS::atomic_op::sub ||
-                Op == __ESIMD_NS::atomic_op::minsint ||
-                Op == __ESIMD_NS::atomic_op::maxsint ||
-                Op == __ESIMD_NS::atomic_op::min ||
-                Op == __ESIMD_NS::atomic_op::max ||
-                Op == __ESIMD_NS::atomic_op::fadd ||
-                Op == __ESIMD_NS::atomic_op::fsub ||
-                Op == __ESIMD_NS::atomic_op::fmin ||
-                Op == __ESIMD_NS::atomic_op::fmax ||
-                Op == __ESIMD_NS::atomic_op::bit_and ||
-                Op == __ESIMD_NS::atomic_op::bit_or ||
-                Op == __ESIMD_NS::atomic_op::bit_xor) {
-    static_assert(NumSrc == 1, "One source operand is expected");
-  }
-  if constexpr (Op == __ESIMD_NS::atomic_op::cmpxchg ||
-                Op == __ESIMD_NS::atomic_op::fcmpwr) {
-    static_assert(NumSrc == 2, "Two source operands are expected");
-  }
-}
-
-template <__ESIMD_NS::atomic_op Op> constexpr lsc_atomic_op to_lsc_atomic_op() {
-  check_lsc_atomic_op<Op>();
-  switch (Op) {
-  case __ESIMD_NS::atomic_op::add:
-    return lsc_atomic_op::iadd;
-  case __ESIMD_NS::atomic_op::sub:
-    return lsc_atomic_op::isub;
-  case __ESIMD_NS::atomic_op::inc:
-    return lsc_atomic_op::iinc;
-  case __ESIMD_NS::atomic_op::dec:
-    return lsc_atomic_op::idec;
-  case __ESIMD_NS::atomic_op::min:
-    return lsc_atomic_op::umin;
-  case __ESIMD_NS::atomic_op::max:
-    return lsc_atomic_op::umax;
-  case __ESIMD_NS::atomic_op::cmpxchg:
-    return lsc_atomic_op::icas;
-  case __ESIMD_NS::atomic_op::bit_and:
-    return lsc_atomic_op::bit_and;
-  case __ESIMD_NS::atomic_op::bit_or:
-    return lsc_atomic_op::bit_or;
-  case __ESIMD_NS::atomic_op::bit_xor:
-    return lsc_atomic_op::bit_xor;
-  case __ESIMD_NS::atomic_op::minsint:
-    return lsc_atomic_op::smin;
-  case __ESIMD_NS::atomic_op::maxsint:
-    return lsc_atomic_op::smax;
-  case __ESIMD_NS::atomic_op::fmax:
-    return lsc_atomic_op::fmax;
-  case __ESIMD_NS::atomic_op::fmin:
-    return lsc_atomic_op::fmin;
-  case __ESIMD_NS::atomic_op::fcmpwr:
-    return lsc_atomic_op::fcas;
-  case __ESIMD_NS::atomic_op::fadd:
-    return lsc_atomic_op::fadd;
-  case __ESIMD_NS::atomic_op::fsub:
-    return lsc_atomic_op::fsub;
-  case __ESIMD_NS::atomic_op::load:
-    return lsc_atomic_op::load;
-  case __ESIMD_NS::atomic_op::store:
-    return lsc_atomic_op::store;
-  default:
-    return lsc_atomic_op::iinc;
-  }
-}
-
 template <lsc_vector_size VS> constexpr uint8_t to_int() {
   check_lsc_vector_size<VS>();
   switch (VS) {
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp
index a878aedc82225..2d6649c9b9c40 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp
@@ -12,12 +12,15 @@
 
 /// @cond ESIMD_DETAIL
 
-#include
+#include
+#include
 
 #define __ESIMD_raw_vec_t(T, SZ) \
-  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, SZ>
+  sycl::ext::intel::esimd::detail::vector_type_t< \
+      sycl::ext::intel::esimd::detail::__raw_t<T>, SZ>
 #define __ESIMD_cpp_vec_t(T, SZ) \
-  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__cpp_t<T>, SZ>
+  sycl::ext::intel::esimd::detail::vector_type_t< \
+      sycl::ext::intel::esimd::detail::__cpp_t<T>, SZ>
 
 template <typename T0, typename T1, int SZ>
 __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
index e6a18ac0fab05..e2744b6469317 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp
@@ -13,6 +13,7 @@
 #pragma once
 
 #include
+#include <sycl/ext/intel/esimd/native/common.hpp>
 #include
 
 // generic work-group split barrier
@@ -576,8 +577,8 @@ void __esimd_emu_write_2d(__ESIMD_DNS::simd_mask_storage_t<N> Pred,
 
 /// Helper function for zero-source LSC-atomic operation accessing BTI
 /// or SLM
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
-          int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
+          int N>
 auto __esimd_emu_lsc_xatomic_offset_access_0(
@@ -609,10 +610,10 @@ auto __esimd_emu_lsc_xatomic_offset_access_0(
     if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) {
       Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance));
 
-      if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) {
+      if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc) {
         __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                       static_cast<Ty>(1));
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::dec) {
        __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                      static_cast<Ty>(1));
       }
@@ -624,8 +625,8 @@ auto __esimd_emu_lsc_xatomic_offset_access_0(
 
 /// Helper function for one-source LSC-atomic operation accessing BTI
 /// or SLM
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
-          int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
+          int N>
 auto __esimd_emu_lsc_xatomic_offset_access_1(
@@ -665,60 +666,62 @@ auto __esimd_emu_lsc_xatomic_offset_access_1(
       // Keeping original values for return
       Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance));
 
-      if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) {
+      if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store) {
         __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::add) {
         __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::sub) {
         __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smin) {
         __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smax) {
        __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umin) {
         if constexpr (!__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umax) {
        if constexpr (!__ESIMD_DNS::is_fp_type<Ty>::value) {
          __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance),
                                  src0[VecIdx]);
        }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fadd) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                         src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fsub) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                         src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmin) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmax) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) {
+      } else if constexpr (Op ==
+                           __ESIMD_NS::native::lsc::atomic_op::bit_and) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_or) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance),
                                      src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) {
+      } else if constexpr (Op ==
+                           __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
@@ -731,8 +734,8 @@ auto __esimd_emu_lsc_xatomic_offset_access_1(
 
 /// Helper function for two-source LSC-atomic operation accessing BTI
 /// or SLM
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
-          int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
+          int N>
 auto __esimd_emu_lsc_xatomic_offset_access_2(
@@ -773,10 +776,11 @@ auto __esimd_emu_lsc_xatomic_offset_access_2(
       // Keeping original values for return
       Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance));
 
-      if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::icas) {
+      if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg) {
         __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance),
                                     src0[VecIdx], src1[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fcas) {
+      } else if constexpr (Op ==
+                           __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx], src1[VecIdx]);
@@ -1327,7 +1331,7 @@ __esimd_lsc_store2d_stateless(__ESIMD_DNS::simd_mask_storage_t<N> Pred,
 /// @tparam N is the SIMD size of operation (the number of addresses to access)
 /// @param pred is predicates.
 /// @param offsets is the zero-based offsets.
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
-          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
-          __ESIMD_EDNS::lsc_vector_size VS,
-          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_slm_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
 /// @param pred is predicates.
 /// @param offsets is the zero-based offsets.
 /// @param src0 is the first atomic operand.
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
-          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
-          __ESIMD_EDNS::lsc_vector_size VS,
-          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_bti_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
                           __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
@@ -1498,11 +1503,12 @@ __esimd_lsc_xatomic_bti_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
 /// @param offsets is the zero-based offsets.
 /// @param src0 is the first atomic operand.
 /// @param surf_ind is the surface index.
-template <
-    typename Ty, __ESIMD_EDNS::lsc_atomic_op Op, __ESIMD_ENS::cache_hint L1H,
-    __ESIMD_ENS::cache_hint L3H, uint16_t AddressScale, int ImmOffset,
-    __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
-    __ESIMD_EDNS::lsc_data_order _Transposed, int N, typename SurfIndAliasTy>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N,
+          typename SurfIndAliasTy>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_bti_1(
     __ESIMD_DNS::simd_mask_storage_t<N> pred,
@@ -1552,11 +1558,12 @@ __esimd_lsc_xatomic_bti_1(
 /// @param src0 is the first atomic operand.
 /// @param src1 is the second atomic operand.
 /// @param surf_ind is the surface index.
-template <
-    typename Ty, __ESIMD_EDNS::lsc_atomic_op Op, __ESIMD_ENS::cache_hint L1H,
-    __ESIMD_ENS::cache_hint L3H, uint16_t AddressScale, int ImmOffset,
-    __ESIMD_ENS::lsc_data_size DS, __ESIMD_EDNS::lsc_vector_size VS,
-    __ESIMD_EDNS::lsc_data_order _Transposed, int N, typename SurfIndAliasTy>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N,
+          typename SurfIndAliasTy>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_bti_2(
     __ESIMD_DNS::simd_mask_storage_t<N> pred,
@@ -1603,7 +1610,7 @@ __esimd_lsc_xatomic_bti_2(
 /// @tparam N is the SIMD size of operation (the number of addresses to access)
 /// @param pred is predicates.
 /// @param addrs is the prefetch addresses.
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
-          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
-          __ESIMD_EDNS::lsc_vector_size VS,
-          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
       // Keeping original values for return + 'load'
       Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance));
 
-      if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) {
+      if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc) {
         __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                       static_cast<Ty>(1));
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::dec) {
        __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                      static_cast<Ty>(1));
       }
@@ -1674,7 +1681,7 @@ __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
 /// @param pred is predicates.
 /// @param addrs is the prefetch addresses.
 /// @param src0 is the first atomic operand.
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
-          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
-          __ESIMD_EDNS::lsc_vector_size VS,
-          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_stateless_1(
-      if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) {
+      if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store) {
         __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::add) {
         __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::sub) {
         __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smin) {
         __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smax) {
         __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umin) {
         if constexpr (!__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umax) {
         if constexpr (!__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fadd) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance),
                                         src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fsub) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance),
                                         src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmin) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmax) {
         if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
           __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance),
                                   src0[VecIdx]);
         }
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_and) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_or) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance),
                                      src0[VecIdx]);
-      } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) {
+      } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
         // TODO : Type Check? Integral type only?
         __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance),
                                       src0[VecIdx]);
@@ -1798,7 +1805,7 @@ __esimd_lsc_xatomic_stateless_1(
 /// @param addrs is the prefetch addresses.
 /// @param src0 is the first atomic operand.
 /// @param src1 is the second atomic operand.
-template <typename Ty, __ESIMD_EDNS::lsc_atomic_op Op,
-          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
-          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
-          __ESIMD_EDNS::lsc_vector_size VS,
-          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
+template <typename Ty, __ESIMD_NS::native::lsc::atomic_op Op,
+          __ESIMD_ENS::cache_hint L1H, __ESIMD_ENS::cache_hint L3H,
+          uint16_t AddressScale, int ImmOffset, __ESIMD_ENS::lsc_data_size DS,
+          __ESIMD_EDNS::lsc_vector_size VS,
+          __ESIMD_EDNS::lsc_data_order _Transposed, int N>
 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_EDNS::to_int<VS>()>
 __esimd_lsc_xatomic_stateless_2(
        if constexpr (__ESIMD_DNS::is_fp_type<Ty>::value) {
          __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance),
                                      src0[VecIdx], src1[VecIdx]);
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
index df4d80c789db0..23231ce062587 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
@@ -10,6 +10,8 @@
 
 #pragma once
 
+#include
+#include
 #include
 #include
 #include
@@ -1976,8 +1978,9 @@ __ESIMD_API __ESIMD_NS::simd<T, N> dpasw(__ESIMD_NS::simd<T, N> src0,
                                          __ESIMD_NS::simd<T1, N1> src1,
                                          __ESIMD_NS::simd<T2, N2> src2,
                                          Sat sat = {}) {
   constexpr bool is_4xhf =
-      (__ESIMD_DNS::is_type<T, sycl::half>()) &&
-      src1_precision == src2_precision && src1_precision == argument_type::FP16;
+      std::is_same_v<T, __ESIMD_DNS::__raw_t<sycl::half>> &&
+      (src1_precision == src2_precision) &&
+      (src1_precision == argument_type::FP16);
 
   constexpr bool is_4xbf = __ESIMD_DNS::is_word_type<T>::value &&
                            src1_precision == src2_precision &&
@@ -2049,7 +2052,7 @@ __ESIMD_API __ESIMD_NS::simd<T, N> dpasw2(__ESIMD_NS::simd<T1, N1> src1,
                                           __ESIMD_NS::simd<T2, N2> src2,
                                           Sat sat = {}) {
   constexpr bool is_4xhf =
-      (__ESIMD_DNS::is_type<T, sycl::half>()) &&
+      std::is_same_v<T, __ESIMD_DNS::__raw_t<sycl::half>> &&
       src1_precision == src2_precision && src1_precision == argument_type::FP16;
 
   constexpr bool is_4xbf = __ESIMD_DNS::is_word_type<T>::value &&
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
index 371ecd1cc4245..8a2f89ae3ba0d 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp
@@ -10,16 +10,17 @@
 
 #pragma once
 
+#include
 #include
-#include
 #include
 #include
 
 namespace sycl {
 __SYCL_INLINE_VER_NAMESPACE(_V1) {
-namespace ext::intel::experimental::esimd {
-
-#define __ESIMD_GET_SURF_HANDLE(acc) __ESIMD_NS::get_surface_index(acc)
+namespace ext {
+namespace intel {
+namespace experimental {
+namespace esimd {
 
 /// @addtogroup sycl_esimd_memory
 /// @{
@@ -281,6 +282,29 @@ lsc_format_ret(__ESIMD_NS::simd<T1, N> Vals) {
   constexpr int Stride = Formatted.length / N;
   return Formatted.template select<N, Stride>(0);
 }
+
+/// Check the legality of lsc atomic call in terms of size and type.
+template <__ESIMD_NS::native::lsc::atomic_op Op, typename T, int N,
+          unsigned NumSrc>
+constexpr void check_lsc_atomic() {
+  if constexpr (!__ESIMD_DNS::isPowerOf2(N, 32)) {
+    static_assert((__ESIMD_DNS::isPowerOf2(N, 32)),
+                  "Execution size 1, 2, 4, 8, 16, 32 are supported");
+  }
+  if constexpr (NumSrc != __ESIMD_DNS::get_num_args<Op>()) {
+    static_assert(NumSrc == __ESIMD_DNS::get_num_args<Op>(),
+                  "wrong number of operands");
+  }
+  if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
+    if constexpr (!is_type<T, float, sycl::half>()) {
+      static_assert((is_type<T, float, sycl::half>()),
+                    "Type F or HF is expected");
+    }
+  } else {
+    __ESIMD_DNS::check_atomic<__ESIMD_DNS::to_atomic_op<Op>(), T, N, NumSrc>();
+  }
+}
+
 } // namespace detail
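A hypothetical compile-time probe of the consolidated check (not part of the patch): instantiating it with a valid combination compiles silently, while an invalid one fires a static_assert.

    SYCL_ESIMD_FUNCTION void probe() {
      namespace lsc = sycl::ext::intel::esimd::native::lsc;
      // OK: float fcmpxchg, two sources, execution size 16.
      sycl::ext::intel::experimental::esimd::detail::check_lsc_atomic<
          lsc::atomic_op::fcmpxchg, float, 16, 2>();
      // Swapping float for int here, or 16 for 3, would instead trigger the
      // "Type F or HF is expected" / power-of-2 static_asserts above.
    }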
@@ -395,7 +419,7 @@ lsc_gather(AccessorTy acc, __ESIMD_NS::simd offsets, constexpr detail::lsc_data_order _Transposed = detail::lsc_data_order::nontranspose; using _MsgT = typename detail::lsc_expand_type::type; - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); __ESIMD_NS::simd<_MsgT, N *NElts> Tmp = __esimd_lsc_load_bti<_MsgT, L1H, L3H, _AddressScale, _ImmOffset, _DS, _VS, _Transposed, N>(pred.data(), offsets.data(), si); @@ -445,7 +469,7 @@ lsc_block_load(AccessorTy acc, uint32_t offset) { constexpr int N = 1; __ESIMD_NS::simd_mask pred = 1; __ESIMD_NS::simd offsets = offset; - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); return __esimd_lsc_load_bti(pred.data(), offsets.data(), si); #endif @@ -573,7 +597,7 @@ lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd offsets, constexpr detail::lsc_data_order _Transposed = detail::lsc_data_order::nontranspose; using _MsgT = typename detail::lsc_expand_type::type; - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); __esimd_lsc_prefetch_bti<_MsgT, L1H, L3H, _AddressScale, _ImmOffset, _DS, _VS, _Transposed, N>(pred.data(), offsets.data(), si); #endif @@ -619,7 +643,7 @@ lsc_prefetch(AccessorTy acc, uint32_t offset) { constexpr int N = 1; __ESIMD_NS::simd_mask pred = 1; __ESIMD_NS::simd offsets = offset; - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); __esimd_lsc_prefetch_bti(pred.data(), offsets.data(), si); #endif @@ -815,7 +839,7 @@ lsc_scatter(AccessorTy acc, __ESIMD_NS::simd offsets, using _MsgT = typename detail::lsc_expand_type::type; using _CstT = typename detail::lsc_bitcast_type::type; __ESIMD_NS::simd<_MsgT, N *NElts> Tmp = vals.template bit_cast_view<_CstT>(); - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); __esimd_lsc_store_bti<_MsgT, L1H, L3H, _AddressScale, _ImmOffset, _DS, _VS, _Transposed, N>(pred.data(), offsets.data(), Tmp.data(), si); @@ -863,7 +887,7 @@ lsc_block_store(AccessorTy acc, uint32_t offset, constexpr int N = 1; __ESIMD_NS::simd_mask pred = 1; __ESIMD_NS::simd offsets = offset; - auto si = __ESIMD_GET_SURF_HANDLE(acc); + auto si = __ESIMD_NS::get_surface_index(acc); __esimd_lsc_store_bti(pred.data(), offsets.data(), vals.data(), si); @@ -1165,9 +1189,11 @@ template <__ESIMD_NS::atomic_op Op, typename T, int N, __ESIMD_API __ESIMD_NS::simd lsc_slm_atomic_update(__ESIMD_NS::simd offsets, __ESIMD_NS::simd_mask pred) { - detail::check_lsc_vector_size<1>(); - detail::check_lsc_data_size(); - detail::check_lsc_atomic(); + __ESIMD_EDNS::check_lsc_vector_size<1>(); + __ESIMD_EDNS::check_lsc_data_size(); + constexpr __ESIMD_NS::native::lsc::atomic_op _Op = + __ESIMD_DNS::to_lsc_atomic_op(); + __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 0>(); constexpr uint16_t _AddressScale = 1; constexpr int _ImmOffset = 0; constexpr lsc_data_size _DS = @@ -1175,7 +1201,6 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets, constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>(); constexpr detail::lsc_data_order _Transposed = detail::lsc_data_order::nontranspose; - constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op(); using _MsgT = typename detail::lsc_expand_type::type; __ESIMD_NS::simd<_MsgT, N> Tmp = __esimd_lsc_xatomic_slm_0<_MsgT, _Op, cache_hint::none, cache_hint::none, @@ -1204,7 +1229,9 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets, __ESIMD_NS::simd_mask pred) { detail::check_lsc_vector_size<1>(); 
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 1>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS =
@@ -1212,7 +1239,6 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
   __ESIMD_NS::simd<_MsgT, N> Tmp =
       __esimd_lsc_xatomic_slm_1<_MsgT, _Op, cache_hint::none, cache_hint::none,
@@ -1243,7 +1269,9 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets,
                       __ESIMD_NS::simd_mask pred) {
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 2>();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
   constexpr lsc_data_size _DS =
@@ -1251,7 +1279,6 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
   __ESIMD_NS::simd<_MsgT, N> Tmp =
       __esimd_lsc_xatomic_slm_2<_MsgT, _Op, cache_hint::none, cache_hint::none,
@@ -1290,7 +1317,9 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
 #else
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 0>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1299,9 +1328,8 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
-  auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  auto si = __ESIMD_NS::get_surface_index(acc);
   __ESIMD_NS::simd<_MsgT, N> Tmp =
       __esimd_lsc_xatomic_bti_0<_MsgT, _Op, L1H, L3H, _AddressScale,
                                 _ImmOffset, _DS, _VS, _Transposed,
                                 N>(pred.data(),
@@ -1340,7 +1368,9 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
 #else
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 1>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1349,9 +1379,8 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
-  auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  auto si = __ESIMD_NS::get_surface_index(acc);
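// Aside, purely illustrative and not part of this patch: the _Op constant
// above comes from __ESIMD_DNS::to_lsc_atomic_op(), which maps the public
// atomic_op enumerators to their native::lsc::atomic_op counterparts at
// compile time. A hypothetical sketch of such a constexpr mapping (the real
// one lives in the ESIMD detail headers and covers every operation):
//
//   template <__ESIMD_NS::atomic_op Op>
//   constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
//     using Out = __ESIMD_NS::native::lsc::atomic_op;
//     if constexpr (Op == __ESIMD_NS::atomic_op::inc)
//       return Out::inc;
//     else if constexpr (Op == __ESIMD_NS::atomic_op::fcmpxchg)
//       return Out::fcmpxchg;
//     // ...one branch per supported atomic_op value...
//   }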
   __ESIMD_NS::simd<_MsgT, N> Tmp =
       __esimd_lsc_xatomic_bti_1<_MsgT, _Op, L1H, L3H, _AddressScale,
                                 _ImmOffset, _DS, _VS, _Transposed, N>(
@@ -1392,7 +1421,9 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
 #else
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 2>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1401,9 +1432,8 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
-  auto si = __ESIMD_GET_SURF_HANDLE(acc);
+  auto si = __ESIMD_NS::get_surface_index(acc);
   __ESIMD_NS::simd<_MsgT, N> Tmp =
       __esimd_lsc_xatomic_bti_2<_MsgT, _Op, L1H, L3H, _AddressScale,
                                 _ImmOffset, _DS, _VS, _Transposed, N>(
@@ -1434,7 +1464,9 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
                   __ESIMD_NS::simd_mask pred) {
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 0>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1443,7 +1475,6 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
   __ESIMD_NS::simd addrs = reinterpret_cast(p);
   addrs += convert(offsets);
@@ -1477,7 +1508,9 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
                   __ESIMD_NS::simd src0, __ESIMD_NS::simd_mask pred) {
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 1>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1486,7 +1519,6 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
   __ESIMD_NS::simd addrs = reinterpret_cast(p);
   addrs += convert(offsets);
@@ -1522,7 +1554,9 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
                   __ESIMD_NS::simd_mask pred) {
   detail::check_lsc_vector_size<1>();
   detail::check_lsc_data_size();
-  detail::check_lsc_atomic();
+  constexpr __ESIMD_NS::native::lsc::atomic_op _Op =
+      __ESIMD_DNS::to_lsc_atomic_op();
+  __ESIMD_EDNS::check_lsc_atomic<_Op, T, N, 2>();
   detail::check_lsc_cache_hint();
   constexpr uint16_t _AddressScale = 1;
   constexpr int _ImmOffset = 0;
@@ -1531,7 +1565,6 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets,
   constexpr detail::lsc_vector_size _VS = detail::to_lsc_vector_size<1>();
   constexpr detail::lsc_data_order _Transposed =
       detail::lsc_data_order::nontranspose;
-  constexpr detail::lsc_atomic_op _Op = detail::to_lsc_atomic_op();
   using _MsgT = typename detail::lsc_expand_type::type;
   __ESIMD_NS::simd addrs = reinterpret_cast(p);
   addrs += convert(offsets);
@@ -1563,8 +1596,40 @@ __ESIMD_API void lsc_fence(__ESIMD_NS::simd_mask pred = 1) {
 
 /// @} sycl_esimd_memory_lsc
 
-#undef __ESIMD_GET_SURF_HANDLE
+} // namespace esimd
+} // namespace experimental
+
+namespace esimd {
+
+/// LSC version of the no-argument variant of \c atomic_update - accepts
+/// native::lsc::atomic_op instead of atomic_op as the atomic
+/// operation template argument.
+template
+__ESIMD_API simd atomic_update(T *p, simd offset,
+                               simd_mask mask) {
+  return __ESIMD_ENS::lsc_atomic_update(), T, N>(
+      p, offset, mask);
+}
+
+/// LSC version of the single-argument atomic update.
+template
+__ESIMD_API simd atomic_update(T *p, simd offset,
+                               simd src0, simd_mask mask) {
+  return __ESIMD_ENS::lsc_atomic_update(), T, N>(
+      p, offset, src0, mask);
+}
+
+/// LSC version of the two-argument atomic update.
+template
+__ESIMD_API simd atomic_update(T *p, simd offset,
+                               simd src0, simd src1,
+                               simd_mask mask) {
+  return __ESIMD_ENS::lsc_atomic_update(), T, N>(
+      p, offset, src0, src1, mask);
+}
 
-} // namespace ext::intel::experimental::esimd
+} // namespace esimd
+} // namespace intel
+} // namespace ext
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
diff --git a/sycl/test/esimd/enums.cpp b/sycl/test/esimd/enums.cpp
index 2f28ed25fb22d..934689cc0529f 100644
--- a/sycl/test/esimd/enums.cpp
+++ b/sycl/test/esimd/enums.cpp
@@ -3,7 +3,7 @@
 // This test checks compilation of various ESIMD enum types. Those which are
 // deprecated must produce deprecation messages.
 
-#include
+#include
 
 using namespace sycl::ext::intel::esimd;
 using namespace sycl::ext::intel::experimental::esimd;
diff --git a/sycl/test/esimd/lsc.cpp b/sycl/test/esimd/lsc.cpp
index 68ae0ae608ca2..7f622162bbff3 100644
--- a/sycl/test/esimd/lsc.cpp
+++ b/sycl/test/esimd/lsc.cpp
@@ -5,18 +5,20 @@
 // Checks ESIMD intrinsic translation.
 // NOTE: must be run in -O0, as optimizer optimizes away some of the code
 
-#include
 #include
 #include
 
 using namespace sycl::ext::intel::esimd;
 using namespace sycl::ext::intel::experimental::esimd;
 
-SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void foo();
+using AccType = sycl::accessor;
+
+SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void foo(AccType &);
 
 class EsimdFunctor {
 public:
-  void operator()() __attribute__((sycl_explicit_simd)) { foo(); }
+  AccType acc;
+  void operator()() __attribute__((sycl_explicit_simd)) { foo(acc); }
 };
 
 template
@@ -24,12 +26,12 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
   kernelFunc();
 }
 
-void bar() {
-  EsimdFunctor esimdf;
+void bar(AccType &acc) {
+  EsimdFunctor esimdf{acc};
   kernel(esimdf);
 }
 
-SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void foo() {
+SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void foo(AccType &acc) {
   constexpr int VL = 4;
   int *ptr = 0;
   uintptr_t addr = reinterpret_cast(ptr);
@@ -57,7 +59,6 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void foo() {
   lsc_prefetch(ptr, offsets);
 
-  sycl::accessor acc;
   uint32_t surf_offset = 1 * VL * sizeof(int);
   // CHECK: call void @llvm.genx.lsc.store.bti.v1i1.v1i32.v4i32(<1 x i1> {{[^)]+}}, i8 4, i8 0, i8 0, i16 1, i32 0, i8 3, i8 4, i8 2, i8 0, <1 x i32> {{[^)]+}}, <4 x i32> {{[^)]+}}, i32 {{[^)]+}})
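Finally, a hedged usage sketch of the native::lsc::atomic_op-based atomic_update overloads introduced in memory.hpp above. The add_one helper, the queue setup, and the choice of fadd are illustrative assumptions only; the sketch presumes a USM float allocation of at least 16 elements and a device with LSC support:

#include <sycl/sycl.hpp>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/intel/experimental/esimd/memory.hpp>

using namespace sycl::ext::intel::esimd;

// Adds 1.0f to data[0..15], one LSC atomic per lane (hypothetical helper).
void add_one(sycl::queue &q, float *data) {
  constexpr int VL = 16;
  q.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) SYCL_ESIMD_KERNEL {
     simd<uint32_t, VL> byte_offsets(0, sizeof(float)); // 0, 4, 8, ...
     simd<float, VL> src0 = 1.0f;
     simd_mask<VL> mask = 1;
     // The native::lsc enumerator selects the LSC flavor directly, instead
     // of going through the deprecated USM dword-atomic auto-conversion.
     atomic_update<native::lsc::atomic_op::fadd>(data, byte_offsets, src0,
                                                 mask);
   }).wait();
}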