From 8c8ab7e7b2195fc3d0754018df5b7d692d05ecb0 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Thu, 21 Apr 2022 18:21:55 +0100 Subject: [PATCH 01/34] Working marray math impls including sycl:: math/native/half_precision/experimental cases. removed marray from "floating_list" Signed-off-by: jack.kirk --- sycl/include/CL/sycl/builtins.hpp | 242 ++++++++++++++++++ .../CL/sycl/detail/generic_type_lists.hpp | 9 +- .../sycl/ext/oneapi/experimental/builtins.hpp | 50 ++++ 3 files changed, 295 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 0a9814da3eed0..12c00e4b406dc 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -32,6 +32,135 @@ detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } +#define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ + std::is_same::value || std::is_same::value || \ + std::is_same::value, \ + sycl::marray> \ + NAME(sycl::marray x) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = __sycl_std::__invoke_##NAME>(x_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD(sin) +__SYCL_MATH_FUNCTION_OVERLOAD(cos) +__SYCL_MATH_FUNCTION_OVERLOAD(tan) +__SYCL_MATH_FUNCTION_OVERLOAD(cospi) +__SYCL_MATH_FUNCTION_OVERLOAD(sinpi) +__SYCL_MATH_FUNCTION_OVERLOAD(tanpi) +__SYCL_MATH_FUNCTION_OVERLOAD(sinh) +__SYCL_MATH_FUNCTION_OVERLOAD(cosh) +__SYCL_MATH_FUNCTION_OVERLOAD(tanh) +__SYCL_MATH_FUNCTION_OVERLOAD(asin) +__SYCL_MATH_FUNCTION_OVERLOAD(acos) +__SYCL_MATH_FUNCTION_OVERLOAD(atan) +__SYCL_MATH_FUNCTION_OVERLOAD(asinpi) +__SYCL_MATH_FUNCTION_OVERLOAD(acospi) +__SYCL_MATH_FUNCTION_OVERLOAD(atanpi) +__SYCL_MATH_FUNCTION_OVERLOAD(asinh) +__SYCL_MATH_FUNCTION_OVERLOAD(acosh) +__SYCL_MATH_FUNCTION_OVERLOAD(atanh) +__SYCL_MATH_FUNCTION_OVERLOAD(cbrt) +__SYCL_MATH_FUNCTION_OVERLOAD(ceil) +__SYCL_MATH_FUNCTION_OVERLOAD(floor) +__SYCL_MATH_FUNCTION_OVERLOAD(erfc) +__SYCL_MATH_FUNCTION_OVERLOAD(erf) +__SYCL_MATH_FUNCTION_OVERLOAD(exp) +__SYCL_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_MATH_FUNCTION_OVERLOAD(exp10) +__SYCL_MATH_FUNCTION_OVERLOAD(expm1) +__SYCL_MATH_FUNCTION_OVERLOAD(tgamma) +__SYCL_MATH_FUNCTION_OVERLOAD(lgamma) +__SYCL_MATH_FUNCTION_OVERLOAD(log) +__SYCL_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_MATH_FUNCTION_OVERLOAD(log10) +__SYCL_MATH_FUNCTION_OVERLOAD(log1p) +__SYCL_MATH_FUNCTION_OVERLOAD(logb) +__SYCL_MATH_FUNCTION_OVERLOAD(rint) +__SYCL_MATH_FUNCTION_OVERLOAD(round) +__SYCL_MATH_FUNCTION_OVERLOAD(sqrt) +__SYCL_MATH_FUNCTION_OVERLOAD(rsqrt) +__SYCL_MATH_FUNCTION_OVERLOAD(trunc) + +#undef __SYCL_MATH_FUNCTION_OVERLOAD + +#define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ + std::is_same::value || std::is_same::value || \ + std::is_same::value, \ + sycl::marray> \ + NAME(sycl::marray x, sycl::marray y) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto y_vec2 = reinterpret_cast const *>(&y); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = \ + __sycl_std::__invoke_##NAME>(x_vec2[i], y_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_2_OVERLOAD(atan2) +__SYCL_MATH_FUNCTION_2_OVERLOAD(atan2pi) +__SYCL_MATH_FUNCTION_2_OVERLOAD(copysign) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fdim) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmin) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmax) +__SYCL_MATH_FUNCTION_2_OVERLOAD(fmod) +__SYCL_MATH_FUNCTION_2_OVERLOAD(hypot) +__SYCL_MATH_FUNCTION_2_OVERLOAD(maxmag) +__SYCL_MATH_FUNCTION_2_OVERLOAD(minmag) +__SYCL_MATH_FUNCTION_2_OVERLOAD(nextafter) +__SYCL_MATH_FUNCTION_2_OVERLOAD(pow) +__SYCL_MATH_FUNCTION_2_OVERLOAD(powr) +__SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) + +#undef __SYCL_MATH_FUNCTION_2_OVERLOAD + +#define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ + std::is_same::value || std::is_same::value || \ + std::is_same::value, \ + sycl::marray> \ + NAME(sycl::marray x, sycl::marray y, sycl::marray z) \ + __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto y_vec2 = reinterpret_cast const *>(&y); \ + auto z_vec2 = reinterpret_cast const *>(&z); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = __sycl_std::__invoke_##NAME>( \ + x_vec2[i], y_vec2[i], z_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1], z[N - 1]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mix) +__SYCL_MATH_FUNCTION_3_OVERLOAD(fma) + +#undef __SYCL_MATH_FUNCTION_3_OVERLOAD + // genfloat acosh (genfloat x) template detail::enable_if_t::value, T> acosh(T x) __NOEXC { @@ -1381,6 +1510,63 @@ select(T a, T b, T2 c) __NOEXC { namespace native { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ // genfloatf cos (genfloatf x) + +#define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ + sycl::marray x) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = \ + __sycl_std::__invoke_native_##NAME>(x_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_native_##NAME(x[N - 1]); \ + } \ + return res; \ + } + +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sin) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(cos) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(tan) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp10) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log10) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sqrt) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(rsqrt) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) + +#undef __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD + +#define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ + sycl::marray x, sycl::marray y) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto y_vec2 = reinterpret_cast const *>(&y); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = __sycl_std::__invoke_native_##NAME>( \ + x_vec2[i], y_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_native_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; \ + } + +__SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(divide) +__SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(powr) + +#undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD + template detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_native_cos(x); @@ -1468,6 +1654,62 @@ detail::enable_if_t::value, T> tan(T x) __NOEXC { } // namespace native namespace half_precision { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ +#define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ + sycl::marray x) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = \ + __sycl_std::__invoke_half_##NAME>(x_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_half_##NAME(x[N - 1]); \ + } \ + return res; \ + } + +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(sin) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(cos) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(tan) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp10) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log10) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(sqrt) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(rsqrt) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(recip) + +#undef __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD + +#define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ + sycl::marray x, sycl::marray y) __NOEXC { \ + sycl::marray res; \ + auto x_vec2 = reinterpret_cast const *>(&x); \ + auto y_vec2 = reinterpret_cast const *>(&y); \ + auto res_vec2 = reinterpret_cast *>(&res); \ + for (size_t i = 0; i < N / 2; i++) { \ + res_vec2[i] = __sycl_std::__invoke_half_##NAME>( \ + x_vec2[i], y_vec2[i]); \ + } \ + if (N % 2) { \ + res[N - 1] = \ + __sycl_std::__invoke_half_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; \ + } + +__SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(divide) +__SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(powr) + +#undef __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD + // genfloatf cos (genfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { diff --git a/sycl/include/CL/sycl/detail/generic_type_lists.hpp b/sycl/include/CL/sycl/detail/generic_type_lists.hpp index 7182e9f79f22e..4790a03b901d3 100644 --- a/sycl/include/CL/sycl/detail/generic_type_lists.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_lists.hpp @@ -45,8 +45,7 @@ using marray_half_list = type_list, marray, marray, marray, marray, marray>; -using half_list = - type_list; +using half_list = type_list; using scalar_float_list = type_list; @@ -58,8 +57,7 @@ using marray_float_list = type_list, marray, marray, marray, marray, marray>; -using float_list = - type_list; +using float_list = type_list; using scalar_double_list = type_list; @@ -83,8 +81,7 @@ using vector_floating_list = using marray_floating_list = type_list; -using floating_list = - type_list; +using floating_list = type_list; // geometric floating point types using scalar_geo_half_list = type_list; diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index c8fa033d8c79e..6b0bf7f01b313 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -100,6 +100,32 @@ inline __SYCL_ALWAYS_INLINE #endif } +template +inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || + std::is_same::value, + sycl::marray> +tanh(sycl::marray x) __NOEXC { + sycl::marray res; + auto x_vec2 = reinterpret_cast const *>(&x); + auto res_vec2 = reinterpret_cast *>(&res); +#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + for (size_t i = 0; i < N / 2; i++) { + res_vec2[i] = __clc_native_tanh(x_vec2[i]); + } + if constexpr (N % 2) { + res[N - 1] = __clc_native_tanh(x[N - 1]); + } +#else + for (size_t i = 0; i < N / 2; i++) { + res_vec2[i] = __sycl_std::__invoke_tanh>(x_vec2[i]); + } + if constexpr (N % 2) { + res[N - 1] = __sycl_std::__invoke_tanh(x[N - 1]); + } +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + return res; +} + // genfloath exp2 (genfloath x) template inline __SYCL_ALWAYS_INLINE @@ -115,6 +141,30 @@ inline __SYCL_ALWAYS_INLINE #endif } +template +inline __SYCL_ALWAYS_INLINE sycl::marray +exp2(sycl::marray x) __NOEXC { + sycl::marray res; + auto x_vec2 = reinterpret_cast const *>(&x); + auto res_vec2 = reinterpret_cast *>(&res); +#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + for (size_t i = 0; i < N / 2; i++) { + res_vec2[i] = __clc_native_exp2(x_vec2[i]); + } + if constexpr (N % 2) { + res[N - 1] = __clc_native_exp2(x[N - 1]); + } +#else + for (size_t i = 0; i < N / 2; i++) { + res_vec2[i] = __sycl_std::__invoke_exp2>(x_vec2[i]); + } + if constexpr (N % 2) { + res[N - 1] = __sycl_std::__invoke_exp2(x[N - 1]); + } +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + return res; +} + } // namespace native } // namespace experimental From 477d0795bab6d84227299f9b0c23f6c04e078085 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Wed, 11 May 2022 15:36:17 +0100 Subject: [PATCH 02/34] introduced scalar_vector lists not including marray. Signed-off-by: jack.kirk --- sycl/include/CL/sycl/builtins.hpp | 246 +++++++++--------- .../CL/sycl/detail/generic_type_lists.hpp | 14 +- .../CL/sycl/detail/generic_type_traits.hpp | 8 + .../sycl/ext/oneapi/experimental/builtins.hpp | 6 +- 4 files changed, 145 insertions(+), 129 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 12c00e4b406dc..46e6a59e2a200 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -28,7 +28,7 @@ namespace __sycl_std = __host_std; /* ----------------- 4.13.3 Math functions. ---------------------------------*/ // genfloat acos (genfloat x) template -detail::enable_if_t::value, T> acos(T x) __NOEXC { +detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } @@ -163,166 +163,166 @@ __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) // genfloat acosh (genfloat x) template -detail::enable_if_t::value, T> acosh(T x) __NOEXC { +detail::enable_if_t::value, T> acosh(T x) __NOEXC { return __sycl_std::__invoke_acosh(x); } // genfloat acospi (genfloat x) template -detail::enable_if_t::value, T> acospi(T x) __NOEXC { +detail::enable_if_t::value, T> acospi(T x) __NOEXC { return __sycl_std::__invoke_acospi(x); } // genfloat asin (genfloat x) template -detail::enable_if_t::value, T> asin(T x) __NOEXC { +detail::enable_if_t::value, T> asin(T x) __NOEXC { return __sycl_std::__invoke_asin(x); } // genfloat asinh (genfloat x) template -detail::enable_if_t::value, T> asinh(T x) __NOEXC { +detail::enable_if_t::value, T> asinh(T x) __NOEXC { return __sycl_std::__invoke_asinh(x); } // genfloat asinpi (genfloat x) template -detail::enable_if_t::value, T> asinpi(T x) __NOEXC { +detail::enable_if_t::value, T> asinpi(T x) __NOEXC { return __sycl_std::__invoke_asinpi(x); } // genfloat atan (genfloat y_over_x) template -detail::enable_if_t::value, T> atan(T y_over_x) __NOEXC { +detail::enable_if_t::value, T> atan(T y_over_x) __NOEXC { return __sycl_std::__invoke_atan(y_over_x); } // genfloat atan2 (genfloat y, genfloat x) template -detail::enable_if_t::value, T> atan2(T y, T x) __NOEXC { +detail::enable_if_t::value, T> atan2(T y, T x) __NOEXC { return __sycl_std::__invoke_atan2(y, x); } // genfloat atanh (genfloat x) template -detail::enable_if_t::value, T> atanh(T x) __NOEXC { +detail::enable_if_t::value, T> atanh(T x) __NOEXC { return __sycl_std::__invoke_atanh(x); } // genfloat atanpi (genfloat x) template -detail::enable_if_t::value, T> atanpi(T x) __NOEXC { +detail::enable_if_t::value, T> atanpi(T x) __NOEXC { return __sycl_std::__invoke_atanpi(x); } // genfloat atan2pi (genfloat y, genfloat x) template -detail::enable_if_t::value, T> atan2pi(T y, +detail::enable_if_t::value, T> atan2pi(T y, T x) __NOEXC { return __sycl_std::__invoke_atan2pi(y, x); } // genfloat cbrt (genfloat x) template -detail::enable_if_t::value, T> cbrt(T x) __NOEXC { +detail::enable_if_t::value, T> cbrt(T x) __NOEXC { return __sycl_std::__invoke_cbrt(x); } // genfloat ceil (genfloat x) template -detail::enable_if_t::value, T> ceil(T x) __NOEXC { +detail::enable_if_t::value, T> ceil(T x) __NOEXC { return __sycl_std::__invoke_ceil(x); } // genfloat copysign (genfloat x, genfloat y) template -detail::enable_if_t::value, T> copysign(T x, +detail::enable_if_t::value, T> copysign(T x, T y) __NOEXC { return __sycl_std::__invoke_copysign(x, y); } // genfloat cos (genfloat x) template -detail::enable_if_t::value, T> cos(T x) __NOEXC { +detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_cos(x); } // genfloat cosh (genfloat x) template -detail::enable_if_t::value, T> cosh(T x) __NOEXC { +detail::enable_if_t::value, T> cosh(T x) __NOEXC { return __sycl_std::__invoke_cosh(x); } // genfloat cospi (genfloat x) template -detail::enable_if_t::value, T> cospi(T x) __NOEXC { +detail::enable_if_t::value, T> cospi(T x) __NOEXC { return __sycl_std::__invoke_cospi(x); } // genfloat erfc (genfloat x) template -detail::enable_if_t::value, T> erfc(T x) __NOEXC { +detail::enable_if_t::value, T> erfc(T x) __NOEXC { return __sycl_std::__invoke_erfc(x); } // genfloat erf (genfloat x) template -detail::enable_if_t::value, T> erf(T x) __NOEXC { +detail::enable_if_t::value, T> erf(T x) __NOEXC { return __sycl_std::__invoke_erf(x); } // genfloat exp (genfloat x ) template -detail::enable_if_t::value, T> exp(T x) __NOEXC { +detail::enable_if_t::value, T> exp(T x) __NOEXC { return __sycl_std::__invoke_exp(x); } // genfloat exp2 (genfloat x) template -detail::enable_if_t::value, T> exp2(T x) __NOEXC { +detail::enable_if_t::value, T> exp2(T x) __NOEXC { return __sycl_std::__invoke_exp2(x); } // genfloat exp10 (genfloat x) template -detail::enable_if_t::value, T> exp10(T x) __NOEXC { +detail::enable_if_t::value, T> exp10(T x) __NOEXC { return __sycl_std::__invoke_exp10(x); } // genfloat expm1 (genfloat x) template -detail::enable_if_t::value, T> expm1(T x) __NOEXC { +detail::enable_if_t::value, T> expm1(T x) __NOEXC { return __sycl_std::__invoke_expm1(x); } // genfloat fabs (genfloat x) template -detail::enable_if_t::value, T> fabs(T x) __NOEXC { +detail::enable_if_t::value, T> fabs(T x) __NOEXC { return __sycl_std::__invoke_fabs(x); } // genfloat fdim (genfloat x, genfloat y) template -detail::enable_if_t::value, T> fdim(T x, T y) __NOEXC { +detail::enable_if_t::value, T> fdim(T x, T y) __NOEXC { return __sycl_std::__invoke_fdim(x, y); } // genfloat floor (genfloat x) template -detail::enable_if_t::value, T> floor(T x) __NOEXC { +detail::enable_if_t::value, T> floor(T x) __NOEXC { return __sycl_std::__invoke_floor(x); } // genfloat fma (genfloat a, genfloat b, genfloat c) template -detail::enable_if_t::value, T> fma(T a, T b, +detail::enable_if_t::value, T> fma(T a, T b, T c) __NOEXC { return __sycl_std::__invoke_fma(a, b, c); } // genfloat fmax (genfloat x, genfloat y) template -detail::enable_if_t::value, T> fmax(T x, T y) __NOEXC { +detail::enable_if_t::value, T> fmax(T x, T y) __NOEXC { return __sycl_std::__invoke_fmax(x, y); } @@ -335,7 +335,7 @@ fmax(T x, typename T::element_type y) __NOEXC { // genfloat fmin (genfloat x, genfloat y) template -detail::enable_if_t::value, T> fmin(T x, T y) __NOEXC { +detail::enable_if_t::value, T> fmin(T x, T y) __NOEXC { return __sycl_std::__invoke_fmin(x, y); } @@ -348,14 +348,14 @@ fmin(T x, typename T::element_type y) __NOEXC { // genfloat fmod (genfloat x, genfloat y) template -detail::enable_if_t::value, T> fmod(T x, T y) __NOEXC { +detail::enable_if_t::value, T> fmod(T x, T y) __NOEXC { return __sycl_std::__invoke_fmod(x, y); } // genfloat fract (genfloat x, genfloatptr iptr) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genfloatptr::value, T> + detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> fract(T x, T2 iptr) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_fract(x, iptr); @@ -364,7 +364,7 @@ fract(T x, T2 iptr) __NOEXC { // genfloat frexp (genfloat x, genintptr exp) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genintptr::value, T> + detail::is_svgenfloat::value && detail::is_genintptr::value, T> frexp(T x, T2 exp) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_frexp(x, exp); @@ -372,13 +372,13 @@ frexp(T x, T2 exp) __NOEXC { // genfloat hypot (genfloat x, genfloat y) template -detail::enable_if_t::value, T> hypot(T x, T y) __NOEXC { +detail::enable_if_t::value, T> hypot(T x, T y) __NOEXC { return __sycl_std::__invoke_hypot(x, y); } // genint ilogb (genfloat x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::change_base_type_t ilogb(T x) __NOEXC { return __sycl_std::__invoke_ilogb>(x); } @@ -410,14 +410,14 @@ ldexp(T x, T2 k) __NOEXC { // genfloat lgamma (genfloat x) template -detail::enable_if_t::value, T> lgamma(T x) __NOEXC { +detail::enable_if_t::value, T> lgamma(T x) __NOEXC { return __sycl_std::__invoke_lgamma(x); } // genfloat lgamma_r (genfloat x, genintptr signp) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genintptr::value, T> + detail::is_svgenfloat::value && detail::is_genintptr::value, T> lgamma_r(T x, T2 signp) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_lgamma_r(x, signp); @@ -425,57 +425,57 @@ lgamma_r(T x, T2 signp) __NOEXC { // genfloat log (genfloat x) template -detail::enable_if_t::value, T> log(T x) __NOEXC { +detail::enable_if_t::value, T> log(T x) __NOEXC { return __sycl_std::__invoke_log(x); } // genfloat log2 (genfloat x) template -detail::enable_if_t::value, T> log2(T x) __NOEXC { +detail::enable_if_t::value, T> log2(T x) __NOEXC { return __sycl_std::__invoke_log2(x); } // genfloat log10 (genfloat x) template -detail::enable_if_t::value, T> log10(T x) __NOEXC { +detail::enable_if_t::value, T> log10(T x) __NOEXC { return __sycl_std::__invoke_log10(x); } // genfloat log1p (genfloat x) template -detail::enable_if_t::value, T> log1p(T x) __NOEXC { +detail::enable_if_t::value, T> log1p(T x) __NOEXC { return __sycl_std::__invoke_log1p(x); } // genfloat logb (genfloat x) template -detail::enable_if_t::value, T> logb(T x) __NOEXC { +detail::enable_if_t::value, T> logb(T x) __NOEXC { return __sycl_std::__invoke_logb(x); } // genfloat mad (genfloat a, genfloat b, genfloat c) template -detail::enable_if_t::value, T> mad(T a, T b, +detail::enable_if_t::value, T> mad(T a, T b, T c) __NOEXC { return __sycl_std::__invoke_mad(a, b, c); } // genfloat maxmag (genfloat x, genfloat y) template -detail::enable_if_t::value, T> maxmag(T x, T y) __NOEXC { +detail::enable_if_t::value, T> maxmag(T x, T y) __NOEXC { return __sycl_std::__invoke_maxmag(x, y); } // genfloat minmag (genfloat x, genfloat y) template -detail::enable_if_t::value, T> minmag(T x, T y) __NOEXC { +detail::enable_if_t::value, T> minmag(T x, T y) __NOEXC { return __sycl_std::__invoke_minmag(x, y); } // genfloat modf (genfloat x, genfloatptr iptr) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genfloatptr::value, T> + detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> modf(T x, T2 iptr) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_modf(x, iptr); @@ -490,21 +490,21 @@ detail::nan_return_t nan(T nancode) __NOEXC { // genfloat nextafter (genfloat x, genfloat y) template -detail::enable_if_t::value, T> nextafter(T x, +detail::enable_if_t::value, T> nextafter(T x, T y) __NOEXC { return __sycl_std::__invoke_nextafter(x, y); } // genfloat pow (genfloat x, genfloat y) template -detail::enable_if_t::value, T> pow(T x, T y) __NOEXC { +detail::enable_if_t::value, T> pow(T x, T y) __NOEXC { return __sycl_std::__invoke_pow(x, y); } // genfloat pown (genfloat x, genint y) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genint::value, T> + detail::is_svgenfloat::value && detail::is_genint::value, T> pown(T x, T2 y) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_pown(x, y); @@ -512,13 +512,13 @@ pown(T x, T2 y) __NOEXC { // genfloat powr (genfloat x, genfloat y) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_powr(x, y); } // genfloat remainder (genfloat x, genfloat y) template -detail::enable_if_t::value, T> remainder(T x, +detail::enable_if_t::value, T> remainder(T x, T y) __NOEXC { return __sycl_std::__invoke_remainder(x, y); } @@ -526,7 +526,7 @@ detail::enable_if_t::value, T> remainder(T x, // genfloat remquo (genfloat x, genfloat y, genintptr quo) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genintptr::value, T> + detail::is_svgenfloat::value && detail::is_genintptr::value, T> remquo(T x, T y, T2 quo) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_remquo(x, y, quo); @@ -534,14 +534,14 @@ remquo(T x, T y, T2 quo) __NOEXC { // genfloat rint (genfloat x) template -detail::enable_if_t::value, T> rint(T x) __NOEXC { +detail::enable_if_t::value, T> rint(T x) __NOEXC { return __sycl_std::__invoke_rint(x); } // genfloat rootn (genfloat x, genint y) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genint::value, T> + detail::is_svgenfloat::value && detail::is_genint::value, T> rootn(T x, T2 y) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_rootn(x, y); @@ -549,26 +549,26 @@ rootn(T x, T2 y) __NOEXC { // genfloat round (genfloat x) template -detail::enable_if_t::value, T> round(T x) __NOEXC { +detail::enable_if_t::value, T> round(T x) __NOEXC { return __sycl_std::__invoke_round(x); } // genfloat rsqrt (genfloat x) template -detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { +detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_rsqrt(x); } // genfloat sin (genfloat x) template -detail::enable_if_t::value, T> sin(T x) __NOEXC { +detail::enable_if_t::value, T> sin(T x) __NOEXC { return __sycl_std::__invoke_sin(x); } // genfloat sincos (genfloat x, genfloatptr cosval) template detail::enable_if_t< - detail::is_genfloat::value && detail::is_genfloatptr::value, T> + detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> sincos(T x, T2 cosval) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_sincos(x, cosval); @@ -576,56 +576,56 @@ sincos(T x, T2 cosval) __NOEXC { // genfloat sinh (genfloat x) template -detail::enable_if_t::value, T> sinh(T x) __NOEXC { +detail::enable_if_t::value, T> sinh(T x) __NOEXC { return __sycl_std::__invoke_sinh(x); } // genfloat sinpi (genfloat x) template -detail::enable_if_t::value, T> sinpi(T x) __NOEXC { +detail::enable_if_t::value, T> sinpi(T x) __NOEXC { return __sycl_std::__invoke_sinpi(x); } // genfloat sqrt (genfloat x) template -detail::enable_if_t::value, T> sqrt(T x) __NOEXC { +detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_sqrt(x); } // genfloat tan (genfloat x) template -detail::enable_if_t::value, T> tan(T x) __NOEXC { +detail::enable_if_t::value, T> tan(T x) __NOEXC { return __sycl_std::__invoke_tan(x); } // genfloat tanh (genfloat x) template -detail::enable_if_t::value, T> tanh(T x) __NOEXC { +detail::enable_if_t::value, T> tanh(T x) __NOEXC { return __sycl_std::__invoke_tanh(x); } // genfloat tanpi (genfloat x) template -detail::enable_if_t::value, T> tanpi(T x) __NOEXC { +detail::enable_if_t::value, T> tanpi(T x) __NOEXC { return __sycl_std::__invoke_tanpi(x); } // genfloat tgamma (genfloat x) template -detail::enable_if_t::value, T> tgamma(T x) __NOEXC { +detail::enable_if_t::value, T> tgamma(T x) __NOEXC { return __sycl_std::__invoke_tgamma(x); } // genfloat trunc (genfloat x) template -detail::enable_if_t::value, T> trunc(T x) __NOEXC { +detail::enable_if_t::value, T> trunc(T x) __NOEXC { return __sycl_std::__invoke_trunc(x); } /* --------------- 4.13.5 Common functions. ---------------------------------*/ // genfloat clamp (genfloat x, genfloat minval, genfloat maxval) template -detail::enable_if_t::value, T> clamp(T x, T minval, +detail::enable_if_t::value, T> clamp(T x, T minval, T maxval) __NOEXC { return __sycl_std::__invoke_fclamp(x, minval, maxval); } @@ -642,20 +642,20 @@ clamp(T x, typename T::element_type minval, // genfloat degrees (genfloat radians) template -detail::enable_if_t::value, T> +detail::enable_if_t::value, T> degrees(T radians) __NOEXC { return __sycl_std::__invoke_degrees(radians); } // genfloat abs (genfloat x) template -detail::enable_if_t::value, T> abs(T x) __NOEXC { +detail::enable_if_t::value, T> abs(T x) __NOEXC { return __sycl_std::__invoke_fabs(x); } // genfloat max (genfloat x, genfloat y) template -detail::enable_if_t::value, T>(max)(T x, T y) __NOEXC { +detail::enable_if_t::value, T>(max)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmax_common(x, y); } @@ -670,7 +670,7 @@ detail::enable_if_t::value, T>(max)( // genfloat min (genfloat x, genfloat y) template -detail::enable_if_t::value, T>(min)(T x, T y) __NOEXC { +detail::enable_if_t::value, T>(min)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmin_common(x, y); } @@ -685,7 +685,7 @@ detail::enable_if_t::value, T>(min)( // genfloat mix (genfloat x, genfloat y, genfloat a) template -detail::enable_if_t::value, T> mix(T x, T y, +detail::enable_if_t::value, T> mix(T x, T y, T a) __NOEXC { return __sycl_std::__invoke_mix(x, y, a); } @@ -701,14 +701,14 @@ mix(T x, T y, typename T::element_type a) __NOEXC { // genfloat radians (genfloat degrees) template -detail::enable_if_t::value, T> +detail::enable_if_t::value, T> radians(T degrees) __NOEXC { return __sycl_std::__invoke_radians(degrees); } // genfloat step (genfloat edge, genfloat x) template -detail::enable_if_t::value, T> step(T edge, +detail::enable_if_t::value, T> step(T edge, T x) __NOEXC { return __sycl_std::__invoke_step(edge, x); } @@ -724,7 +724,7 @@ step(typename T::element_type edge, T x) __NOEXC { // genfloat smoothstep (genfloat edge0, genfloat edge1, genfloat x) template -detail::enable_if_t::value, T> +detail::enable_if_t::value, T> smoothstep(T edge0, T edge1, T x) __NOEXC { return __sycl_std::__invoke_smoothstep(edge0, edge1, x); } @@ -741,7 +741,7 @@ smoothstep(typename T::element_type edge0, typename T::element_type edge1, // genfloat sign (genfloat x) template -detail::enable_if_t::value, T> sign(T x) __NOEXC { +detail::enable_if_t::value, T> sign(T x) __NOEXC { return __sycl_std::__invoke_sign(x); } @@ -1238,7 +1238,7 @@ fast_normalize(T p) __NOEXC { // int isequal (double x,double y); // longn isequal (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isequal(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdEqual>(x, y)); @@ -1250,7 +1250,7 @@ detail::common_rel_ret_t isequal(T x, T y) __NOEXC { // int isnotequal (double x, double y) // longn isnotequal (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FUnordNotEqual>(x, y)); @@ -1262,7 +1262,7 @@ detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { // int isgreater (double x, double y) // longn isgreater (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdGreaterThan>(x, y)); @@ -1274,7 +1274,7 @@ detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { // int isgreaterequal (double x, double y) // longn isgreaterequal (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdGreaterThanEqual>(x, y)); @@ -1286,7 +1286,7 @@ detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { // int isless (long x, long y) // longn isless (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isless(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdLessThan>(x, y)); @@ -1298,7 +1298,7 @@ detail::common_rel_ret_t isless(T x, T y) __NOEXC { // int islessequal (double x, double y) // longn islessequal (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdLessThanEqual>(x, y)); @@ -1310,7 +1310,7 @@ detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { // int islessgreater (double x, double y) // longn islessgreater (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_FOrdNotEqual>(x, y)); @@ -1322,7 +1322,7 @@ detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { // int isfinite (double x) // longn isfinite (doublen x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isfinite(T x) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_IsFinite>(x)); @@ -1334,7 +1334,7 @@ detail::common_rel_ret_t isfinite(T x) __NOEXC { // int isinf (double x) // longn isinf (doublen x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isinf(T x) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_IsInf>(x)); @@ -1346,7 +1346,7 @@ detail::common_rel_ret_t isinf(T x) __NOEXC { // int isnan (double x) // longn isnan (doublen x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isnan(T x) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_IsNan>(x)); @@ -1358,7 +1358,7 @@ detail::common_rel_ret_t isnan(T x) __NOEXC { // int isnormal (double x) // longn isnormal (doublen x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isnormal(T x) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_IsNormal>(x)); @@ -1370,7 +1370,7 @@ detail::common_rel_ret_t isnormal(T x) __NOEXC { // int isordered (double x, double y) // longn isordered (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isordered(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_Ordered>(x, y)); @@ -1382,7 +1382,7 @@ detail::common_rel_ret_t isordered(T x, T y) __NOEXC { // int isunordered (double x, double y) // longn isunordered (doublen x, doublen y) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_Unordered>(x, y)); @@ -1394,7 +1394,7 @@ detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { // int signbit (double) // longn signbit (doublen x) template ::value, T>> + typename = detail::enable_if_t::value, T>> detail::common_rel_ret_t signbit(T x) __NOEXC { return detail::RelConverter::apply( __sycl_std::__invoke_SignBitSet>(x)); @@ -1456,7 +1456,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloatf select (genfloatf a, genfloatf b, genint c) template detail::enable_if_t< - detail::is_genfloatf::value && detail::is_genint::value, T> + detail::is_svgenfloatf::value && detail::is_genint::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1465,7 +1465,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloatf select (genfloatf a, genfloatf b, ugenint c) template detail::enable_if_t< - detail::is_genfloatf::value && detail::is_ugenint::value, T> + detail::is_svgenfloatf::value && detail::is_ugenint::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1474,7 +1474,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) template detail::enable_if_t< - detail::is_genfloatd::value && detail::is_igeninteger64bit::value, T> + detail::is_svgenfloatd::value && detail::is_igeninteger64bit::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1483,7 +1483,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) template detail::enable_if_t< - detail::is_genfloatd::value && detail::is_ugeninteger64bit::value, T> + detail::is_svgenfloatd::value && detail::is_ugeninteger64bit::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1492,7 +1492,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloath select (genfloath a, genfloath b, igeninteger16 c) template detail::enable_if_t< - detail::is_genfloath::value && detail::is_igeninteger16bit::value, T> + detail::is_svgenfloath::value && detail::is_igeninteger16bit::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1501,7 +1501,7 @@ select(T a, T b, T2 c) __NOEXC { // genfloath select (genfloath a, genfloath b, ugeninteger16 c) template detail::enable_if_t< - detail::is_genfloath::value && detail::is_ugeninteger16bit::value, T> + detail::is_svgenfloath::value && detail::is_ugeninteger16bit::value, T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1568,86 +1568,86 @@ __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(powr) #undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD template -detail::enable_if_t::value, T> cos(T x) __NOEXC { +detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_native_cos(x); } // genfloatf divide (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> divide(T x, +detail::enable_if_t::value, T> divide(T x, T y) __NOEXC { return __sycl_std::__invoke_native_divide(x, y); } // genfloatf exp (genfloatf x) template -detail::enable_if_t::value, T> exp(T x) __NOEXC { +detail::enable_if_t::value, T> exp(T x) __NOEXC { return __sycl_std::__invoke_native_exp(x); } // genfloatf exp2 (genfloatf x) template -detail::enable_if_t::value, T> exp2(T x) __NOEXC { +detail::enable_if_t::value, T> exp2(T x) __NOEXC { return __sycl_std::__invoke_native_exp2(x); } // genfloatf exp10 (genfloatf x) template -detail::enable_if_t::value, T> exp10(T x) __NOEXC { +detail::enable_if_t::value, T> exp10(T x) __NOEXC { return __sycl_std::__invoke_native_exp10(x); } // genfloatf log (genfloatf x) template -detail::enable_if_t::value, T> log(T x) __NOEXC { +detail::enable_if_t::value, T> log(T x) __NOEXC { return __sycl_std::__invoke_native_log(x); } // genfloatf log2 (genfloatf x) template -detail::enable_if_t::value, T> log2(T x) __NOEXC { +detail::enable_if_t::value, T> log2(T x) __NOEXC { return __sycl_std::__invoke_native_log2(x); } // genfloatf log10 (genfloatf x) template -detail::enable_if_t::value, T> log10(T x) __NOEXC { +detail::enable_if_t::value, T> log10(T x) __NOEXC { return __sycl_std::__invoke_native_log10(x); } // genfloatf powr (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_native_powr(x, y); } // genfloatf recip (genfloatf x) template -detail::enable_if_t::value, T> recip(T x) __NOEXC { +detail::enable_if_t::value, T> recip(T x) __NOEXC { return __sycl_std::__invoke_native_recip(x); } // genfloatf rsqrt (genfloatf x) template -detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { +detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_native_rsqrt(x); } // genfloatf sin (genfloatf x) template -detail::enable_if_t::value, T> sin(T x) __NOEXC { +detail::enable_if_t::value, T> sin(T x) __NOEXC { return __sycl_std::__invoke_native_sin(x); } // genfloatf sqrt (genfloatf x) template -detail::enable_if_t::value, T> sqrt(T x) __NOEXC { +detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_native_sqrt(x); } // genfloatf tan (genfloatf x) template -detail::enable_if_t::value, T> tan(T x) __NOEXC { +detail::enable_if_t::value, T> tan(T x) __NOEXC { return __sycl_std::__invoke_native_tan(x); } @@ -1712,86 +1712,86 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(powr) // genfloatf cos (genfloatf x) template -detail::enable_if_t::value, T> cos(T x) __NOEXC { +detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_half_cos(x); } // genfloatf divide (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> divide(T x, +detail::enable_if_t::value, T> divide(T x, T y) __NOEXC { return __sycl_std::__invoke_half_divide(x, y); } // genfloatf exp (genfloatf x) template -detail::enable_if_t::value, T> exp(T x) __NOEXC { +detail::enable_if_t::value, T> exp(T x) __NOEXC { return __sycl_std::__invoke_half_exp(x); } // genfloatf exp2 (genfloatf x) template -detail::enable_if_t::value, T> exp2(T x) __NOEXC { +detail::enable_if_t::value, T> exp2(T x) __NOEXC { return __sycl_std::__invoke_half_exp2(x); } // genfloatf exp10 (genfloatf x) template -detail::enable_if_t::value, T> exp10(T x) __NOEXC { +detail::enable_if_t::value, T> exp10(T x) __NOEXC { return __sycl_std::__invoke_half_exp10(x); } // genfloatf log (genfloatf x) template -detail::enable_if_t::value, T> log(T x) __NOEXC { +detail::enable_if_t::value, T> log(T x) __NOEXC { return __sycl_std::__invoke_half_log(x); } // genfloatf log2 (genfloatf x) template -detail::enable_if_t::value, T> log2(T x) __NOEXC { +detail::enable_if_t::value, T> log2(T x) __NOEXC { return __sycl_std::__invoke_half_log2(x); } // genfloatf log10 (genfloatf x) template -detail::enable_if_t::value, T> log10(T x) __NOEXC { +detail::enable_if_t::value, T> log10(T x) __NOEXC { return __sycl_std::__invoke_half_log10(x); } // genfloatf powr (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_half_powr(x, y); } // genfloatf recip (genfloatf x) template -detail::enable_if_t::value, T> recip(T x) __NOEXC { +detail::enable_if_t::value, T> recip(T x) __NOEXC { return __sycl_std::__invoke_half_recip(x); } // genfloatf rsqrt (genfloatf x) template -detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { +detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_half_rsqrt(x); } // genfloatf sin (genfloatf x) template -detail::enable_if_t::value, T> sin(T x) __NOEXC { +detail::enable_if_t::value, T> sin(T x) __NOEXC { return __sycl_std::__invoke_half_sin(x); } // genfloatf sqrt (genfloatf x) template -detail::enable_if_t::value, T> sqrt(T x) __NOEXC { +detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_half_sqrt(x); } // genfloatf tan (genfloatf x) template -detail::enable_if_t::value, T> tan(T x) __NOEXC { +detail::enable_if_t::value, T> tan(T x) __NOEXC { return __sycl_std::__invoke_half_tan(x); } diff --git a/sycl/include/CL/sycl/detail/generic_type_lists.hpp b/sycl/include/CL/sycl/detail/generic_type_lists.hpp index 4790a03b901d3..9e1420fd9e7a7 100644 --- a/sycl/include/CL/sycl/detail/generic_type_lists.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_lists.hpp @@ -45,7 +45,9 @@ using marray_half_list = type_list, marray, marray, marray, marray, marray>; -using half_list = type_list; +using scalar_vector_half_list = type_list; + +using half_list = type_list; using scalar_float_list = type_list; @@ -57,7 +59,9 @@ using marray_float_list = type_list, marray, marray, marray, marray, marray>; -using float_list = type_list; +using scalar_vector_float_list = type_list; + +using float_list = type_list; using scalar_double_list = type_list; @@ -69,6 +73,8 @@ using marray_double_list = type_list, marray, marray, marray, marray, marray>; +using scalar_vector_double_list = type_list; + using double_list = type_list; @@ -81,7 +87,9 @@ using vector_floating_list = using marray_floating_list = type_list; -using floating_list = type_list; +using scalar_vector_floating_list = type_list; + +using floating_list = type_list; // geometric floating point types using scalar_geo_half_list = type_list; diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index 670fb49104a60..e16b1d248a63b 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -25,15 +25,21 @@ template using is_floatn = is_contained; template using is_genfloatf = is_contained; +template using is_svgenfloatf = is_contained; + template using is_doublen = is_contained; template using is_genfloatd = is_contained; +template using is_svgenfloatd = is_contained; + template using is_halfn = is_contained; template using is_genfloath = is_contained; +template using is_svgenfloath = is_contained; + template using is_genfloat = is_contained; template @@ -42,6 +48,8 @@ using is_sgenfloat = is_contained; template using is_vgenfloat = is_contained; +template using is_svgenfloat = is_contained; + template using is_gengeofloat = is_contained; diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 6b0bf7f01b313..39ab813fe825a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -86,8 +86,8 @@ namespace native { // genfloatfh tanh (genfloatfh x) template inline __SYCL_ALWAYS_INLINE - sycl::detail::enable_if_t::value || - sycl::detail::is_genfloath::value, + sycl::detail::enable_if_t::value || + sycl::detail::is_svgenfloath::value, T> tanh(T x) __NOEXC { #if defined(__NVPTX__) @@ -129,7 +129,7 @@ tanh(sycl::marray x) __NOEXC { // genfloath exp2 (genfloath x) template inline __SYCL_ALWAYS_INLINE - sycl::detail::enable_if_t::value, T> + sycl::detail::enable_if_t::value, T> exp2(T x) __NOEXC { #if defined(__NVPTX__) using _ocl_T = cl::sycl::detail::ConvertToOpenCLType_t; From a0e5bacdd749e178c42c4f774b41748635118217 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Wed, 11 May 2022 16:20:45 +0100 Subject: [PATCH 03/34] format Signed-off-by: jack.kirk --- sycl/include/CL/sycl/builtins.hpp | 55 +++++++++++-------- .../CL/sycl/detail/generic_type_lists.hpp | 18 ++++-- .../CL/sycl/detail/generic_type_traits.hpp | 12 ++-- 3 files changed, 52 insertions(+), 33 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 8bfd1e7de6550..2b934f570e929 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -200,13 +200,15 @@ detail::enable_if_t::value, T> asinpi(T x) __NOEXC { // genfloat atan (genfloat y_over_x) template -detail::enable_if_t::value, T> atan(T y_over_x) __NOEXC { +detail::enable_if_t::value, T> +atan(T y_over_x) __NOEXC { return __sycl_std::__invoke_atan(y_over_x); } // genfloat atan2 (genfloat y, genfloat x) template -detail::enable_if_t::value, T> atan2(T y, T x) __NOEXC { +detail::enable_if_t::value, T> atan2(T y, + T x) __NOEXC { return __sycl_std::__invoke_atan2(y, x); } @@ -225,7 +227,7 @@ detail::enable_if_t::value, T> atanpi(T x) __NOEXC { // genfloat atan2pi (genfloat y, genfloat x) template detail::enable_if_t::value, T> atan2pi(T y, - T x) __NOEXC { + T x) __NOEXC { return __sycl_std::__invoke_atan2pi(y, x); } @@ -244,7 +246,7 @@ detail::enable_if_t::value, T> ceil(T x) __NOEXC { // genfloat copysign (genfloat x, genfloat y) template detail::enable_if_t::value, T> copysign(T x, - T y) __NOEXC { + T y) __NOEXC { return __sycl_std::__invoke_copysign(x, y); } @@ -323,7 +325,7 @@ detail::enable_if_t::value, T> floor(T x) __NOEXC { // genfloat fma (genfloat a, genfloat b, genfloat c) template detail::enable_if_t::value, T> fma(T a, T b, - T c) __NOEXC { + T c) __NOEXC { return __sycl_std::__invoke_fma(a, b, c); } @@ -379,7 +381,8 @@ frexp(T x, T2 exp) __NOEXC { // genfloat hypot (genfloat x, genfloat y) template -detail::enable_if_t::value, T> hypot(T x, T y) __NOEXC { +detail::enable_if_t::value, T> hypot(T x, + T y) __NOEXC { return __sycl_std::__invoke_hypot(x, y); } @@ -463,19 +466,21 @@ detail::enable_if_t::value, T> logb(T x) __NOEXC { // genfloat mad (genfloat a, genfloat b, genfloat c) template detail::enable_if_t::value, T> mad(T a, T b, - T c) __NOEXC { + T c) __NOEXC { return __sycl_std::__invoke_mad(a, b, c); } // genfloat maxmag (genfloat x, genfloat y) template -detail::enable_if_t::value, T> maxmag(T x, T y) __NOEXC { +detail::enable_if_t::value, T> maxmag(T x, + T y) __NOEXC { return __sycl_std::__invoke_maxmag(x, y); } // genfloat minmag (genfloat x, genfloat y) template -detail::enable_if_t::value, T> minmag(T x, T y) __NOEXC { +detail::enable_if_t::value, T> minmag(T x, + T y) __NOEXC { return __sycl_std::__invoke_minmag(x, y); } @@ -498,7 +503,7 @@ detail::nan_return_t nan(T nancode) __NOEXC { // genfloat nextafter (genfloat x, genfloat y) template detail::enable_if_t::value, T> nextafter(T x, - T y) __NOEXC { + T y) __NOEXC { return __sycl_std::__invoke_nextafter(x, y); } @@ -526,7 +531,7 @@ detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> powr(T x, T y) __NOEXC { // genfloat remainder (genfloat x, genfloat y) template detail::enable_if_t::value, T> remainder(T x, - T y) __NOEXC { + T y) __NOEXC { return __sycl_std::__invoke_remainder(x, y); } @@ -632,8 +637,8 @@ detail::enable_if_t::value, T> trunc(T x) __NOEXC { /* --------------- 4.13.5 Common functions. ---------------------------------*/ // genfloat clamp (genfloat x, genfloat minval, genfloat maxval) template -detail::enable_if_t::value, T> clamp(T x, T minval, - T maxval) __NOEXC { +detail::enable_if_t::value, T> +clamp(T x, T minval, T maxval) __NOEXC { return __sycl_std::__invoke_fclamp(x, minval, maxval); } @@ -693,7 +698,7 @@ detail::enable_if_t::value, T>(min)( // genfloat mix (genfloat x, genfloat y, genfloat a) template detail::enable_if_t::value, T> mix(T x, T y, - T a) __NOEXC { + T a) __NOEXC { return __sycl_std::__invoke_mix(x, y, a); } @@ -716,7 +721,7 @@ radians(T degrees) __NOEXC { // genfloat step (genfloat edge, genfloat x) template detail::enable_if_t::value, T> step(T edge, - T x) __NOEXC { + T x) __NOEXC { return __sycl_std::__invoke_step(edge, x); } @@ -1487,8 +1492,9 @@ select(T a, T b, T2 c) __NOEXC { // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) template -detail::enable_if_t< - detail::is_svgenfloatd::value && detail::is_igeninteger64bit::value, T> +detail::enable_if_t::value && + detail::is_igeninteger64bit::value, + T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1496,8 +1502,9 @@ select(T a, T b, T2 c) __NOEXC { // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) template -detail::enable_if_t< - detail::is_svgenfloatd::value && detail::is_ugeninteger64bit::value, T> +detail::enable_if_t::value && + detail::is_ugeninteger64bit::value, + T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1505,8 +1512,9 @@ select(T a, T b, T2 c) __NOEXC { // genfloath select (genfloath a, genfloath b, igeninteger16 c) template -detail::enable_if_t< - detail::is_svgenfloath::value && detail::is_igeninteger16bit::value, T> +detail::enable_if_t::value && + detail::is_igeninteger16bit::value, + T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); @@ -1514,8 +1522,9 @@ select(T a, T b, T2 c) __NOEXC { // genfloath select (genfloath a, genfloath b, ugeninteger16 c) template -detail::enable_if_t< - detail::is_svgenfloath::value && detail::is_ugeninteger16bit::value, T> +detail::enable_if_t::value && + detail::is_ugeninteger16bit::value, + T> select(T a, T b, T2 c) __NOEXC { detail::check_vector_size(); return __sycl_std::__invoke_select(a, b, c); diff --git a/sycl/include/CL/sycl/detail/generic_type_lists.hpp b/sycl/include/CL/sycl/detail/generic_type_lists.hpp index 9e1420fd9e7a7..b8829bed32dc8 100644 --- a/sycl/include/CL/sycl/detail/generic_type_lists.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_lists.hpp @@ -47,7 +47,8 @@ using marray_half_list = using scalar_vector_half_list = type_list; -using half_list = type_list; +using half_list = + type_list; using scalar_float_list = type_list; @@ -59,9 +60,11 @@ using marray_float_list = type_list, marray, marray, marray, marray, marray>; -using scalar_vector_float_list = type_list; +using scalar_vector_float_list = + type_list; -using float_list = type_list; +using float_list = + type_list; using scalar_double_list = type_list; @@ -73,7 +76,8 @@ using marray_double_list = type_list, marray, marray, marray, marray, marray>; -using scalar_vector_double_list = type_list; +using scalar_vector_double_list = + type_list; using double_list = type_list; @@ -87,9 +91,11 @@ using vector_floating_list = using marray_floating_list = type_list; -using scalar_vector_floating_list = type_list; +using scalar_vector_floating_list = + type_list; -using floating_list = type_list; +using floating_list = + type_list; // geometric floating point types using scalar_geo_half_list = type_list; diff --git a/sycl/include/CL/sycl/detail/generic_type_traits.hpp b/sycl/include/CL/sycl/detail/generic_type_traits.hpp index e16b1d248a63b..46ba6505d0f20 100644 --- a/sycl/include/CL/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/CL/sycl/detail/generic_type_traits.hpp @@ -25,20 +25,23 @@ template using is_floatn = is_contained; template using is_genfloatf = is_contained; -template using is_svgenfloatf = is_contained; +template +using is_svgenfloatf = is_contained; template using is_doublen = is_contained; template using is_genfloatd = is_contained; -template using is_svgenfloatd = is_contained; +template +using is_svgenfloatd = is_contained; template using is_halfn = is_contained; template using is_genfloath = is_contained; -template using is_svgenfloath = is_contained; +template +using is_svgenfloath = is_contained; template using is_genfloat = is_contained; @@ -48,7 +51,8 @@ using is_sgenfloat = is_contained; template using is_vgenfloat = is_contained; -template using is_svgenfloat = is_contained; +template +using is_svgenfloat = is_contained; template using is_gengeofloat = is_contained; From a60c15c1fc29a3cc1f07dd8a68d69040332aca16 Mon Sep 17 00:00:00 2001 From: "jack.kirk" Date: Wed, 11 May 2022 16:29:44 +0100 Subject: [PATCH 04/34] format Signed-off-by: jack.kirk --- sycl/include/CL/sycl/builtins.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 2b934f570e929..4ea84a7f11ed1 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1598,7 +1598,7 @@ detail::enable_if_t::value, T> cos(T x) __NOEXC { // genfloatf divide (genfloatf x, genfloatf y) template detail::enable_if_t::value, T> divide(T x, - T y) __NOEXC { + T y) __NOEXC { return __sycl_std::__invoke_native_divide(x, y); } @@ -1640,7 +1640,8 @@ detail::enable_if_t::value, T> log10(T x) __NOEXC { // genfloatf powr (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, + T y) __NOEXC { return __sycl_std::__invoke_native_powr(x, y); } @@ -1742,7 +1743,7 @@ detail::enable_if_t::value, T> cos(T x) __NOEXC { // genfloatf divide (genfloatf x, genfloatf y) template detail::enable_if_t::value, T> divide(T x, - T y) __NOEXC { + T y) __NOEXC { return __sycl_std::__invoke_half_divide(x, y); } @@ -1784,7 +1785,8 @@ detail::enable_if_t::value, T> log10(T x) __NOEXC { // genfloatf powr (genfloatf x, genfloatf y) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, + T y) __NOEXC { return __sycl_std::__invoke_half_powr(x, y); } From 18df26a8d39265d9580f9b9d960dc8dbd0e3b8b7 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 17 May 2022 15:57:08 +0100 Subject: [PATCH 05/34] used is_sgenfloat where possible. Signed-off-by: JackAKirk --- sycl/include/CL/sycl/builtins.hpp | 38 +++++++++++++------------------ 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 4ea84a7f11ed1..ae9ff82a3e237 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -33,19 +33,11 @@ namespace __sycl_std = __host_std; #endif /* ----------------- 4.13.3 Math functions. ---------------------------------*/ -// genfloat acos (genfloat x) -template -detail::enable_if_t::value, T> acos(T x) __NOEXC { - return __sycl_std::__invoke_acos(x); -} - #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ - std::is_same::value || std::is_same::value || \ - std::is_same::value, \ - sycl::marray> \ - NAME(sycl::marray x) __NOEXC { \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, sycl::marray> \ + NAME(sycl::marray x) __NOEXC { \ sycl::marray res; \ auto x_vec2 = reinterpret_cast const *>(&x); \ auto res_vec2 = reinterpret_cast *>(&res); \ @@ -102,11 +94,9 @@ __SYCL_MATH_FUNCTION_OVERLOAD(trunc) #define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ - std::is_same::value || std::is_same::value || \ - std::is_same::value, \ - sycl::marray> \ - NAME(sycl::marray x, sycl::marray y) __NOEXC { \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, sycl::marray> \ + NAME(sycl::marray x, sycl::marray y) __NOEXC { \ sycl::marray res; \ auto x_vec2 = reinterpret_cast const *>(&x); \ auto y_vec2 = reinterpret_cast const *>(&y); \ @@ -140,12 +130,10 @@ __SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE std::enable_if_t< \ - std::is_same::value || std::is_same::value || \ - std::is_same::value, \ - sycl::marray> \ - NAME(sycl::marray x, sycl::marray y, sycl::marray z) \ - __NOEXC { \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, sycl::marray> \ + NAME(sycl::marray x, sycl::marray y, sycl::marray z) \ + __NOEXC { \ sycl::marray res; \ auto x_vec2 = reinterpret_cast const *>(&x); \ auto y_vec2 = reinterpret_cast const *>(&y); \ @@ -168,6 +156,12 @@ __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) #undef __SYCL_MATH_FUNCTION_3_OVERLOAD +// genfloat acos (genfloat x) +template +detail::enable_if_t::value, T> acos(T x) __NOEXC { + return __sycl_std::__invoke_acos(x); +} + // genfloat acosh (genfloat x) template detail::enable_if_t::value, T> acosh(T x) __NOEXC { From 205625846f011775988ee3d66f62759c2886f714 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 18 May 2022 17:30:58 +0100 Subject: [PATCH 06/34] reinterpret_cast usage -> std::memcpy. Signed-off-by: JackAKirk --- sycl/include/CL/sycl/builtins.hpp | 105 +++++++++--------- .../sycl/ext/oneapi/experimental/builtins.hpp | 18 +-- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index ae9ff82a3e237..8162bddac6677 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -19,6 +19,14 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace detail { +template vec to_vec(marray x, size_t start) { + vec res; + std::memcpy(&res, &x[start], sizeof(vec)); + return res; +} +} // namespace detail + #ifdef __SYCL_DEVICE_ONLY__ #define __sycl_std #else @@ -36,13 +44,13 @@ namespace __sycl_std = __host_std; #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ - std::enable_if_t::value, sycl::marray> \ - NAME(sycl::marray x) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + std::enable_if_t::value, marray> \ + NAME(marray x) __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = __sycl_std::__invoke_##NAME>(x_vec2[i]); \ + vec partial_res = \ + __sycl_std::__invoke_##NAME>(detail::to_vec(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1]); \ @@ -95,15 +103,13 @@ __SYCL_MATH_FUNCTION_OVERLOAD(trunc) #define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ - std::enable_if_t::value, sycl::marray> \ - NAME(sycl::marray x, sycl::marray y) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto y_vec2 = reinterpret_cast const *>(&y); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + std::enable_if_t::value, marray> \ + NAME(marray x, marray y) __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = \ - __sycl_std::__invoke_##NAME>(x_vec2[i], y_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1]); \ @@ -131,17 +137,14 @@ __SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ - std::enable_if_t::value, sycl::marray> \ - NAME(sycl::marray x, sycl::marray y, sycl::marray z) \ - __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto y_vec2 = reinterpret_cast const *>(&y); \ - auto z_vec2 = reinterpret_cast const *>(&z); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + std::enable_if_t::value, marray> \ + NAME(marray x, marray y, marray z) __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = __sycl_std::__invoke_##NAME>( \ - x_vec2[i], y_vec2[i], z_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec(x, i * 2), detail::to_vec(y, i * 2), \ + detail::to_vec(z, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = \ @@ -1530,14 +1533,13 @@ namespace native { #define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ - sycl::marray x) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x) \ + __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = \ - __sycl_std::__invoke_native_##NAME>(x_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_native_##NAME>( \ + detail::to_vec(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = __sycl_std::__invoke_native_##NAME(x[N - 1]); \ @@ -1562,15 +1564,13 @@ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) #define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ - sycl::marray x, sycl::marray y) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto y_vec2 = reinterpret_cast const *>(&y); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + inline __SYCL_ALWAYS_INLINE marray NAME( \ + marray x, marray y) __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = __sycl_std::__invoke_native_##NAME>( \ - x_vec2[i], y_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_native_##NAME>( \ + detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = \ @@ -1674,14 +1674,13 @@ namespace half_precision { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ #define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ - sycl::marray x) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + inline __SYCL_ALWAYS_INLINE marray NAME(marray x) \ + __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = \ - __sycl_std::__invoke_half_##NAME>(x_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_half_##NAME>( \ + detail::to_vec(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = __sycl_std::__invoke_half_##NAME(x[N - 1]); \ @@ -1706,15 +1705,13 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(recip) #define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(NAME) \ template \ - inline __SYCL_ALWAYS_INLINE sycl::marray NAME( \ - sycl::marray x, sycl::marray y) __NOEXC { \ - sycl::marray res; \ - auto x_vec2 = reinterpret_cast const *>(&x); \ - auto y_vec2 = reinterpret_cast const *>(&y); \ - auto res_vec2 = reinterpret_cast *>(&res); \ + inline __SYCL_ALWAYS_INLINE marray NAME( \ + marray x, marray y) __NOEXC { \ + marray res; \ for (size_t i = 0; i < N / 2; i++) { \ - res_vec2[i] = __sycl_std::__invoke_half_##NAME>( \ - x_vec2[i], y_vec2[i]); \ + auto partial_res = __sycl_std::__invoke_half_##NAME>( \ + detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ res[N - 1] = \ diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 39ab813fe825a..1d32c6bf5597e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -106,18 +106,19 @@ inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || sycl::marray> tanh(sycl::marray x) __NOEXC { sycl::marray res; - auto x_vec2 = reinterpret_cast const *>(&x); - auto res_vec2 = reinterpret_cast *>(&res); #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - res_vec2[i] = __clc_native_tanh(x_vec2[i]); + auto partial_res = __clc_native_tanh(sycl::detail::to_vec(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { res[N - 1] = __clc_native_tanh(x[N - 1]); } #else for (size_t i = 0; i < N / 2; i++) { - res_vec2[i] = __sycl_std::__invoke_tanh>(x_vec2[i]); + auto partial_res = __sycl_std::__invoke_tanh>( + sycl::detail::to_vec(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { res[N - 1] = __sycl_std::__invoke_tanh(x[N - 1]); @@ -145,18 +146,19 @@ template inline __SYCL_ALWAYS_INLINE sycl::marray exp2(sycl::marray x) __NOEXC { sycl::marray res; - auto x_vec2 = reinterpret_cast const *>(&x); - auto res_vec2 = reinterpret_cast *>(&res); #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - res_vec2[i] = __clc_native_exp2(x_vec2[i]); + auto partial_res = __clc_native_exp2(sycl::detail::to_vec(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { res[N - 1] = __clc_native_exp2(x[N - 1]); } #else for (size_t i = 0; i < N / 2; i++) { - res_vec2[i] = __sycl_std::__invoke_exp2>(x_vec2[i]); + auto partial_res = __sycl_std::__invoke_exp2>( + sycl::detail::to_vec(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { res[N - 1] = __sycl_std::__invoke_exp2(x[N - 1]); From 8fc29a8b1eb29782ebc327d0a4e5b3a1a3eb6d56 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 19 May 2022 11:35:36 +0100 Subject: [PATCH 07/34] to_vec -> to_vec2 naming Signed-off-by: JackAKirk --- sycl/include/CL/sycl/builtins.hpp | 18 +++++++++--------- .../sycl/ext/oneapi/experimental/builtins.hpp | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 8162bddac6677..86c0e0cdba736 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -20,7 +20,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -template vec to_vec(marray x, size_t start) { +template vec to_vec2(marray x, size_t start) { vec res; std::memcpy(&res, &x[start], sizeof(vec)); return res; @@ -49,7 +49,7 @@ namespace __sycl_std = __host_std; marray res; \ for (size_t i = 0; i < N / 2; i++) { \ vec partial_res = \ - __sycl_std::__invoke_##NAME>(detail::to_vec(x, i * 2)); \ + __sycl_std::__invoke_##NAME>(detail::to_vec2(x, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -108,7 +108,7 @@ __SYCL_MATH_FUNCTION_OVERLOAD(trunc) marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_##NAME>( \ - detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -142,8 +142,8 @@ __SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_##NAME>( \ - detail::to_vec(x, i * 2), detail::to_vec(y, i * 2), \ - detail::to_vec(z, i * 2)); \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2), \ + detail::to_vec2(z, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -1538,7 +1538,7 @@ namespace native { marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_native_##NAME>( \ - detail::to_vec(x, i * 2)); \ + detail::to_vec2(x, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -1569,7 +1569,7 @@ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_native_##NAME>( \ - detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -1679,7 +1679,7 @@ namespace half_precision { marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_half_##NAME>( \ - detail::to_vec(x, i * 2)); \ + detail::to_vec2(x, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ @@ -1710,7 +1710,7 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(recip) marray res; \ for (size_t i = 0; i < N / 2; i++) { \ auto partial_res = __sycl_std::__invoke_half_##NAME>( \ - detail::to_vec(x, i * 2), detail::to_vec(y, i * 2)); \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ } \ if (N % 2) { \ diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 1d32c6bf5597e..a5b4dd9365d28 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -108,7 +108,7 @@ tanh(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_native_tanh(sycl::detail::to_vec(x, i * 2)); + auto partial_res = __clc_native_tanh(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { @@ -117,7 +117,7 @@ tanh(sycl::marray x) __NOEXC { #else for (size_t i = 0; i < N / 2; i++) { auto partial_res = __sycl_std::__invoke_tanh>( - sycl::detail::to_vec(x, i * 2)); + sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { @@ -148,7 +148,7 @@ exp2(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_native_exp2(sycl::detail::to_vec(x, i * 2)); + auto partial_res = __clc_native_exp2(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { @@ -157,7 +157,7 @@ exp2(sycl::marray x) __NOEXC { #else for (size_t i = 0; i < N / 2; i++) { auto partial_res = __sycl_std::__invoke_exp2>( - sycl::detail::to_vec(x, i * 2)); + sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { From a8a71597748eb4e06823e8b608f3514845e4121a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 09:02:29 +0100 Subject: [PATCH 08/34] Add comments, reuse vector impls in ext cases. Signed-off-by: JackAKirk --- sycl/include/CL/sycl/builtins.hpp | 8 ++++++++ .../sycl/ext/oneapi/experimental/builtins.hpp | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 86c0e0cdba736..fb422f56580de 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -41,6 +41,14 @@ namespace __sycl_std = __host_std; #endif /* ----------------- 4.13.3 Math functions. ---------------------------------*/ + +// These macros for marray math function implementations use vectorizations of +// size two as a simple general optimization. A more complex implementation +// using larger vectorizations for large marray sizes is possible; however more +// testing is required in order to ascertain the performance implications for +// all backends. Currently the compiler does not produce vectorized loads and +// stores from this implementation for all backends. It would be wise to +// investigate how this can be fixed first. #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index a5b4dd9365d28..2ff92d6cddc24 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -95,11 +95,19 @@ inline __SYCL_ALWAYS_INLINE _ocl_T arg1 = cl::sycl::detail::convertDataToType(x); return cl::sycl::detail::convertDataToType<_ocl_T, T>( __clc_native_tanh(arg1)); + return cl::sycl::detail::convertDataToType<_ocl_T, T>( #else return __sycl_std::__invoke_tanh(x); #endif } +// These marray math function implementations use vectorizations of +// size two as a simple general optimization. A more complex implementation +// using larger vectorizations for large marray sizes is possible; however more +// testing is required in order to ascertain the performance implications for +// all backends. Currently the compiler does not produce vectorized loads and +// stores from this implementation for all backends. It would be wise to +// investigate how this can be fixed first. template inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || std::is_same::value, @@ -108,11 +116,11 @@ tanh(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_native_tanh(sycl::detail::to_vec2(x, i * 2)); + auto partial_res = native::tanh(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { - res[N - 1] = __clc_native_tanh(x[N - 1]); + res[N - 1] = native::tanh(x[N - 1]); } #else for (size_t i = 0; i < N / 2; i++) { @@ -148,11 +156,11 @@ exp2(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_native_exp2(sycl::detail::to_vec2(x, i * 2)); + auto partial_res = native::exp2(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if constexpr (N % 2) { - res[N - 1] = __clc_native_exp2(x[N - 1]); + res[N - 1] = native::exp2(x[N - 1]); } #else for (size_t i = 0; i < N / 2; i++) { From e13f44a0844fd1d6329cd71d69124d5feb0f6961 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 16 Jun 2022 09:54:27 +0100 Subject: [PATCH 09/34] removed accidentally added line. Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 2ff92d6cddc24..6b30a38bcf85f 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -95,7 +95,6 @@ inline __SYCL_ALWAYS_INLINE _ocl_T arg1 = cl::sycl::detail::convertDataToType(x); return cl::sycl::detail::convertDataToType<_ocl_T, T>( __clc_native_tanh(arg1)); - return cl::sycl::detail::convertDataToType<_ocl_T, T>( #else return __sycl_std::__invoke_tanh(x); #endif From 2eb3bc6c7f48c2a32bd8e0cb34c443331f9ccd46 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 11 Jul 2022 11:42:56 +0100 Subject: [PATCH 10/34] Removed if constexpr usage Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 6b30a38bcf85f..21ffee0265e46 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -118,7 +118,7 @@ tanh(sycl::marray x) __NOEXC { auto partial_res = native::tanh(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } - if constexpr (N % 2) { + if (N % 2) { res[N - 1] = native::tanh(x[N - 1]); } #else @@ -127,7 +127,7 @@ tanh(sycl::marray x) __NOEXC { sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } - if constexpr (N % 2) { + if (N % 2) { res[N - 1] = __sycl_std::__invoke_tanh(x[N - 1]); } #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) @@ -158,7 +158,7 @@ exp2(sycl::marray x) __NOEXC { auto partial_res = native::exp2(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } - if constexpr (N % 2) { + if (N % 2) { res[N - 1] = native::exp2(x[N - 1]); } #else @@ -167,7 +167,7 @@ exp2(sycl::marray x) __NOEXC { sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } - if constexpr (N % 2) { + if (N % 2) { res[N - 1] = __sycl_std::__invoke_exp2(x[N - 1]); } #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) From 2c6544766fb05c65ad7d19953accfb5ee1b44094 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Thu, 21 Jul 2022 12:29:03 +0100 Subject: [PATCH 11/34] [SYCL] Align marray to corresponding vector type --- sycl/include/CL/sycl/marray.hpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/marray.hpp b/sycl/include/CL/sycl/marray.hpp index 6bc2c7bfffe47..f5911e6518d4f 100644 --- a/sycl/include/CL/sycl/marray.hpp +++ b/sycl/include/CL/sycl/marray.hpp @@ -17,6 +17,29 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +template +constexpr std::size_t vecAlignment() { + static_assert(N > 0, "Invalid number of elements."); + static_assert(SizeOfT > 0, "Invalid size of T."); + // First find the "previous" vector num elements. + size_t res = N >= 16 ? 16 + : N >= 8 ? 8 + : N >= 4 ? 4 + : N >= 3 ? 3 + : N >= 2 ? 2 + : 1; + // Then calculate the alignment size in bytes, making sure it's power of 2. + res *= SizeOfT; + res--; + res |= res >> 1; + res |= res >> 2; + res |= res >> 4; + res |= res >> 8; + res |= res >> 16; + res++; + return res; +} + /// Provides a cross-patform math array class template that works on /// SYCL devices as well as in host C++ code. /// @@ -298,7 +321,7 @@ template class marray { } return Ret; } -}; +} __attribute__((aligned(vecAlignment()))); #define __SYCL_MAKE_MARRAY_ALIAS(ALIAS, TYPE, N) \ using ALIAS##N = cl::sycl::marray; From 2a4f3a26745ab8e14f3efffd6fe77ad9f1685aea Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Thu, 21 Jul 2022 12:29:42 +0100 Subject: [PATCH 12/34] [SYCL] Simplify marray math funcs --- sycl/include/CL/sycl/builtins.hpp | 4 +- .../sycl/ext/oneapi/experimental/builtins.hpp | 42 +++++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index a8251a244eeb0..43fd8f5484d28 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -46,9 +46,7 @@ namespace __sycl_std = __host_std; // size two as a simple general optimization. A more complex implementation // using larger vectorizations for large marray sizes is possible; however more // testing is required in order to ascertain the performance implications for -// all backends. Currently the compiler does not produce vectorized loads and -// stores from this implementation for all backends. It would be wise to -// investigate how this can be fixed first. +// all backends. #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 82c505a4a73d5..53723e64b88da 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -123,23 +123,22 @@ inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || tanh(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = native::tanh(sycl::detail::to_vec2(x, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); - } - if (N % 2) { - res[N - 1] = native::tanh(x[N - 1]); - } +#define FUNC_VEC native::tanh +#define FUNC FUNC_VEC #else +#define FUNC_VEC __sycl_std::__invoke_tanh> +#define FUNC __sycl_std::__invoke_tanh +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __sycl_std::__invoke_tanh>( - sycl::detail::to_vec2(x, i * 2)); + auto partial_res = FUNC_VEC(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if (N % 2) { - res[N - 1] = __sycl_std::__invoke_tanh(x[N - 1]); + res[N - 1] = FUNC(x[N - 1]); } -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) +#undef FUNC_VEC +#undef FUNC return res; } @@ -163,23 +162,22 @@ inline __SYCL_ALWAYS_INLINE sycl::marray exp2(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = native::exp2(sycl::detail::to_vec2(x, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); - } - if (N % 2) { - res[N - 1] = native::exp2(x[N - 1]); - } +#define FUNC_VEC native::exp2 +#define FUNC FUNC_VEC #else +#define FUNC_VEC __sycl_std::__invoke_exp2> +#define FUNC __sycl_std::__invoke_exp2 +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __sycl_std::__invoke_exp2>( - sycl::detail::to_vec2(x, i * 2)); + auto partial_res = FUNC_VEC(sycl::detail::to_vec2(x, i * 2)); std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); } if (N % 2) { - res[N - 1] = __sycl_std::__invoke_exp2(x[N - 1]); + res[N - 1] = FUNC(x[N - 1]); } -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) +#undef FUNC_VEC +#undef FUNC return res; } From f3b9d7e7a3580980315d5d99c921f7e1f7e57aee Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Sep 2022 13:49:13 +0100 Subject: [PATCH 13/34] Simplify to_vec2. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 0914f66fac708..a34e92372c0ae 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -23,9 +23,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { template vec to_vec2(marray x, size_t start) { - vec res; - std::memcpy(&res, &x[start], sizeof(vec)); - return res; + return {x[start], x[start + 1]}; } } // namespace detail From cd588a406cfdda6d5c38df77caa350b97229ebc3 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Sep 2022 14:08:36 +0100 Subject: [PATCH 14/34] Remove obsolete comment. Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 19716a83d9b15..2e29f49b11241 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -112,9 +112,7 @@ inline __SYCL_ALWAYS_INLINE // size two as a simple general optimization. A more complex implementation // using larger vectorizations for large marray sizes is possible; however more // testing is required in order to ascertain the performance implications for -// all backends. Currently the compiler does not produce vectorized loads and -// stores from this implementation for all backends. It would be wise to -// investigate how this can be fixed first. +// all backends. template inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || std::is_same::value, From d3dc1385a592a03183ae25643618db54db0c1abe Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 6 Sep 2022 10:52:26 +0100 Subject: [PATCH 15/34] Remove broken native/half cases. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index a34e92372c0ae..69258e4fd66a6 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -1561,7 +1561,6 @@ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp2) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log2) -__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log10) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sqrt) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(rsqrt) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) @@ -1699,7 +1698,6 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(cos) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(tan) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp2) -__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log2) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log10) From 02a3a9749069334a3ac25bd2c3c7530a6ca78427 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 12 Sep 2022 16:01:02 +0100 Subject: [PATCH 16/34] -native-math flag marray impl. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 304 ++++++++++++++++++++------------- 1 file changed, 183 insertions(+), 121 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 69258e4fd66a6..3168a0c96613c 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -36,8 +36,11 @@ namespace __sycl_std = __host_std; #ifdef __FAST_MATH__ #define __FAST_MATH_GENFLOAT(T) \ (detail::is_svgenfloatd::value || detail::is_svgenfloath::value) +#define __FAST_MATH_SGENFLOAT(T) \ + (std::is_same::value || std::is_same::value) #else #define __FAST_MATH_GENFLOAT(T) (detail::is_svgenfloat::value) +#define __FAST_MATH_SGENFLOAT(T) (detail::is_sgenfloat::value) #endif /* ----------------- 4.13.3 Math functions. ---------------------------------*/ @@ -47,26 +50,26 @@ namespace __sycl_std = __host_std; // using larger vectorizations for large marray sizes is possible; however more // testing is required in order to ascertain the performance implications for // all backends. +#define __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + vec partial_res = \ + __sycl_std::__invoke_##NAME>(detail::to_vec2(x, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1]); \ + } \ + return res; + #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ std::enable_if_t::value, marray> \ NAME(marray x) __NOEXC { \ - marray res; \ - for (size_t i = 0; i < N / 2; i++) { \ - vec partial_res = \ - __sycl_std::__invoke_##NAME>(detail::to_vec2(x, i * 2)); \ - std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ - } \ - if (N % 2) { \ - res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1]); \ - } \ - return res; \ + __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ } -__SYCL_MATH_FUNCTION_OVERLOAD(sin) -__SYCL_MATH_FUNCTION_OVERLOAD(cos) -__SYCL_MATH_FUNCTION_OVERLOAD(tan) __SYCL_MATH_FUNCTION_OVERLOAD(cospi) __SYCL_MATH_FUNCTION_OVERLOAD(sinpi) __SYCL_MATH_FUNCTION_OVERLOAD(tanpi) @@ -87,40 +90,60 @@ __SYCL_MATH_FUNCTION_OVERLOAD(ceil) __SYCL_MATH_FUNCTION_OVERLOAD(floor) __SYCL_MATH_FUNCTION_OVERLOAD(erfc) __SYCL_MATH_FUNCTION_OVERLOAD(erf) -__SYCL_MATH_FUNCTION_OVERLOAD(exp) -__SYCL_MATH_FUNCTION_OVERLOAD(exp2) -__SYCL_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_MATH_FUNCTION_OVERLOAD(expm1) __SYCL_MATH_FUNCTION_OVERLOAD(tgamma) __SYCL_MATH_FUNCTION_OVERLOAD(lgamma) -__SYCL_MATH_FUNCTION_OVERLOAD(log) -__SYCL_MATH_FUNCTION_OVERLOAD(log2) -__SYCL_MATH_FUNCTION_OVERLOAD(log10) __SYCL_MATH_FUNCTION_OVERLOAD(log1p) __SYCL_MATH_FUNCTION_OVERLOAD(logb) __SYCL_MATH_FUNCTION_OVERLOAD(rint) __SYCL_MATH_FUNCTION_OVERLOAD(round) -__SYCL_MATH_FUNCTION_OVERLOAD(sqrt) -__SYCL_MATH_FUNCTION_OVERLOAD(rsqrt) __SYCL_MATH_FUNCTION_OVERLOAD(trunc) #undef __SYCL_MATH_FUNCTION_OVERLOAD +// __SYCL_MATH_FUNCTION_OVERLOAD_FM cases use corresponding native +// implementations when the -ffast-math flag is used with float. +#define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray> \ + NAME(marray x) __NOEXC { \ + __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sin) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(cos) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(tan) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) + +#undef __SYCL_MATH_FUNCTION_OVERLOAD_FM +#undef __SYCL_MATH_FUNCTION_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1]); \ + } \ + return res; + #define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ std::enable_if_t::value, marray> \ NAME(marray x, marray y) __NOEXC { \ - marray res; \ - for (size_t i = 0; i < N / 2; i++) { \ - auto partial_res = __sycl_std::__invoke_##NAME>( \ - detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \ - std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ - } \ - if (N % 2) { \ - res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y[N - 1]); \ - } \ - return res; \ + __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ } __SYCL_MATH_FUNCTION_2_OVERLOAD(atan2) @@ -135,11 +158,19 @@ __SYCL_MATH_FUNCTION_2_OVERLOAD(maxmag) __SYCL_MATH_FUNCTION_2_OVERLOAD(minmag) __SYCL_MATH_FUNCTION_2_OVERLOAD(nextafter) __SYCL_MATH_FUNCTION_2_OVERLOAD(pow) -__SYCL_MATH_FUNCTION_2_OVERLOAD(powr) __SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) #undef __SYCL_MATH_FUNCTION_2_OVERLOAD +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray> + powr(marray x, marray y) __NOEXC { + __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(powr) +} + +#undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL + #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ @@ -165,206 +196,206 @@ __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) #undef __SYCL_MATH_FUNCTION_3_OVERLOAD -// genfloat acos (genfloat x) +// svgenfloat acos (svgenfloat x) template detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } -// genfloat acosh (genfloat x) +// svgenfloat acosh (svgenfloat x) template detail::enable_if_t::value, T> acosh(T x) __NOEXC { return __sycl_std::__invoke_acosh(x); } -// genfloat acospi (genfloat x) +// svgenfloat acospi (svgenfloat x) template detail::enable_if_t::value, T> acospi(T x) __NOEXC { return __sycl_std::__invoke_acospi(x); } -// genfloat asin (genfloat x) +// svgenfloat asin (svgenfloat x) template detail::enable_if_t::value, T> asin(T x) __NOEXC { return __sycl_std::__invoke_asin(x); } -// genfloat asinh (genfloat x) +// svgenfloat asinh (svgenfloat x) template detail::enable_if_t::value, T> asinh(T x) __NOEXC { return __sycl_std::__invoke_asinh(x); } -// genfloat asinpi (genfloat x) +// svgenfloat asinpi (svgenfloat x) template detail::enable_if_t::value, T> asinpi(T x) __NOEXC { return __sycl_std::__invoke_asinpi(x); } -// genfloat atan (genfloat y_over_x) +// svgenfloat atan (svgenfloat y_over_x) template detail::enable_if_t::value, T> atan(T y_over_x) __NOEXC { return __sycl_std::__invoke_atan(y_over_x); } -// genfloat atan2 (genfloat y, genfloat x) +// svgenfloat atan2 (svgenfloat y, svgenfloat x) template detail::enable_if_t::value, T> atan2(T y, T x) __NOEXC { return __sycl_std::__invoke_atan2(y, x); } -// genfloat atanh (genfloat x) +// svgenfloat atanh (svgenfloat x) template detail::enable_if_t::value, T> atanh(T x) __NOEXC { return __sycl_std::__invoke_atanh(x); } -// genfloat atanpi (genfloat x) +// svgenfloat atanpi (svgenfloat x) template detail::enable_if_t::value, T> atanpi(T x) __NOEXC { return __sycl_std::__invoke_atanpi(x); } -// genfloat atan2pi (genfloat y, genfloat x) +// svgenfloat atan2pi (svgenfloat y, svgenfloat x) template detail::enable_if_t::value, T> atan2pi(T y, T x) __NOEXC { return __sycl_std::__invoke_atan2pi(y, x); } -// genfloat cbrt (genfloat x) +// svgenfloat cbrt (svgenfloat x) template detail::enable_if_t::value, T> cbrt(T x) __NOEXC { return __sycl_std::__invoke_cbrt(x); } -// genfloat ceil (genfloat x) +// svgenfloat ceil (svgenfloat x) template detail::enable_if_t::value, T> ceil(T x) __NOEXC { return __sycl_std::__invoke_ceil(x); } -// genfloat copysign (genfloat x, genfloat y) +// svgenfloat copysign (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> copysign(T x, T y) __NOEXC { return __sycl_std::__invoke_copysign(x, y); } -// genfloat cos (genfloat x) +// svgenfloat cos (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> cos(T x) __NOEXC { return __sycl_std::__invoke_cos(x); } -// genfloat cosh (genfloat x) +// svgenfloat cosh (svgenfloat x) template detail::enable_if_t::value, T> cosh(T x) __NOEXC { return __sycl_std::__invoke_cosh(x); } -// genfloat cospi (genfloat x) +// svgenfloat cospi (svgenfloat x) template detail::enable_if_t::value, T> cospi(T x) __NOEXC { return __sycl_std::__invoke_cospi(x); } -// genfloat erfc (genfloat x) +// svgenfloat erfc (svgenfloat x) template detail::enable_if_t::value, T> erfc(T x) __NOEXC { return __sycl_std::__invoke_erfc(x); } -// genfloat erf (genfloat x) +// svgenfloat erf (svgenfloat x) template detail::enable_if_t::value, T> erf(T x) __NOEXC { return __sycl_std::__invoke_erf(x); } -// genfloat exp (genfloat x ) +// svgenfloat exp (svgenfloat x ) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp(T x) __NOEXC { return __sycl_std::__invoke_exp(x); } -// genfloat exp2 (genfloat x) +// svgenfloat exp2 (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp2(T x) __NOEXC { return __sycl_std::__invoke_exp2(x); } -// genfloat exp10 (genfloat x) +// svgenfloat exp10 (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> exp10(T x) __NOEXC { return __sycl_std::__invoke_exp10(x); } -// genfloat expm1 (genfloat x) +// svgenfloat expm1 (svgenfloat x) template detail::enable_if_t::value, T> expm1(T x) __NOEXC { return __sycl_std::__invoke_expm1(x); } -// genfloat fabs (genfloat x) +// svgenfloat fabs (svgenfloat x) template detail::enable_if_t::value, T> fabs(T x) __NOEXC { return __sycl_std::__invoke_fabs(x); } -// genfloat fdim (genfloat x, genfloat y) +// svgenfloat fdim (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> fdim(T x, T y) __NOEXC { return __sycl_std::__invoke_fdim(x, y); } -// genfloat floor (genfloat x) +// svgenfloat floor (svgenfloat x) template detail::enable_if_t::value, T> floor(T x) __NOEXC { return __sycl_std::__invoke_floor(x); } -// genfloat fma (genfloat a, genfloat b, genfloat c) +// svgenfloat fma (svgenfloat a, svgenfloat b, svgenfloat c) template detail::enable_if_t::value, T> fma(T a, T b, T c) __NOEXC { return __sycl_std::__invoke_fma(a, b, c); } -// genfloat fmax (genfloat x, genfloat y) +// svgenfloat fmax (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> fmax(T x, T y) __NOEXC { return __sycl_std::__invoke_fmax(x, y); } -// genfloat fmax (genfloat x, sgenfloat y) +// svgenfloat fmax (svgenfloat x, sgenfloat y) template detail::enable_if_t::value, T> fmax(T x, typename T::element_type y) __NOEXC { return __sycl_std::__invoke_fmax(x, T(y)); } -// genfloat fmin (genfloat x, genfloat y) +// svgenfloat fmin (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> fmin(T x, T y) __NOEXC { return __sycl_std::__invoke_fmin(x, y); } -// genfloat fmin (genfloat x, sgenfloat y) +// svgenfloat fmin (svgenfloat x, sgenfloat y) template detail::enable_if_t::value, T> fmin(T x, typename T::element_type y) __NOEXC { return __sycl_std::__invoke_fmin(x, T(y)); } -// genfloat fmod (genfloat x, genfloat y) +// svgenfloat fmod (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> fmod(T x, T y) __NOEXC { return __sycl_std::__invoke_fmod(x, y); } -// genfloat fract (genfloat x, genfloatptr iptr) +// svgenfloat fract (svgenfloat x, genfloatptr iptr) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> @@ -373,7 +404,7 @@ fract(T x, T2 iptr) __NOEXC { return __sycl_std::__invoke_fract(x, iptr); } -// genfloat frexp (genfloat x, genintptr exp) +// svgenfloat frexp (svgenfloat x, genintptr exp) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genintptr::value, T> @@ -382,14 +413,14 @@ frexp(T x, T2 exp) __NOEXC { return __sycl_std::__invoke_frexp(x, exp); } -// genfloat hypot (genfloat x, genfloat y) +// svgenfloat hypot (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> hypot(T x, T y) __NOEXC { return __sycl_std::__invoke_hypot(x, y); } -// genint ilogb (genfloat x) +// genint ilogb (svgenfloat x) template ::value, T>> detail::change_base_type_t ilogb(T x) __NOEXC { @@ -421,13 +452,13 @@ ldexp(T x, T2 k) __NOEXC { return __sycl_std::__invoke_ldexp(x, k); } -// genfloat lgamma (genfloat x) +// svgenfloat lgamma (svgenfloat x) template detail::enable_if_t::value, T> lgamma(T x) __NOEXC { return __sycl_std::__invoke_lgamma(x); } -// genfloat lgamma_r (genfloat x, genintptr signp) +// svgenfloat lgamma_r (svgenfloat x, genintptr signp) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genintptr::value, T> @@ -436,58 +467,58 @@ lgamma_r(T x, T2 signp) __NOEXC { return __sycl_std::__invoke_lgamma_r(x, signp); } -// genfloat log (genfloat x) +// svgenfloat log (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log(T x) __NOEXC { return __sycl_std::__invoke_log(x); } -// genfloat log2 (genfloat x) +// svgenfloat log2 (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log2(T x) __NOEXC { return __sycl_std::__invoke_log2(x); } -// genfloat log10 (genfloat x) +// svgenfloat log10 (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> log10(T x) __NOEXC { return __sycl_std::__invoke_log10(x); } -// genfloat log1p (genfloat x) +// svgenfloat log1p (svgenfloat x) template detail::enable_if_t::value, T> log1p(T x) __NOEXC { return __sycl_std::__invoke_log1p(x); } -// genfloat logb (genfloat x) +// svgenfloat logb (svgenfloat x) template detail::enable_if_t::value, T> logb(T x) __NOEXC { return __sycl_std::__invoke_logb(x); } -// genfloat mad (genfloat a, genfloat b, genfloat c) +// svgenfloat mad (svgenfloat a, svgenfloat b, svgenfloat c) template detail::enable_if_t::value, T> mad(T a, T b, T c) __NOEXC { return __sycl_std::__invoke_mad(a, b, c); } -// genfloat maxmag (genfloat x, genfloat y) +// svgenfloat maxmag (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> maxmag(T x, T y) __NOEXC { return __sycl_std::__invoke_maxmag(x, y); } -// genfloat minmag (genfloat x, genfloat y) +// svgenfloat minmag (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> minmag(T x, T y) __NOEXC { return __sycl_std::__invoke_minmag(x, y); } -// genfloat modf (genfloat x, genfloatptr iptr) +// svgenfloat modf (svgenfloat x, genfloatptr iptr) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> @@ -503,20 +534,20 @@ detail::nan_return_t nan(T nancode) __NOEXC { detail::convert_data_type>()(nancode)); } -// genfloat nextafter (genfloat x, genfloat y) +// svgenfloat nextafter (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> nextafter(T x, T y) __NOEXC { return __sycl_std::__invoke_nextafter(x, y); } -// genfloat pow (genfloat x, genfloat y) +// svgenfloat pow (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> pow(T x, T y) __NOEXC { return __sycl_std::__invoke_pow(x, y); } -// genfloat pown (genfloat x, genint y) +// svgenfloat pown (svgenfloat x, genint y) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genint::value, T> @@ -525,20 +556,20 @@ pown(T x, T2 y) __NOEXC { return __sycl_std::__invoke_pown(x, y); } -// genfloat powr (genfloat x, genfloat y) +// svgenfloat powr (svgenfloat x, svgenfloat y) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_powr(x, y); } -// genfloat remainder (genfloat x, genfloat y) +// svgenfloat remainder (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T> remainder(T x, T y) __NOEXC { return __sycl_std::__invoke_remainder(x, y); } -// genfloat remquo (genfloat x, genfloat y, genintptr quo) +// svgenfloat remquo (svgenfloat x, svgenfloat y, genintptr quo) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genintptr::value, T> @@ -547,13 +578,13 @@ remquo(T x, T y, T2 quo) __NOEXC { return __sycl_std::__invoke_remquo(x, y, quo); } -// genfloat rint (genfloat x) +// svgenfloat rint (svgenfloat x) template detail::enable_if_t::value, T> rint(T x) __NOEXC { return __sycl_std::__invoke_rint(x); } -// genfloat rootn (genfloat x, genint y) +// svgenfloat rootn (svgenfloat x, genint y) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genint::value, T> @@ -562,25 +593,25 @@ rootn(T x, T2 y) __NOEXC { return __sycl_std::__invoke_rootn(x, y); } -// genfloat round (genfloat x) +// svgenfloat round (svgenfloat x) template detail::enable_if_t::value, T> round(T x) __NOEXC { return __sycl_std::__invoke_round(x); } -// genfloat rsqrt (genfloat x) +// svgenfloat rsqrt (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_rsqrt(x); } -// genfloat sin (genfloat x) +// svgenfloat sin (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sin(T x) __NOEXC { return __sycl_std::__invoke_sin(x); } -// genfloat sincos (genfloat x, genfloatptr cosval) +// svgenfloat sincos (svgenfloat x, genfloatptr cosval) template detail::enable_if_t< detail::is_svgenfloat::value && detail::is_genfloatptr::value, T> @@ -589,56 +620,56 @@ sincos(T x, T2 cosval) __NOEXC { return __sycl_std::__invoke_sincos(x, cosval); } -// genfloat sinh (genfloat x) +// svgenfloat sinh (svgenfloat x) template detail::enable_if_t::value, T> sinh(T x) __NOEXC { return __sycl_std::__invoke_sinh(x); } -// genfloat sinpi (genfloat x) +// svgenfloat sinpi (svgenfloat x) template detail::enable_if_t::value, T> sinpi(T x) __NOEXC { return __sycl_std::__invoke_sinpi(x); } -// genfloat sqrt (genfloat x) +// svgenfloat sqrt (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_sqrt(x); } -// genfloat tan (genfloat x) +// svgenfloat tan (svgenfloat x) template detail::enable_if_t<__FAST_MATH_GENFLOAT(T), T> tan(T x) __NOEXC { return __sycl_std::__invoke_tan(x); } -// genfloat tanh (genfloat x) +// svgenfloat tanh (svgenfloat x) template detail::enable_if_t::value, T> tanh(T x) __NOEXC { return __sycl_std::__invoke_tanh(x); } -// genfloat tanpi (genfloat x) +// svgenfloat tanpi (svgenfloat x) template detail::enable_if_t::value, T> tanpi(T x) __NOEXC { return __sycl_std::__invoke_tanpi(x); } -// genfloat tgamma (genfloat x) +// svgenfloat tgamma (svgenfloat x) template detail::enable_if_t::value, T> tgamma(T x) __NOEXC { return __sycl_std::__invoke_tgamma(x); } -// genfloat trunc (genfloat x) +// svgenfloat trunc (svgenfloat x) template detail::enable_if_t::value, T> trunc(T x) __NOEXC { return __sycl_std::__invoke_trunc(x); } /* --------------- 4.13.5 Common functions. ---------------------------------*/ -// genfloat clamp (genfloat x, genfloat minval, genfloat maxval) +// svgenfloat clamp (svgenfloat x, svgenfloat minval, svgenfloat maxval) template detail::enable_if_t::value, T> clamp(T x, T minval, T maxval) __NOEXC { @@ -655,20 +686,20 @@ clamp(T x, typename T::element_type minval, return __sycl_std::__invoke_fclamp(x, T(minval), T(maxval)); } -// genfloat degrees (genfloat radians) +// svgenfloat degrees (svgenfloat radians) template detail::enable_if_t::value, T> degrees(T radians) __NOEXC { return __sycl_std::__invoke_degrees(radians); } -// genfloat abs (genfloat x) +// svgenfloat abs (svgenfloat x) template detail::enable_if_t::value, T> abs(T x) __NOEXC { return __sycl_std::__invoke_fabs(x); } -// genfloat max (genfloat x, genfloat y) +// svgenfloat max (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T>(max)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmax_common(x, y); @@ -683,7 +714,7 @@ detail::enable_if_t::value, T>(max)( return __sycl_std::__invoke_fmax_common(x, T(y)); } -// genfloat min (genfloat x, genfloat y) +// svgenfloat min (svgenfloat x, svgenfloat y) template detail::enable_if_t::value, T>(min)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmin_common(x, y); @@ -698,7 +729,7 @@ detail::enable_if_t::value, T>(min)( return __sycl_std::__invoke_fmin_common(x, T(y)); } -// genfloat mix (genfloat x, genfloat y, genfloat a) +// svgenfloat mix (svgenfloat x, svgenfloat y, svgenfloat a) template detail::enable_if_t::value, T> mix(T x, T y, T a) __NOEXC { @@ -714,14 +745,14 @@ mix(T x, T y, typename T::element_type a) __NOEXC { return __sycl_std::__invoke_mix(x, y, T(a)); } -// genfloat radians (genfloat degrees) +// svgenfloat radians (svgenfloat degrees) template detail::enable_if_t::value, T> radians(T degrees) __NOEXC { return __sycl_std::__invoke_radians(degrees); } -// genfloat step (genfloat edge, genfloat x) +// svgenfloat step (svgenfloat edge, svgenfloat x) template detail::enable_if_t::value, T> step(T edge, T x) __NOEXC { @@ -737,7 +768,7 @@ step(typename T::element_type edge, T x) __NOEXC { return __sycl_std::__invoke_step(T(edge), x); } -// genfloat smoothstep (genfloat edge0, genfloat edge1, genfloat x) +// svgenfloat smoothstep (svgenfloat edge0, svgenfloat edge1, svgenfloat x) template detail::enable_if_t::value, T> smoothstep(T edge0, T edge1, T x) __NOEXC { @@ -754,7 +785,7 @@ smoothstep(typename T::element_type edge0, typename T::element_type edge1, return __sycl_std::__invoke_smoothstep(T(edge0), T(edge1), x); } -// genfloat sign (genfloat x) +// svgenfloat sign (svgenfloat x) template detail::enable_if_t::value, T> sign(T x) __NOEXC { return __sycl_std::__invoke_sign(x); @@ -1561,6 +1592,7 @@ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp2) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log2) +__SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(log10) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(sqrt) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(rsqrt) __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(recip) @@ -1819,75 +1851,105 @@ detail::enable_if_t::value, T> tan(T x) __NOEXC { #ifdef __FAST_MATH__ /* ----------------- -ffast-math functions. ---------------------------------*/ + +#define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, marray> \ + NAME(marray x) __NOEXC { \ + return native::NAME(x); \ + } + +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sin) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(cos) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(tan) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(exp10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) +#undef __SYCL_MATH_FUNCTION_OVERLOAD_FM + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + powr(marray x, marray y) __NOEXC { + return native::powr(x, y); +} + +//todo update naming of all changed to sv! // genfloatf cos (genfloatf x) template -detail::enable_if_t::value, T> cos(T x) __NOEXC { +detail::enable_if_t::value, T> cos(T x) __NOEXC { return native::cos(x); } // genfloatf exp (genfloatf x) template -detail::enable_if_t::value, T> exp(T x) __NOEXC { +detail::enable_if_t::value, T> exp(T x) __NOEXC { return native::exp(x); } // genfloatf exp2 (genfloatf x) template -detail::enable_if_t::value, T> exp2(T x) __NOEXC { +detail::enable_if_t::value, T> exp2(T x) __NOEXC { return native::exp2(x); } // genfloatf exp10 (genfloatf x) template -detail::enable_if_t::value, T> exp10(T x) __NOEXC { +detail::enable_if_t::value, T> exp10(T x) __NOEXC { return native::exp10(x); } // genfloatf log(genfloatf x) template -detail::enable_if_t::value, T> log(T x) __NOEXC { +detail::enable_if_t::value, T> log(T x) __NOEXC { return native::log(x); } // genfloatf log2 (genfloatf x) template -detail::enable_if_t::value, T> log2(T x) __NOEXC { +detail::enable_if_t::value, T> log2(T x) __NOEXC { return native::log2(x); } // genfloatf log10 (genfloatf x) template -detail::enable_if_t::value, T> log10(T x) __NOEXC { +detail::enable_if_t::value, T> log10(T x) __NOEXC { return native::log10(x); } // genfloatf powr (genfloatf x) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return native::powr(x, y); } // genfloatf rsqrt (genfloatf x) template -detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { +detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return native::rsqrt(x); } // genfloatf sin (genfloatf x) template -detail::enable_if_t::value, T> sin(T x) __NOEXC { +detail::enable_if_t::value, T> sin(T x) __NOEXC { return native::sin(x); } // genfloatf sqrt (genfloatf x) template -detail::enable_if_t::value, T> sqrt(T x) __NOEXC { +detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return native::sqrt(x); } // genfloatf tan (genfloatf x) template -detail::enable_if_t::value, T> tan(T x) __NOEXC { +detail::enable_if_t::value, T> tan(T x) __NOEXC { return native::tan(x); } From 348d90d50707419039db3a76f8708133e22e8e46 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 12 Sep 2022 16:56:07 +0100 Subject: [PATCH 17/34] Correct comment. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 3168a0c96613c..3e07e0755cdcf 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -1566,7 +1566,6 @@ select(T a, T b, T2 c) __NOEXC { namespace native { /* ----------------- 4.13.3 Math functions. ---------------------------------*/ -// genfloatf cos (genfloatf x) #define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME) \ template \ @@ -1621,6 +1620,7 @@ __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(powr) #undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD +// genfloatf cos (genfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_native_cos(x); @@ -1880,7 +1880,6 @@ inline __SYCL_ALWAYS_INLINE return native::powr(x, y); } -//todo update naming of all changed to sv! // genfloatf cos (genfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { From fe57ef6498ead7dc854d24cad8b5abcea4c3c70b Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 12 Sep 2022 17:26:56 +0100 Subject: [PATCH 18/34] format. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 3e07e0755cdcf..8afed3e9acc63 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -165,9 +165,8 @@ __SYCL_MATH_FUNCTION_2_OVERLOAD(remainder) template inline __SYCL_ALWAYS_INLINE std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray> - powr(marray x, marray y) __NOEXC { - __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(powr) -} + powr(marray x, + marray y) __NOEXC{__SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(powr)} #undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL @@ -190,15 +189,14 @@ inline __SYCL_ALWAYS_INLINE return res; \ } -__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) -__SYCL_MATH_FUNCTION_3_OVERLOAD(mix) -__SYCL_MATH_FUNCTION_3_OVERLOAD(fma) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) __SYCL_MATH_FUNCTION_3_OVERLOAD(mix) + __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) #undef __SYCL_MATH_FUNCTION_3_OVERLOAD -// svgenfloat acos (svgenfloat x) -template -detail::enable_if_t::value, T> acos(T x) __NOEXC { + // svgenfloat acos (svgenfloat x) + template + detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } @@ -1924,7 +1922,8 @@ detail::enable_if_t::value, T> log10(T x) __NOEXC { // genfloatf powr (genfloatf x) template -detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { +detail::enable_if_t::value, T> powr(T x, + T y) __NOEXC { return native::powr(x, y); } From d5f28d24e5391882b79540faf2b163e852a42bf0 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 Sep 2022 14:36:41 +0100 Subject: [PATCH 19/34] Device code test for cuda native fcts. Signed-off-by: JackAKirk --- .../math-builtins/native-math-cuda.cpp | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 sycl/test/check_device_code/math-builtins/native-math-cuda.cpp diff --git a/sycl/test/check_device_code/math-builtins/native-math-cuda.cpp b/sycl/test/check_device_code/math-builtins/native-math-cuda.cpp new file mode 100644 index 0000000000000..d8ebab70315ff --- /dev/null +++ b/sycl/test/check_device_code/math-builtins/native-math-cuda.cpp @@ -0,0 +1,69 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o -| FileCheck %s + +#include + +using namespace sycl; + +int main() { + + queue q; + + float input[2]; + float res[13]; + { + buffer input_buff(&input[0], range<1>(2)); + buffer res_buff(&res[0], range<1>(13)); + q.submit([&](handler &cgh) { + accessor res_acc(res_buff, + cgh); + accessor input_acc( + input_buff, cgh); + cgh.single_task([=]() { + // CHECK: tail call float @llvm.nvvm.cos.approx.f + res_acc[0] = sycl::native::cos(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.sin.approx.f + res_acc[1] = sycl::native::sin(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.ex2.approx.f + res_acc[2] = sycl::native::exp2(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.lg2.approx.f + res_acc[3] = sycl::native::log2(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.rsqrt.approx.f + res_acc[4] = sycl::native::rsqrt(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.sqrt.approx.f + res_acc[5] = sycl::native::sqrt(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.rcp.approx.f + res_acc[6] = sycl::native::recip(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.div.approx.f + res_acc[7] = sycl::native::divide(input_acc[0], input_acc[1]); + + // Functions that use the above builtins: + + // CHECK: tail call float @llvm.nvvm.sin.approx.f + // CHECK: tail call float @llvm.nvvm.cos.approx.f + // CHECK: tail call float @llvm.nvvm.div.approx.f + res_acc[8] = sycl::native::tan(input_acc[0]); + // CHECK: fmul float {{.*}}, 0x3FF7154760000000 + // CHECK: tail call float @llvm.nvvm.ex2.approx.f + res_acc[9] = sycl::native::exp(input_acc[0]); + // CHECK: fmul float {{.*}}, 0x400A934F00000000 + // CHECK: tail call float @llvm.nvvm.ex2.approx.f + res_acc[10] = sycl::native::exp10(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.lg2.approx.f + // CHECK: fmul float {{.*}}, 0x3FE62E4300000000 + res_acc[11] = sycl::native::log(input_acc[0]); + // CHECK: tail call float @llvm.nvvm.lg2.approx.f + // CHECK: fmul float {{.*}}, 0x3FD3441360000000 + res_acc[12] = sycl::native::log10(input_acc[0]); + + // CHECK: tail call float @llvm.nvvm.lg2.approx.f + // CHECK: fmul float {{.*}}, {{.*}} + // CHECK: tail call float @llvm.nvvm.ex2.approx.f + res_acc[13] = sycl::native::powr(input_acc[0], input_acc[1]); + }); + }); + } + + return 0; +}; From 3290d4e4e29a8ad44f8de6930824d3e8c0b55220 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 Sep 2022 15:46:41 +0100 Subject: [PATCH 20/34] format Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 8afed3e9acc63..1f6612bfbcfcc 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -101,7 +101,7 @@ __SYCL_MATH_FUNCTION_OVERLOAD(trunc) #undef __SYCL_MATH_FUNCTION_OVERLOAD -// __SYCL_MATH_FUNCTION_OVERLOAD_FM cases use corresponding native +// __SYCL_MATH_FUNCTION_OVERLOAD_FM cases are replaced by corresponding native // implementations when the -ffast-math flag is used with float. #define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ template \ @@ -189,14 +189,15 @@ inline __SYCL_ALWAYS_INLINE return res; \ } -__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) __SYCL_MATH_FUNCTION_3_OVERLOAD(mix) - __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mix) +__SYCL_MATH_FUNCTION_3_OVERLOAD(fma) #undef __SYCL_MATH_FUNCTION_3_OVERLOAD - // svgenfloat acos (svgenfloat x) - template - detail::enable_if_t::value, T> acos(T x) __NOEXC { +// svgenfloat acos (svgenfloat x) +template +detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } From e50cbfe7eb03423949df06e279ec6e1405aae10f Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 13 Sep 2022 15:52:14 +0100 Subject: [PATCH 21/34] format to pass lint. Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 1f6612bfbcfcc..3e8ae3b800d64 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -189,15 +189,14 @@ inline __SYCL_ALWAYS_INLINE return res; \ } -__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) -__SYCL_MATH_FUNCTION_3_OVERLOAD(mix) -__SYCL_MATH_FUNCTION_3_OVERLOAD(fma) +__SYCL_MATH_FUNCTION_3_OVERLOAD(mad) __SYCL_MATH_FUNCTION_3_OVERLOAD(mix) + __SYCL_MATH_FUNCTION_3_OVERLOAD(fma) #undef __SYCL_MATH_FUNCTION_3_OVERLOAD -// svgenfloat acos (svgenfloat x) -template -detail::enable_if_t::value, T> acos(T x) __NOEXC { + // svgenfloat acos (svgenfloat x) + template + detail::enable_if_t::value, T> acos(T x) __NOEXC { return __sycl_std::__invoke_acos(x); } From 67864ef9eac1d4c17a6bcf0c7cd0cbd51e5cc2fe Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 14 Sep 2022 20:47:07 +0100 Subject: [PATCH 22/34] svgenfloat comment corrections Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 156 ++++++++++++++++----------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 3e8ae3b800d64..7408c1ddba832 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -674,9 +674,9 @@ clamp(T x, T minval, T maxval) __NOEXC { return __sycl_std::__invoke_fclamp(x, minval, maxval); } -// genfloath clamp (genfloath x, half minval, half maxval) -// genfloatf clamp (genfloatf x, float minval, float maxval) -// genfloatd clamp (genfloatd x, double minval, double maxval) +// vgenfloath clamp (vgenfloath x, half minval, half maxval) +// vgenfloatf clamp (vgenfloatf x, float minval, float maxval) +// vgenfloatd clamp (vgenfloatd x, double minval, double maxval) template detail::enable_if_t::value, T> clamp(T x, typename T::element_type minval, @@ -703,9 +703,9 @@ detail::enable_if_t::value, T>(max)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmax_common(x, y); } -// genfloatf max (genfloatf x, float y) -// genfloatd max (genfloatd x, double y) -// genfloath max (genfloath x, half y) +// vgenfloatf max (vgenfloatf x, float y) +// vgenfloatd max (vgenfloatd x, double y) +// vgenfloath max (vgenfloath x, half y) template detail::enable_if_t::value, T>(max)( T x, typename T::element_type y) __NOEXC { @@ -718,9 +718,9 @@ detail::enable_if_t::value, T>(min)(T x, T y) __NOEXC { return __sycl_std::__invoke_fmin_common(x, y); } -// genfloatf min (genfloatf x, float y) -// genfloatd min (genfloatd x, double y) -// genfloath min (genfloath x, half y) +// vgenfloatf min (vgenfloatf x, float y) +// vgenfloatd min (vgenfloatd x, double y) +// vgenfloath min (vgenfloath x, half y) template detail::enable_if_t::value, T>(min)( T x, typename T::element_type y) __NOEXC { @@ -734,9 +734,9 @@ detail::enable_if_t::value, T> mix(T x, T y, return __sycl_std::__invoke_mix(x, y, a); } -// genfloatf mix (genfloatf x, genfloatf y, float a) -// genfloatd mix (genfloatd x, genfloatd y, double a) -// genfloatd mix (genfloath x, genfloath y, half a) +// vgenfloatf mix (vgenfloatf x, vgenfloatf y, float a) +// vgenfloatd mix (vgenfloatd x, vgenfloatd y, double a) +// vgenfloatd mix (vgenfloath x, vgenfloath y, half a) template detail::enable_if_t::value, T> mix(T x, T y, typename T::element_type a) __NOEXC { @@ -757,9 +757,9 @@ detail::enable_if_t::value, T> step(T edge, return __sycl_std::__invoke_step(edge, x); } -// genfloatf step (float edge, genfloatf x) -// genfloatd step (double edge, genfloatd x) -// genfloatd step (half edge, genfloath x) +// vgenfloatf step (float edge, vgenfloatf x) +// vgenfloatd step (double edge, vgenfloatd x) +// vgenfloatd step (half edge, vgenfloath x) template detail::enable_if_t::value, T> step(typename T::element_type edge, T x) __NOEXC { @@ -773,9 +773,9 @@ smoothstep(T edge0, T edge1, T x) __NOEXC { return __sycl_std::__invoke_smoothstep(edge0, edge1, x); } -// genfloatf smoothstep (float edge0, float edge1, genfloatf x) -// genfloatd smoothstep (double edge0, double edge1, genfloatd x) -// genfloath smoothstep (half edge0, half edge1, genfloath x) +// vgenfloatf smoothstep (float edge0, float edge1, vgenfloatf x) +// vgenfloatd smoothstep (double edge0, double edge1, vgenfloatd x) +// vgenfloath smoothstep (half edge0, half edge1, vgenfloath x) template detail::enable_if_t::value, T> smoothstep(typename T::element_type edge0, typename T::element_type edge1, @@ -1278,7 +1278,7 @@ fast_normalize(T p) __NOEXC { /* --------------- 4.13.7 Relational functions. Device version --------------*/ // int isequal (half x, half y) // shortn isequal (halfn x, halfn y) -// igeninteger32bit isequal (genfloatf x, genfloatf y) +// igeninteger32bit isequal (svgenfloatf x, svgenfloatf y) // int isequal (double x,double y); // longn isequal (doublen x, doublen y) template isequal(T x, T y) __NOEXC { // int isnotequal (half x, half y) // shortn isnotequal (halfn x, halfn y) -// igeninteger32bit isnotequal (genfloatf x, genfloatf y) +// igeninteger32bit isnotequal (svgenfloatf x, svgenfloatf y) // int isnotequal (double x, double y) // longn isnotequal (doublen x, doublen y) template isnotequal(T x, T y) __NOEXC { // int isgreater (half x, half y) // shortn isgreater (halfn x, halfn y) -// igeninteger32bit isgreater (genfloatf x, genfloatf y) +// igeninteger32bit isgreater (svgenfloatf x, svgenfloatf y) // int isgreater (double x, double y) // longn isgreater (doublen x, doublen y) template isgreater(T x, T y) __NOEXC { // int isgreaterequal (half x, half y) // shortn isgreaterequal (halfn x, halfn y) -// igeninteger32bit isgreaterequal (genfloatf x, genfloatf y) +// igeninteger32bit isgreaterequal (svgenfloatf x, svgenfloatf y) // int isgreaterequal (double x, double y) // longn isgreaterequal (doublen x, doublen y) template isgreaterequal(T x, T y) __NOEXC { // int isless (half x, half y) // shortn isless (halfn x, halfn y) -// igeninteger32bit isless (genfloatf x, genfloatf y) +// igeninteger32bit isless (svgenfloatf x, svgenfloatf y) // int isless (long x, long y) // longn isless (doublen x, doublen y) template isless(T x, T y) __NOEXC { // int islessequal (half x, half y) // shortn islessequal (halfn x, halfn y) -// igeninteger32bit islessequal (genfloatf x, genfloatf y) +// igeninteger32bit islessequal (svgenfloatf x, svgenfloatf y) // int islessequal (double x, double y) // longn islessequal (doublen x, doublen y) template islessequal(T x, T y) __NOEXC { // int islessgreater (half x, half y) // shortn islessgreater (halfn x, halfn y) -// igeninteger32bit islessgreater (genfloatf x, genfloatf y) +// igeninteger32bit islessgreater (svgenfloatf x, svgenfloatf y) // int islessgreater (double x, double y) // longn islessgreater (doublen x, doublen y) template islessgreater(T x, T y) __NOEXC { // int isfinite (half x) // shortn isfinite (halfn x) -// igeninteger32bit isfinite (genfloatf x) +// igeninteger32bit isfinite (svgenfloatf x) // int isfinite (double x) // longn isfinite (doublen x) template isfinite(T x) __NOEXC { // int isinf (half x) // shortn isinf (halfn x) -// igeninteger32bit isinf (genfloatf x) +// igeninteger32bit isinf (svgenfloatf x) // int isinf (double x) // longn isinf (doublen x) template isinf(T x) __NOEXC { // int isnan (half x) // shortn isnan (halfn x) -// igeninteger32bit isnan (genfloatf x) +// igeninteger32bit isnan (svgenfloatf x) // int isnan (double x) // longn isnan (doublen x) template isnan(T x) __NOEXC { // int isnormal (half x) // shortn isnormal (halfn x) -// igeninteger32bit isnormal (genfloatf x) +// igeninteger32bit isnormal (svgenfloatf x) // int isnormal (double x) // longn isnormal (doublen x) template isnormal(T x) __NOEXC { // int isordered (half x) // shortn isordered (halfn x, halfn y) -// igeninteger32bit isordered (genfloatf x, genfloatf y) +// igeninteger32bit isordered (svgenfloatf x, svgenfloatf y) // int isordered (double x, double y) // longn isordered (doublen x, doublen y) template isordered(T x, T y) __NOEXC { // int isunordered (half x, half y) // shortn isunordered (halfn x, halfn y) -// igeninteger32bit isunordered (genfloatf x, genfloatf y) +// igeninteger32bit isunordered (svgenfloatf x, svgenfloatf y) // int isunordered (double x, double y) // longn isunordered (doublen x, doublen y) template isunordered(T x, T y) __NOEXC { // int signbit (half x) // shortn signbit (halfn x) -// igeninteger32bit signbit (genfloatf x) +// igeninteger32bit signbit (svgenfloatf x) // int signbit (double) // longn signbit (doublen x) template (a, b, c); } -// genfloatf select (genfloatf a, genfloatf b, genint c) +// svgenfloatf select (svgenfloatf a, svgenfloatf b, genint c) template detail::enable_if_t< detail::is_svgenfloatf::value && detail::is_genint::value, T> @@ -1513,7 +1513,7 @@ select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_select(a, b, c); } -// genfloatf select (genfloatf a, genfloatf b, ugenint c) +// svgenfloatf select (svgenfloatf a, svgenfloatf b, ugenint c) template detail::enable_if_t< detail::is_svgenfloatf::value && detail::is_ugenint::value, T> @@ -1522,7 +1522,7 @@ select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_select(a, b, c); } -// genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) +// svgenfloatd select (svgenfloatd a, svgenfloatd b, igeninteger64 c) template detail::enable_if_t::value && detail::is_igeninteger64bit::value, @@ -1532,7 +1532,7 @@ select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_select(a, b, c); } -// genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) +// svgenfloatd select (svgenfloatd a, svgenfloatd b, ugeninteger64 c) template detail::enable_if_t::value && detail::is_ugeninteger64bit::value, @@ -1542,7 +1542,7 @@ select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_select(a, b, c); } -// genfloath select (genfloath a, genfloath b, igeninteger16 c) +// svgenfloath select (svgenfloath a, svgenfloath b, igeninteger16 c) template detail::enable_if_t::value && detail::is_igeninteger16bit::value, @@ -1552,7 +1552,7 @@ select(T a, T b, T2 c) __NOEXC { return __sycl_std::__invoke_select(a, b, c); } -// genfloath select (genfloath a, genfloath b, ugeninteger16 c) +// svgenfloath select (svgenfloath a, svgenfloath b, ugeninteger16 c) template detail::enable_if_t::value && detail::is_ugeninteger16bit::value, @@ -1618,87 +1618,87 @@ __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(powr) #undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD -// genfloatf cos (genfloatf x) +// svgenfloatf cos (svgenfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_native_cos(x); } -// genfloatf divide (genfloatf x, genfloatf y) +// svgenfloatf divide (svgenfloatf x, svgenfloatf y) template detail::enable_if_t::value, T> divide(T x, T y) __NOEXC { return __sycl_std::__invoke_native_divide(x, y); } -// genfloatf exp (genfloatf x) +// svgenfloatf exp (svgenfloatf x) template detail::enable_if_t::value, T> exp(T x) __NOEXC { return __sycl_std::__invoke_native_exp(x); } -// genfloatf exp2 (genfloatf x) +// svgenfloatf exp2 (svgenfloatf x) template detail::enable_if_t::value, T> exp2(T x) __NOEXC { return __sycl_std::__invoke_native_exp2(x); } -// genfloatf exp10 (genfloatf x) +// svgenfloatf exp10 (svgenfloatf x) template detail::enable_if_t::value, T> exp10(T x) __NOEXC { return __sycl_std::__invoke_native_exp10(x); } -// genfloatf log (genfloatf x) +// svgenfloatf log (svgenfloatf x) template detail::enable_if_t::value, T> log(T x) __NOEXC { return __sycl_std::__invoke_native_log(x); } -// genfloatf log2 (genfloatf x) +// svgenfloatf log2 (svgenfloatf x) template detail::enable_if_t::value, T> log2(T x) __NOEXC { return __sycl_std::__invoke_native_log2(x); } -// genfloatf log10 (genfloatf x) +// svgenfloatf log10 (svgenfloatf x) template detail::enable_if_t::value, T> log10(T x) __NOEXC { return __sycl_std::__invoke_native_log10(x); } -// genfloatf powr (genfloatf x, genfloatf y) +// svgenfloatf powr (svgenfloatf x, svgenfloatf y) template detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_native_powr(x, y); } -// genfloatf recip (genfloatf x) +// svgenfloatf recip (svgenfloatf x) template detail::enable_if_t::value, T> recip(T x) __NOEXC { return __sycl_std::__invoke_native_recip(x); } -// genfloatf rsqrt (genfloatf x) +// svgenfloatf rsqrt (svgenfloatf x) template detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_native_rsqrt(x); } -// genfloatf sin (genfloatf x) +// svgenfloatf sin (svgenfloatf x) template detail::enable_if_t::value, T> sin(T x) __NOEXC { return __sycl_std::__invoke_native_sin(x); } -// genfloatf sqrt (genfloatf x) +// svgenfloatf sqrt (svgenfloatf x) template detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_native_sqrt(x); } -// genfloatf tan (genfloatf x) +// svgenfloatf tan (svgenfloatf x) template detail::enable_if_t::value, T> tan(T x) __NOEXC { return __sycl_std::__invoke_native_tan(x); @@ -1759,87 +1759,87 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(powr) #undef __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD -// genfloatf cos (genfloatf x) +// svgenfloatf cos (svgenfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { return __sycl_std::__invoke_half_cos(x); } -// genfloatf divide (genfloatf x, genfloatf y) +// svgenfloatf divide (svgenfloatf x, svgenfloatf y) template detail::enable_if_t::value, T> divide(T x, T y) __NOEXC { return __sycl_std::__invoke_half_divide(x, y); } -// genfloatf exp (genfloatf x) +// svgenfloatf exp (svgenfloatf x) template detail::enable_if_t::value, T> exp(T x) __NOEXC { return __sycl_std::__invoke_half_exp(x); } -// genfloatf exp2 (genfloatf x) +// svgenfloatf exp2 (svgenfloatf x) template detail::enable_if_t::value, T> exp2(T x) __NOEXC { return __sycl_std::__invoke_half_exp2(x); } -// genfloatf exp10 (genfloatf x) +// svgenfloatf exp10 (svgenfloatf x) template detail::enable_if_t::value, T> exp10(T x) __NOEXC { return __sycl_std::__invoke_half_exp10(x); } -// genfloatf log (genfloatf x) +// svgenfloatf log (svgenfloatf x) template detail::enable_if_t::value, T> log(T x) __NOEXC { return __sycl_std::__invoke_half_log(x); } -// genfloatf log2 (genfloatf x) +// svgenfloatf log2 (svgenfloatf x) template detail::enable_if_t::value, T> log2(T x) __NOEXC { return __sycl_std::__invoke_half_log2(x); } -// genfloatf log10 (genfloatf x) +// svgenfloatf log10 (svgenfloatf x) template detail::enable_if_t::value, T> log10(T x) __NOEXC { return __sycl_std::__invoke_half_log10(x); } -// genfloatf powr (genfloatf x, genfloatf y) +// svgenfloatf powr (svgenfloatf x, svgenfloatf y) template detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return __sycl_std::__invoke_half_powr(x, y); } -// genfloatf recip (genfloatf x) +// svgenfloatf recip (svgenfloatf x) template detail::enable_if_t::value, T> recip(T x) __NOEXC { return __sycl_std::__invoke_half_recip(x); } -// genfloatf rsqrt (genfloatf x) +// svgenfloatf rsqrt (svgenfloatf x) template detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return __sycl_std::__invoke_half_rsqrt(x); } -// genfloatf sin (genfloatf x) +// svgenfloatf sin (svgenfloatf x) template detail::enable_if_t::value, T> sin(T x) __NOEXC { return __sycl_std::__invoke_half_sin(x); } -// genfloatf sqrt (genfloatf x) +// svgenfloatf sqrt (svgenfloatf x) template detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return __sycl_std::__invoke_half_sqrt(x); } -// genfloatf tan (genfloatf x) +// svgenfloatf tan (svgenfloatf x) template detail::enable_if_t::value, T> tan(T x) __NOEXC { return __sycl_std::__invoke_half_tan(x); @@ -1878,74 +1878,74 @@ inline __SYCL_ALWAYS_INLINE return native::powr(x, y); } -// genfloatf cos (genfloatf x) +// svgenfloatf cos (svgenfloatf x) template detail::enable_if_t::value, T> cos(T x) __NOEXC { return native::cos(x); } -// genfloatf exp (genfloatf x) +// svgenfloatf exp (svgenfloatf x) template detail::enable_if_t::value, T> exp(T x) __NOEXC { return native::exp(x); } -// genfloatf exp2 (genfloatf x) +// svgenfloatf exp2 (svgenfloatf x) template detail::enable_if_t::value, T> exp2(T x) __NOEXC { return native::exp2(x); } -// genfloatf exp10 (genfloatf x) +// svgenfloatf exp10 (svgenfloatf x) template detail::enable_if_t::value, T> exp10(T x) __NOEXC { return native::exp10(x); } -// genfloatf log(genfloatf x) +// svgenfloatf log(svgenfloatf x) template detail::enable_if_t::value, T> log(T x) __NOEXC { return native::log(x); } -// genfloatf log2 (genfloatf x) +// svgenfloatf log2 (svgenfloatf x) template detail::enable_if_t::value, T> log2(T x) __NOEXC { return native::log2(x); } -// genfloatf log10 (genfloatf x) +// svgenfloatf log10 (svgenfloatf x) template detail::enable_if_t::value, T> log10(T x) __NOEXC { return native::log10(x); } -// genfloatf powr (genfloatf x) +// svgenfloatf powr (svgenfloatf x) template detail::enable_if_t::value, T> powr(T x, T y) __NOEXC { return native::powr(x, y); } -// genfloatf rsqrt (genfloatf x) +// svgenfloatf rsqrt (svgenfloatf x) template detail::enable_if_t::value, T> rsqrt(T x) __NOEXC { return native::rsqrt(x); } -// genfloatf sin (genfloatf x) +// svgenfloatf sin (svgenfloatf x) template detail::enable_if_t::value, T> sin(T x) __NOEXC { return native::sin(x); } -// genfloatf sqrt (genfloatf x) +// svgenfloatf sqrt (svgenfloatf x) template detail::enable_if_t::value, T> sqrt(T x) __NOEXC { return native::sqrt(x); } -// genfloatf tan (genfloatf x) +// svgenfloatf tan (svgenfloatf x) template detail::enable_if_t::value, T> tan(T x) __NOEXC { return native::tan(x); From 12de028eedcbd5ca500cf6716a90ca3a2d57f526 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 16 Sep 2022 13:39:15 +0100 Subject: [PATCH 23/34] Add alignment attr for Windows. Signed-off-by: JackAKirk --- sycl/include/sycl/marray.hpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index 26ccbe94d1f74..b4f0757b49ec5 100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -40,11 +40,22 @@ constexpr std::size_t vecAlignment() { return res; } +#if defined(_WIN32) || defined(_WIN64) +#define MARRAY_WINDOWS_ALIGN_ATTR \ + __declspec(align(vecAlignment())) +#define MARRAY_LINUX_ALIGN_ATTR +#else +#define MARRAY_WINDOWS_ALIGN_ATTR +#define MARRAY_LINUX_ALIGN_ATTR \ + __attribute__((aligned(vecAlignment()))) +#endif + /// Provides a cross-patform math array class template that works on /// SYCL devices as well as in host C++ code. /// /// \ingroup sycl_api -template class marray { +template +class MARRAY_WINDOWS_ALIGN_ATTR marray { using DataT = Type; public: @@ -321,7 +332,7 @@ template class marray { } return Ret; } -} __attribute__((aligned(vecAlignment()))); +} MARRAY_LINUX_ALIGN_ATTR; #define __SYCL_MAKE_MARRAY_ALIAS(ALIAS, TYPE, N) \ using ALIAS##N = sycl::marray; @@ -359,5 +370,8 @@ __SYCL_MAKE_MARRAY_ALIASES_FOR_MARRAY_LENGTH(16) #undef __SYCL_MAKE_MARRAY_ALIASES_FOR_SIGNED_AND_UNSIGNED_TYPES #undef __SYCL_MAKE_MARRAY_ALIASES_FOR_MARRAY_LENGTH +#undef MARRAY_LINUX_ALIGN_ATTR +#undef MARRAY_WINDOWS_ALIGN_ATTR + } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl From f574019b14bc85df601aca528beaf3246690143c Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 21 Sep 2022 17:35:29 +0100 Subject: [PATCH 24/34] Made impl vs2019 compiler compatible. Signed-off-by: JackAKirk --- sycl/include/sycl/marray.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index b4f0757b49ec5..be6144ca915d8 100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -17,9 +17,9 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -template -constexpr std::size_t vecAlignment() { +template constexpr std::size_t vecAlignment() { static_assert(N > 0, "Invalid number of elements."); + size_t SizeOfT = sizeof(T); static_assert(SizeOfT > 0, "Invalid size of T."); // First find the "previous" vector num elements. size_t res = N >= 16 ? 16 @@ -42,12 +42,12 @@ constexpr std::size_t vecAlignment() { #if defined(_WIN32) || defined(_WIN64) #define MARRAY_WINDOWS_ALIGN_ATTR \ - __declspec(align(vecAlignment())) + __declspec(align(vecAlignment())) #define MARRAY_LINUX_ALIGN_ATTR #else #define MARRAY_WINDOWS_ALIGN_ATTR #define MARRAY_LINUX_ALIGN_ATTR \ - __attribute__((aligned(vecAlignment()))) + __attribute__((aligned(vecAlignment()))) #endif /// Provides a cross-patform math array class template that works on From e4cbfaf3a8bdbbe38c6d8acb1efc456b105d424a Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 21 Sep 2022 19:06:42 +0100 Subject: [PATCH 25/34] Make SizeOfT constexpr Signed-off-by: JackAKirk --- sycl/include/sycl/marray.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index be6144ca915d8..6556382508183 100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -19,7 +19,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { template constexpr std::size_t vecAlignment() { static_assert(N > 0, "Invalid number of elements."); - size_t SizeOfT = sizeof(T); + constexpr size_t SizeOfT = sizeof(T); static_assert(SizeOfT > 0, "Invalid size of T."); // First find the "previous" vector num elements. size_t res = N >= 16 ? 16 From 9538849ac639c6d2fb53bc8c58f0f6e82504f314 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 22 Sep 2022 11:38:42 +0100 Subject: [PATCH 26/34] Add comments to explain exp2/tanh impl. Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 2e29f49b11241..03a8a441d0312 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -93,6 +93,8 @@ int printf(const FormatT *__format, Args... args) { namespace native { // genfloatfh tanh (genfloatfh x) +// sycl::native::tanh is only implemented on nvptx backend so far. For other +// backends we revert to the sycl::tanh impl. template inline __SYCL_ALWAYS_INLINE sycl::detail::enable_if_t::value || @@ -140,6 +142,8 @@ tanh(sycl::marray x) __NOEXC { } // genfloath exp2 (genfloath x) +// sycl::native::exp2 (using half) is only implemented on nvptx backend so far. +// For other backends we revert to the sycl::exp2 impl. template inline __SYCL_ALWAYS_INLINE sycl::detail::enable_if_t::value, T> From 4824d63e89d15348acd9a93ceeee0f7df7b91433 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Tue, 18 Oct 2022 02:39:49 -0700 Subject: [PATCH 27/34] Revert marray alignment change Signed-off-by: JackAKirk --- sycl/include/sycl/marray.hpp | 43 +++--------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index 6556382508183..d17dace286264 100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -17,45 +17,11 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -template constexpr std::size_t vecAlignment() { - static_assert(N > 0, "Invalid number of elements."); - constexpr size_t SizeOfT = sizeof(T); - static_assert(SizeOfT > 0, "Invalid size of T."); - // First find the "previous" vector num elements. - size_t res = N >= 16 ? 16 - : N >= 8 ? 8 - : N >= 4 ? 4 - : N >= 3 ? 3 - : N >= 2 ? 2 - : 1; - // Then calculate the alignment size in bytes, making sure it's power of 2. - res *= SizeOfT; - res--; - res |= res >> 1; - res |= res >> 2; - res |= res >> 4; - res |= res >> 8; - res |= res >> 16; - res++; - return res; -} - -#if defined(_WIN32) || defined(_WIN64) -#define MARRAY_WINDOWS_ALIGN_ATTR \ - __declspec(align(vecAlignment())) -#define MARRAY_LINUX_ALIGN_ATTR -#else -#define MARRAY_WINDOWS_ALIGN_ATTR -#define MARRAY_LINUX_ALIGN_ATTR \ - __attribute__((aligned(vecAlignment()))) -#endif - -/// Provides a cross-patform math array class template that works on +/// Provides a cross-platform math array class template that works on /// SYCL devices as well as in host C++ code. /// /// \ingroup sycl_api -template -class MARRAY_WINDOWS_ALIGN_ATTR marray { +template class marray { using DataT = Type; public: @@ -332,7 +298,7 @@ class MARRAY_WINDOWS_ALIGN_ATTR marray { } return Ret; } -} MARRAY_LINUX_ALIGN_ATTR; +}; #define __SYCL_MAKE_MARRAY_ALIAS(ALIAS, TYPE, N) \ using ALIAS##N = sycl::marray; @@ -370,8 +336,5 @@ __SYCL_MAKE_MARRAY_ALIASES_FOR_MARRAY_LENGTH(16) #undef __SYCL_MAKE_MARRAY_ALIASES_FOR_SIGNED_AND_UNSIGNED_TYPES #undef __SYCL_MAKE_MARRAY_ALIASES_FOR_MARRAY_LENGTH -#undef MARRAY_LINUX_ALIGN_ATTR -#undef MARRAY_WINDOWS_ALIGN_ATTR - } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl From a409474702e438dc5c498d065db5419a8305c72b Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 21 Oct 2022 07:05:48 -0700 Subject: [PATCH 28/34] Added non-native fallback comments for marray cases. Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 03a8a441d0312..23155fcb04c52 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -110,11 +110,13 @@ inline __SYCL_ALWAYS_INLINE #endif } -// These marray math function implementations use vectorizations of +// The marray math function implementations use vectorizations of // size two as a simple general optimization. A more complex implementation // using larger vectorizations for large marray sizes is possible; however more // testing is required in order to ascertain the performance implications for // all backends. +// sycl::native::tanh is only implemented on nvptx backend so far. For other +// backends we revert to the sycl::tanh impl. template inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || std::is_same::value, @@ -157,6 +159,8 @@ inline __SYCL_ALWAYS_INLINE #endif } +// sycl::native::exp2 (using half) is only implemented on nvptx backend so far. +// For other backends we revert to the sycl::exp2 impl. template inline __SYCL_ALWAYS_INLINE sycl::marray exp2(sycl::marray x) __NOEXC { From e0cbd725f8fceccc9aedba29e11fa60306b1ecbd Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Mon, 31 Oct 2022 11:02:35 +0000 Subject: [PATCH 29/34] c++14 -> c++17 Signed-off-by: JackAKirk --- sycl/include/sycl/builtins.hpp | 6 +++--- .../sycl/ext/oneapi/experimental/builtins.hpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 7408c1ddba832..41187f6c60f03 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -37,7 +37,7 @@ namespace __sycl_std = __host_std; #define __FAST_MATH_GENFLOAT(T) \ (detail::is_svgenfloatd::value || detail::is_svgenfloath::value) #define __FAST_MATH_SGENFLOAT(T) \ - (std::is_same::value || std::is_same::value) + (std::is_same_v || std::is_same_v) #else #define __FAST_MATH_GENFLOAT(T) (detail::is_svgenfloat::value) #define __FAST_MATH_SGENFLOAT(T) (detail::is_sgenfloat::value) @@ -1853,7 +1853,7 @@ detail::enable_if_t::value, T> tan(T x) __NOEXC { #define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ - std::enable_if_t::value, marray> \ + std::enable_if_t, marray> \ NAME(marray x) __NOEXC { \ return native::NAME(x); \ } @@ -1873,7 +1873,7 @@ __SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) template inline __SYCL_ALWAYS_INLINE - std::enable_if_t::value, marray> + std::enable_if_t, marray> powr(marray x, marray y) __NOEXC { return native::powr(x, y); } diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 23155fcb04c52..e7bed37e3fea1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -118,8 +118,8 @@ inline __SYCL_ALWAYS_INLINE // sycl::native::tanh is only implemented on nvptx backend so far. For other // backends we revert to the sycl::tanh impl. template -inline __SYCL_ALWAYS_INLINE std::enable_if_t::value || - std::is_same::value, +inline __SYCL_ALWAYS_INLINE std::enable_if_t || + std::is_same_v, sycl::marray> tanh(sycl::marray x) __NOEXC { sycl::marray res; @@ -188,7 +188,7 @@ exp2(sycl::marray x) __NOEXC { } // namespace native template -std::enable_if_t::value, T> fabs(T x) { +std::enable_if_t, T> fabs(T x) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) return bfloat16::from_bits(__clc_fabs(x.raw())); #else @@ -220,7 +220,7 @@ sycl::marray fabs(sycl::marray x) { } template -std::enable_if_t::value, T> fmin(T x, T y) { +std::enable_if_t, T> fmin(T x, T y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) return bfloat16::from_bits(__clc_fmin(x.raw(), y.raw())); #else @@ -258,7 +258,7 @@ sycl::marray fmin(sycl::marray x, } template -std::enable_if_t::value, T> fmax(T x, T y) { +std::enable_if_t, T> fmax(T x, T y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) return bfloat16::from_bits(__clc_fmax(x.raw(), y.raw())); #else @@ -295,7 +295,7 @@ sycl::marray fmax(sycl::marray x, } template -std::enable_if_t::value, T> fma(T x, T y, T z) { +std::enable_if_t, T> fma(T x, T y, T z) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) return bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); #else From a0cdf603d8643649caf112cf644b120ce1bf3885 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Dec 2022 19:45:39 +0000 Subject: [PATCH 30/34] Update host error msg, switch to sycl::bfloat16. Signed-off-by: JackAKirk --- .../sycl/ext/oneapi/experimental/builtins.hpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 7e1a4b4f91804..a87ec5a8b0a18 100755 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ @@ -185,7 +185,7 @@ std::enable_if_t, T> fabs(T x) { return sycl::bfloat16::from_bits(__clc_fabs(x.raw())); #else std::ignore = x; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -206,7 +206,7 @@ sycl::marray fabs(sycl::marray x) { return res; #else std::ignore = x; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -218,7 +218,7 @@ std::enable_if_t, T> fmin(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -244,7 +244,7 @@ sycl::marray fmin(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -256,7 +256,7 @@ std::enable_if_t, T> fmax(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -281,20 +281,20 @@ sycl::marray fmax(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -std::enable_if_t, T> fma(T x, T y, T z) { +std::enable_if_t, T> fma(T x, T y, T z) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) return sycl::bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); #else std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -322,7 +322,7 @@ sycl::marray fma(sycl::marray x, std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("sycl::bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } From 1d88f2d2da1c8e6c3b3df7cbac417010958b2321 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Dec 2022 19:56:29 +0000 Subject: [PATCH 31/34] format and used bfloat16 without fully qualified name. Signed-off-by: JackAKirk --- .../sycl/ext/oneapi/experimental/builtins.hpp | 88 ++++++++++--------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index a87ec5a8b0a18..86b5c0c6f8270 100755 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -180,20 +180,21 @@ exp2(sycl::marray x) __NOEXC { } // namespace native template -std::enable_if_t, T> fabs(T x) { +std::enable_if_t, T> fabs(T x) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return sycl::bfloat16::from_bits(__clc_fabs(x.raw())); + return bfloat16::from_bits(__clc_fabs(x.raw())); #else std::ignore = x; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -sycl::marray fabs(sycl::marray x) { +sycl::marray fabs(sycl::marray x) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; + sycl::marray res; for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fabs(detail::to_uint32_t(x, i * 2)); @@ -201,33 +202,35 @@ sycl::marray fabs(sycl::marray x) { } if (N % 2) { - res[N - 1] = sycl::bfloat16::from_bits(__clc_fabs(x[N - 1].raw())); + res[N - 1] = bfloat16::from_bits(__clc_fabs(x[N - 1].raw())); } return res; #else std::ignore = x; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -std::enable_if_t, T> fmin(T x, T y) { +std::enable_if_t, T> fmin(T x, T y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return sycl::bfloat16::from_bits(__clc_fmin(x.raw(), y.raw())); + return bfloat16::from_bits(__clc_fmin(x.raw(), y.raw())); #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -sycl::marray fmin(sycl::marray x, - sycl::marray y) { +sycl::marray fmin(sycl::marray x, + sycl::marray y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; + sycl::marray res; for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fmin(detail::to_uint32_t(x, i * 2), @@ -237,35 +240,37 @@ sycl::marray fmin(sycl::marray x, if (N % 2) { res[N - 1] = - sycl::bfloat16::from_bits(__clc_fmin(x[N - 1].raw(), y[N - 1].raw())); + bfloat16::from_bits(__clc_fmin(x[N - 1].raw(), y[N - 1].raw())); } return res; #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -std::enable_if_t, T> fmax(T x, T y) { +std::enable_if_t, T> fmax(T x, T y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return sycl::bfloat16::from_bits(__clc_fmax(x.raw(), y.raw())); + return bfloat16::from_bits(__clc_fmax(x.raw(), y.raw())); #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -sycl::marray fmax(sycl::marray x, - sycl::marray y) { +sycl::marray fmax(sycl::marray x, + sycl::marray y) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; + sycl::marray res; for (size_t i = 0; i < N / 2; i++) { auto partial_res = __clc_fmax(detail::to_uint32_t(x, i * 2), @@ -275,36 +280,38 @@ sycl::marray fmax(sycl::marray x, if (N % 2) { res[N - 1] = - sycl::bfloat16::from_bits(__clc_fmax(x[N - 1].raw(), y[N - 1].raw())); + bfloat16::from_bits(__clc_fmax(x[N - 1].raw(), y[N - 1].raw())); } return res; #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -std::enable_if_t, T> fma(T x, T y, T z) { +std::enable_if_t, T> fma(T x, T y, T z) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return sycl::bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); + return bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); #else std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } template -sycl::marray fma(sycl::marray x, - sycl::marray y, - sycl::marray z) { +sycl::marray fma(sycl::marray x, + sycl::marray y, + sycl::marray z) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; + sycl::marray res; for (size_t i = 0; i < N / 2; i++) { auto partial_res = @@ -314,7 +321,7 @@ sycl::marray fma(sycl::marray x, } if (N % 2) { - res[N - 1] = sycl::bfloat16::from_bits( + res[N - 1] = bfloat16::from_bits( __clc_fma(x[N - 1].raw(), y[N - 1].raw(), z[N - 1].raw())); } return res; @@ -322,8 +329,9 @@ sycl::marray fma(sycl::marray x, std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } From 877252602ca754e30df50095ffacad02aa48383b Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 1 Dec 2022 20:43:55 +0000 Subject: [PATCH 32/34] Remove duplicated bfloat16 math functs. Signed-off-by: JackAKirk --- .../ext/oneapi/experimental/bfloat16_math.hpp | 16 +- .../sycl/ext/oneapi/experimental/builtins.hpp | 156 ------------------ 2 files changed, 8 insertions(+), 164 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index 8bce9d045eb59..53b2486e3ad92 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -39,7 +39,7 @@ std::enable_if_t::value, T> fabs(T x) { return oneapi::detail::bitsToBfloat16(__clc_fabs(XBits)); #else std::ignore = x; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -62,7 +62,7 @@ sycl::marray fabs(sycl::marray x) { return res; #else std::ignore = x; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -76,7 +76,7 @@ std::enable_if_t::value, T> fmin(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -105,7 +105,7 @@ sycl::marray fmin(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -119,7 +119,7 @@ std::enable_if_t::value, T> fmax(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -147,7 +147,7 @@ sycl::marray fmax(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -163,7 +163,7 @@ std::enable_if_t::value, T> fma(T x, T y, T z) { std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -196,7 +196,7 @@ sycl::marray fma(sycl::marray x, std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 is not currently supported on the host device.", + throw runtime_error("bfloat16 math functions are not currently supported on the host device.", PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 86b5c0c6f8270..30d2612854a11 100755 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -179,162 +179,6 @@ exp2(sycl::marray x) __NOEXC { } // namespace native -template -std::enable_if_t, T> fabs(T x) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fabs(x.raw())); -#else - std::ignore = x; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fabs(sycl::marray x) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fabs(detail::to_uint32_t(x, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = bfloat16::from_bits(__clc_fabs(x[N - 1].raw())); - } - return res; -#else - std::ignore = x; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t, T> fmin(T x, T y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fmin(x.raw(), y.raw())); -#else - std::ignore = x; - std::ignore = y; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fmin(sycl::marray x, - sycl::marray y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fmin(detail::to_uint32_t(x, i * 2), - detail::to_uint32_t(y, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = - bfloat16::from_bits(__clc_fmin(x[N - 1].raw(), y[N - 1].raw())); - } - - return res; -#else - std::ignore = x; - std::ignore = y; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t, T> fmax(T x, T y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fmax(x.raw(), y.raw())); -#else - std::ignore = x; - std::ignore = y; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fmax(sycl::marray x, - sycl::marray y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fmax(detail::to_uint32_t(x, i * 2), - detail::to_uint32_t(y, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = - bfloat16::from_bits(__clc_fmax(x[N - 1].raw(), y[N - 1].raw())); - } - return res; -#else - std::ignore = x; - std::ignore = y; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t, T> fma(T x, T y, T z) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); -#else - std::ignore = x; - std::ignore = y; - std::ignore = z; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fma(sycl::marray x, - sycl::marray y, - sycl::marray z) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = - __clc_fma(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2), - detail::to_uint32_t(z, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = bfloat16::from_bits( - __clc_fma(x[N - 1].raw(), y[N - 1].raw(), z[N - 1].raw())); - } - return res; -#else - std::ignore = x; - std::ignore = y; - std::ignore = z; - throw runtime_error( - "bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - } // namespace experimental } // namespace oneapi } // namespace ext From db896f29b465efa821a7565e8b20c3adfb7ff724 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 2 Dec 2022 09:26:24 +0000 Subject: [PATCH 33/34] Format. Signed-off-by: JackAKirk --- .../ext/oneapi/experimental/bfloat16_math.hpp | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index 53b2486e3ad92..f6d0039780153 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -39,8 +39,9 @@ std::enable_if_t::value, T> fabs(T x) { return oneapi::detail::bitsToBfloat16(__clc_fabs(XBits)); #else std::ignore = x; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -62,8 +63,9 @@ sycl::marray fabs(sycl::marray x) { return res; #else std::ignore = x; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -76,8 +78,9 @@ std::enable_if_t::value, T> fmin(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -105,8 +108,9 @@ sycl::marray fmin(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -119,8 +123,9 @@ std::enable_if_t::value, T> fmax(T x, T y) { #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -147,8 +152,9 @@ sycl::marray fmax(sycl::marray x, #else std::ignore = x; std::ignore = y; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -163,8 +169,9 @@ std::enable_if_t::value, T> fma(T x, T y, T z) { std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } @@ -196,8 +203,9 @@ sycl::marray fma(sycl::marray x, std::ignore = x; std::ignore = y; std::ignore = z; - throw runtime_error("bfloat16 math functions are not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); + throw runtime_error( + "bfloat16 math functions are not currently supported on the host device.", + PI_ERROR_INVALID_DEVICE); #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) } From 201ae24117f06bdb00e393f2000a715f727cd1d1 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Fri, 2 Dec 2022 09:32:27 +0000 Subject: [PATCH 34/34] format Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/experimental/builtins.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 30d2612854a11..479ca9032d5f1 100755 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -110,10 +110,10 @@ inline __SYCL_ALWAYS_INLINE // sycl::native::tanh is only implemented on nvptx backend so far. For other // backends we revert to the sycl::tanh impl. template -inline __SYCL_ALWAYS_INLINE std::enable_if_t || - std::is_same_v, - sycl::marray> -tanh(sycl::marray x) __NOEXC { +inline __SYCL_ALWAYS_INLINE + std::enable_if_t || std::is_same_v, + sycl::marray> + tanh(sycl::marray x) __NOEXC { sycl::marray res; #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) #define FUNC_VEC native::tanh