diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 42ed4b062e6b4..72334378348fc 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -29,414 +29,6 @@ extern __DPCPP_SYCL_EXTERNAL_LIBC int strncmp(const char *s1, const char *s2, size_t n); extern __DPCPP_SYCL_EXTERNAL_LIBC int rand(); extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmax(long long int x, - long long int y); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmin(long long int x, - long long int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_max(int x, int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_min(int x, int y); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_ullmax(unsigned long long int x, unsigned long long int y); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_ullmin(unsigned long long int x, unsigned long long int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_umax(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_umin(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_brev(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_brevll(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int -__imf_byte_perm(unsigned int x, unsigned int y, unsigned int s); -extern __DPCPP_SYCL_EXTERNAL int __imf_ffs(int x); -extern __DPCPP_SYCL_EXTERNAL int __imf_ffsll(long long int x); -extern __DPCPP_SYCL_EXTERNAL int __imf_clz(int x); -extern __DPCPP_SYCL_EXTERNAL int __imf_clzll(long long int x); -extern __DPCPP_SYCL_EXTERNAL int __imf_popc(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL int __imf_popcll(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, - unsigned int z); -extern __DPCPP_SYCL_EXTERNAL unsigned int -__imf_usad(unsigned int x, unsigned int y, unsigned int z); -extern __DPCPP_SYCL_EXTERNAL int __imf_rhadd(int x, int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_hadd(int x, int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_urhadd(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_uhadd(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_mul24(int x, int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_umul24(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_mulhi(int x, int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_umulhi(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_mul64hi(long long int x, - long long int y); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_umul64hi(unsigned long long int x, unsigned long long int y); -extern __DPCPP_SYCL_EXTERNAL int __imf_abs(int x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_llabs(long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_saturatef(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL float __imf_fabsf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_floorf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ceilf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_truncf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_rintf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_nearbyintf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_sqrtf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_rsqrtf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_invf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaxf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fminf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_copysignf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_exp10f(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_expf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_logf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log2f(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log10f(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_powf(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fast_fdividef(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fadd_rd(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fadd_rn(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fadd_ru(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fadd_rz(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fsub_rd(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fsub_rn(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fsub_ru(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fsub_rz(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rd(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rn(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_ru(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rz(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y); -extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fsigmf(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_rd(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_rn(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_ru(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL float __imf_fmaf_rz(float x, float y, float z); -extern __DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rd(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rn(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_sqrtf_ru(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rz(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rz(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rd(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rn(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_ru(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rz(float x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rd(float x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rn(float x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_ru(float x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rz(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rd(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rn(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_ru(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rz(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_float_as_int(float x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_float_as_uint(float x); -extern __DPCPP_SYCL_EXTERNAL float __imf_int2float_rd(int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_int2float_rn(int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_int2float_ru(int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_int2float_rz(int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_int_as_float(int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ll2float_rd(long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ll2float_rn(long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ll2float_ru(long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ll2float_rz(long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_uint2float_rd(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_uint2float_rn(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_uint2float_ru(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_uint2float_rz(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_uint_as_float(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ull2float_rd(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ull2float_rn(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ull2float_ru(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_ull2float_rz(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL float __imf_half2float(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_float2half_rd(float x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_float2half_rn(float x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_float2half_ru(float x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_float2half_rz(float x); -extern __DPCPP_SYCL_EXTERNAL int __imf_half2int_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL int __imf_half2int_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL int __imf_half2int_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL int __imf_half2int_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_half2ll_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL short __imf_half2short_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL short __imf_half2short_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL short __imf_half2short_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL short __imf_half2short_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rd(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rn(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_ru(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rz(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL short __imf_half_as_short(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL unsigned short __imf_half_as_ushort(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_int2half_rd(int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_int2half_rn(int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_int2half_ru(int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_int2half_rz(int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ll2half_rd(long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ll2half_rn(long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ll2half_ru(long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ll2half_rz(long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_short2half_rd(short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_short2half_rn(short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_short2half_ru(short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_short2half_rz(short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_short_as_half(short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_uint2half_rd(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_uint2half_rn(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_uint2half_ru(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_uint2half_rz(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ull2half_rd(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ull2half_rn(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ull2half_ru(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ull2half_rz(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ushort2half_rd(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ushort2half_rn(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ushort2half_ru(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ushort2half_rz(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ushort_as_half(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_double2half(double x); - -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fmaf16(_Float16 x, _Float16 y, - _Float16 z); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fabsf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_floorf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_ceilf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_truncf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_rintf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_nearbyintf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_sqrtf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_rsqrtf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_invf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fmaxf16(_Float16 x, _Float16 y); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fminf16(_Float16 x, _Float16 y); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_fsigmf16(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL _Float16 __imf_copysignf16(_Float16 x, _Float16 y); -extern __DPCPP_SYCL_EXTERNAL float __imf_half2float(_Float16 x); -extern __DPCPP_SYCL_EXTERNAL float __imf_bfloat162float(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned short -__imf_bfloat162ushort_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned short -__imf_bfloat162ushort_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned short -__imf_bfloat162ushort_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned short -__imf_bfloat162ushort_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long -__imf_bfloat162ull_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long -__imf_bfloat162ull_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long -__imf_bfloat162ull_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long -__imf_bfloat162ull_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rd(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rn(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_ru(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rz(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16(float x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rd(float x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rn(float x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_ru(float x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rz(float x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ushort2bfloat16_rd(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ushort2bfloat16_rn(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ushort2bfloat16_ru(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ushort2bfloat16_rz(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rd(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rn(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_ru(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rz(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ull2bfloat16_rd(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ull2bfloat16_rn(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ull2bfloat16_ru(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ull2bfloat16_rz(unsigned long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rd(short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rn(short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_ru(short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rz(short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rd(int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rn(int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_ru(int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rz(int x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rd(long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rn(long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_ru(long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rz(long long x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_double2bfloat16(double x); -extern __DPCPP_SYCL_EXTERNAL short __imf_bfloat16_as_short(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL unsigned short -__imf_bfloat16_as_ushort(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_short_as_bfloat16(short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t -__imf_ushort_as_bfloat16(unsigned short x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fmabf16(uint16_t x, uint16_t y, - uint16_t z); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fmaxbf16(uint16_t x, uint16_t y); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fminbf16(uint16_t x, uint16_t y); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fabsbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_rintbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_floorbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_ceilbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_truncbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_copysignbf16(uint16_t x, - uint16_t y); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_fsigmbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_sqrtbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL uint16_t __imf_rsqrtbf16(uint16_t x); -extern __DPCPP_SYCL_EXTERNAL double __imf_fma(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL double __imf_fma_rd(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL double __imf_fma_rn(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL double __imf_fma_ru(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL double __imf_fma_rz(double x, double y, double z); -extern __DPCPP_SYCL_EXTERNAL double __imf_fabs(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_floor(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_ceil(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_trunc(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_rint(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_rcp64h(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_nearbyint(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_rsqrt(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_inv(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_fmax(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_fmin(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_copysign(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dadd_rd(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dadd_rn(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dadd_ru(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dadd_rz(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dsub_rd(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dsub_rn(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dsub_ru(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dsub_rz(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rd(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rn(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_ru(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rz(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y); -extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rd(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rn(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_ru(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rz(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt_rd(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt_rn(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt_ru(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt_rz(double x); -extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x); -extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x); -extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x); -extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rz(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2hiint(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2loint(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2int_rd(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2int_rn(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2int_ru(double x); -extern __DPCPP_SYCL_EXTERNAL int __imf_double2int_rz(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_int2double_rn(int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rd(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rn(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_ru(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rz(double x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_double2ll_rd(double x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_double2ll_rn(double x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_double2ll_ru(double x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_double2ll_rz(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_ll2double_rd(long long int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_ll2double_rn(long long int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_ll2double_ru(long long int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_ll2double_rz(long long int x); -extern __DPCPP_SYCL_EXTERNAL double -__imf_ull2double_rd(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL double -__imf_ull2double_rn(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL double -__imf_ull2double_ru(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL double -__imf_ull2double_rz(unsigned long long int x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_double2ull_rd(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_double2ull_rn(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_double2ull_ru(double x); -extern __DPCPP_SYCL_EXTERNAL unsigned long long int -__imf_double2ull_rz(double x); -extern __DPCPP_SYCL_EXTERNAL long long int __imf_double_as_longlong(double x); -extern __DPCPP_SYCL_EXTERNAL double __imf_longlong_as_double(long long int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_uint2double_rd(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_uint2double_rn(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_uint2double_ru(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_uint2double_rz(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL double __imf_hiloint2double(int hi, int lo); - -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabs2(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabs4(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsss2(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsss4(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vneg2(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vneg4(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vnegss2(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vnegss4(unsigned int x); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffs2(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffs4(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffu2(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffu4(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vadd2(unsigned int x, - unsigned int y); -extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vadd4(unsigned int x, - unsigned int y); extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vaddss2(unsigned int x, unsigned int y); extern __DPCPP_SYCL_EXTERNAL unsigned int __imf_vaddss4(unsigned int x, diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp index d652cdccb4ff3..58cab8ff5f045 100644 --- a/sycl/include/sycl/ext/intel/math.hpp +++ b/sycl/include/sycl/ext/intel/math.hpp @@ -34,38 +34,6 @@ using _iml_bf16_internal = uint16_t; #include #include -extern "C" { -float __imf_saturatef(float); -float __imf_copysignf(float, float); -double __imf_copysign(double, double); -_iml_half_internal __imf_copysignf16(_iml_half_internal, _iml_half_internal); -float __imf_ceilf(float); -double __imf_ceil(double); -_iml_half_internal __imf_ceilf16(_iml_half_internal); -float __imf_floorf(float); -double __imf_floor(double); -_iml_half_internal __imf_floorf16(_iml_half_internal); -float __imf_fsigmf(float); -_iml_half_internal __imf_fsigmf16(_iml_half_internal); -_iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal); -float __imf_rintf(float); -double __imf_rint(double); -_iml_half_internal __imf_invf16(_iml_half_internal); -float __imf_invf(float); -double __imf_inv(double); -_iml_half_internal __imf_rintf16(_iml_half_internal); -float __imf_sqrtf(float); -double __imf_sqrt(double); -_iml_half_internal __imf_sqrtf16(_iml_half_internal); -float __imf_rsqrtf(float); -double __imf_rsqrt(double); -_iml_half_internal __imf_rsqrtf16(_iml_half_internal); -float __imf_truncf(float); -double __imf_trunc(double); -_iml_half_internal __imf_truncf16(_iml_half_internal); -double __imf_rcp64h(double); -}; - namespace sycl { inline namespace _V1 { namespace ext::intel::math { @@ -73,11 +41,30 @@ namespace ext::intel::math { static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal), "sycl::half is not compatible with _iml_half_internal."); +/// -------------------------------------------------------------------------- +/// saturate(x) function +/// Clamp the float input to [+0.0, 1.0]. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_saturatef(float); +}; + template std::enable_if_t, float> saturate(Tp x) { return __imf_saturatef(x); } +/// -------------------------------------------------------------------------- +/// copysign(x) function +/// Create value with given magnitude, copying sign of second input. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_copysignf(float, float); +__DPCPP_SYCL_EXTERNAL double __imf_copysign(double, double); +__DPCPP_SYCL_EXTERNAL +_iml_half_internal __imf_copysignf16(_iml_half_internal, _iml_half_internal); +}; + template std::enable_if_t, float> copysign(Tp x, Tp y) { return __imf_copysignf(x, y); @@ -96,6 +83,16 @@ std::enable_if_t, sycl::half> copysign(Tp x, return sycl::bit_cast(__imf_copysignf16(xi, yi)); } +/// -------------------------------------------------------------------------- +/// ceil(x) function +/// Return ceiling value of the input. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_ceilf(float); +__DPCPP_SYCL_EXTERNAL double __imf_ceil(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ceilf16(_iml_half_internal); +}; + template std::enable_if_t, float> ceil(Tp x) { return __imf_ceilf(x); @@ -117,6 +114,16 @@ std::enable_if_t, sycl::half2> ceil(Tp x) { return sycl::half2{ceil(x.s0()), ceil(x.s1())}; } +/// -------------------------------------------------------------------------- +/// floor(x) function +/// Return the largest integral value less than or equal to input. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_floorf(float); +__DPCPP_SYCL_EXTERNAL double __imf_floor(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_floorf16(_iml_half_internal); +}; + template std::enable_if_t, float> floor(Tp x) { return __imf_floorf(x); @@ -138,6 +145,16 @@ std::enable_if_t, sycl::half2> floor(Tp x) { return sycl::half2{floor(x.s0()), floor(x.s1())}; } +/// -------------------------------------------------------------------------- +/// inv(x) function +/// Return 1.0 / x. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_invf(float); +__DPCPP_SYCL_EXTERNAL double __imf_inv(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_invf16(_iml_half_internal); +}; + template std::enable_if_t, float> inv(Tp x) { return __imf_invf(x); @@ -159,6 +176,16 @@ std::enable_if_t, sycl::half2> inv(Tp x) { return sycl::half2{inv(x.s0()), inv(x.s1())}; } +/// -------------------------------------------------------------------------- +/// rint(x) function +/// round a floating-point value to the nearest integer value. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_rintf(float); +__DPCPP_SYCL_EXTERNAL double __imf_rint(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_rintf16(_iml_half_internal); +}; + template std::enable_if_t, float> rint(Tp x) { return __imf_rintf(x); @@ -180,6 +207,16 @@ std::enable_if_t, sycl::half2> rint(Tp x) { return sycl::half2{rint(x.s0()), rint(x.s1())}; } +/// -------------------------------------------------------------------------- +/// sqrt(x) function +/// Return square root of input. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_sqrtf(float); +__DPCPP_SYCL_EXTERNAL double __imf_sqrt(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_sqrtf16(_iml_half_internal); +}; + template std::enable_if_t, float> sqrt(Tp x) { return __imf_sqrtf(x); @@ -201,6 +238,16 @@ std::enable_if_t, sycl::half2> sqrt(Tp x) { return sycl::half2{sqrt(x.s0()), sqrt(x.s1())}; } +/// -------------------------------------------------------------------------- +/// rsqrt(x) function +/// Return 1.0 / sqrt(x). +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_rsqrtf(float); +__DPCPP_SYCL_EXTERNAL double __imf_rsqrt(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_rsqrtf16(_iml_half_internal); +}; + template std::enable_if_t, float> rsqrt(Tp x) { return __imf_rsqrtf(x); @@ -222,6 +269,16 @@ std::enable_if_t, sycl::half2> rsqrt(Tp x) { return sycl::half2{rsqrt(x.s0()), rsqrt(x.s1())}; } +/// -------------------------------------------------------------------------- +/// trunc(x) function +/// Truncate input to the integral part. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_truncf(float); +__DPCPP_SYCL_EXTERNAL double __imf_trunc(double); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_truncf16(_iml_half_internal); +}; + template std::enable_if_t, float> trunc(Tp x) { return __imf_truncf(x); @@ -243,17 +300,26 @@ std::enable_if_t, sycl::half2> trunc(Tp x) { return sycl::half2{trunc(x.s0()), trunc(x.s1())}; } +/// -------------------------------------------------------------------------- +/// rcp64h(x) function +/// Provide high 32 bits of 1.0 / x. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_rcp64h(double); +}; + template std::enable_if_t, double> rcp64h(Tp x) { return __imf_rcp64h(x); } + /// -------------------------------------------------------------------------- /// sigmoid(x) function /// -------------------------------------------------------------------------- extern "C" { -_iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal x); -_iml_half_internal __imf_fsigmf16(_iml_half_internal x); -float __imf_fsigmf(float x); +__DPCPP_SYCL_EXTERNAL _iml_bf16_internal __imf_fsigmbf16(_iml_bf16_internal x); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_fsigmf16(_iml_half_internal x); +__DPCPP_SYCL_EXTERNAL float __imf_fsigmf(float x); }; template diff --git a/sycl/include/sycl/ext/intel/math/imf_fp_conversions.hpp b/sycl/include/sycl/ext/intel/math/imf_fp_conversions.hpp index 50c1e35225a6a..6c59be1b7d636 100644 --- a/sycl/include/sycl/ext/intel/math/imf_fp_conversions.hpp +++ b/sycl/include/sycl/ext/intel/math/imf_fp_conversions.hpp @@ -15,201 +15,33 @@ #include #include -extern "C" { -int __imf_float2int_rd(float); -int __imf_float2int_rn(float); -int __imf_float2int_ru(float); -int __imf_float2int_rz(float); -unsigned int __imf_float2uint_rd(float); -unsigned int __imf_float2uint_rn(float); -unsigned int __imf_float2uint_ru(float); -unsigned int __imf_float2uint_rz(float); -long long int __imf_float2ll_rd(float); -long long int __imf_float2ll_rn(float); -long long int __imf_float2ll_ru(float); -long long int __imf_float2ll_rz(float); -unsigned long long int __imf_float2ull_rd(float); -unsigned long long int __imf_float2ull_rn(float); -unsigned long long int __imf_float2ull_ru(float); -unsigned long long int __imf_float2ull_rz(float); -int __imf_float_as_int(float); -unsigned int __imf_float_as_uint(float); -float __imf_int2float_rd(int); -float __imf_int2float_rn(int); -float __imf_int2float_ru(int); -float __imf_int2float_rz(int); -float __imf_int_as_float(int); -float __imf_ll2float_rd(long long int); -float __imf_ll2float_rn(long long int); -float __imf_ll2float_ru(long long int); -float __imf_ll2float_rz(long long int); -float __imf_uint2float_rd(unsigned int); -float __imf_uint2float_rn(unsigned int); -float __imf_uint2float_ru(unsigned int); -float __imf_uint2float_rz(unsigned int); -float __imf_uint_as_float(unsigned int); -float __imf_ull2float_rd(unsigned long long int); -float __imf_ull2float_rn(unsigned long long int); -float __imf_ull2float_ru(unsigned long long int); -float __imf_ull2float_rz(unsigned long long int); -float __imf_double2float_rd(double); -float __imf_double2float_rn(double); -float __imf_double2float_ru(double); -float __imf_double2float_rz(double); -int __imf_double2hiint(double); -int __imf_double2loint(double); -int __imf_double2int_rd(double); -int __imf_double2int_rn(double); -int __imf_double2int_ru(double); -int __imf_double2int_rz(double); -long long __imf_double2ll_rd(double); -long long __imf_double2ll_rn(double); -long long __imf_double2ll_ru(double); -long long __imf_double2ll_rz(double); -unsigned int __imf_double2uint_rd(double); -unsigned int __imf_double2uint_rn(double); -unsigned int __imf_double2uint_ru(double); -unsigned int __imf_double2uint_rz(double); -unsigned long long __imf_double2ull_rd(double); -unsigned long long __imf_double2ull_rn(double); -unsigned long long __imf_double2ull_ru(double); -unsigned long long __imf_double2ull_rz(double); -long long __imf_double_as_longlong(double); -double __imf_hiloint2double(int, int); -double __imf_int2double_rn(int); -double __imf_ll2double_rd(long long); -double __imf_ll2double_rn(long long); -double __imf_ll2double_ru(long long); -double __imf_ll2double_rz(long long); -double __imf_longlong_as_double(long long); -double __imf_uint2double_rn(unsigned); -double __imf_ull2double_rd(unsigned long long); -double __imf_ull2double_rn(unsigned long long); -double __imf_ull2double_ru(unsigned long long); -double __imf_ull2double_rz(unsigned long long); -float __imf_half2float(_iml_half_internal); -_iml_half_internal __imf_float2half_rd(float); -_iml_half_internal __imf_float2half_rn(float); -_iml_half_internal __imf_float2half_ru(float); -_iml_half_internal __imf_float2half_rz(float); -int __imf_half2int_rd(_iml_half_internal); -int __imf_half2int_rn(_iml_half_internal); -int __imf_half2int_ru(_iml_half_internal); -int __imf_half2int_rz(_iml_half_internal); -long long __imf_half2ll_rd(_iml_half_internal); -long long __imf_half2ll_rn(_iml_half_internal); -long long __imf_half2ll_ru(_iml_half_internal); -long long __imf_half2ll_rz(_iml_half_internal); -short __imf_half2short_rd(_iml_half_internal); -short __imf_half2short_rn(_iml_half_internal); -short __imf_half2short_ru(_iml_half_internal); -short __imf_half2short_rz(_iml_half_internal); -unsigned int __imf_half2uint_rd(_iml_half_internal); -unsigned int __imf_half2uint_rn(_iml_half_internal); -unsigned int __imf_half2uint_ru(_iml_half_internal); -unsigned int __imf_half2uint_rz(_iml_half_internal); -unsigned long long __imf_half2ull_rd(_iml_half_internal); -unsigned long long __imf_half2ull_rn(_iml_half_internal); -unsigned long long __imf_half2ull_ru(_iml_half_internal); -unsigned long long __imf_half2ull_rz(_iml_half_internal); -unsigned short __imf_half2ushort_rd(_iml_half_internal); -unsigned short __imf_half2ushort_rn(_iml_half_internal); -unsigned short __imf_half2ushort_ru(_iml_half_internal); -unsigned short __imf_half2ushort_rz(_iml_half_internal); -short __imf_half_as_short(_iml_half_internal); -unsigned short __imf_half_as_ushort(_iml_half_internal); -_iml_half_internal __imf_int2half_rd(int); -_iml_half_internal __imf_int2half_rn(int); -_iml_half_internal __imf_int2half_ru(int); -_iml_half_internal __imf_int2half_rz(int); -_iml_half_internal __imf_ll2half_rd(long long); -_iml_half_internal __imf_ll2half_rn(long long); -_iml_half_internal __imf_ll2half_ru(long long); -_iml_half_internal __imf_ll2half_rz(long long); -_iml_half_internal __imf_short2half_rd(short); -_iml_half_internal __imf_short2half_rn(short); -_iml_half_internal __imf_short2half_ru(short); -_iml_half_internal __imf_short2half_rz(short); -_iml_half_internal __imf_short_as_half(short); -_iml_half_internal __imf_uint2half_rd(unsigned int); -_iml_half_internal __imf_uint2half_rn(unsigned int); -_iml_half_internal __imf_uint2half_ru(unsigned int); -_iml_half_internal __imf_uint2half_rz(unsigned int); -_iml_half_internal __imf_ull2half_rd(unsigned long long); -_iml_half_internal __imf_ull2half_rn(unsigned long long); -_iml_half_internal __imf_ull2half_ru(unsigned long long); -_iml_half_internal __imf_ull2half_rz(unsigned long long); -_iml_half_internal __imf_ushort2half_rd(unsigned short); -_iml_half_internal __imf_ushort2half_rn(unsigned short); -_iml_half_internal __imf_ushort2half_ru(unsigned short); -_iml_half_internal __imf_ushort2half_rz(unsigned short); -_iml_half_internal __imf_ushort_as_half(unsigned short); -_iml_half_internal __imf_double2half(double); -unsigned short __imf_bfloat162ushort_rd(uint16_t); -unsigned short __imf_bfloat162ushort_rn(uint16_t); -unsigned short __imf_bfloat162ushort_ru(uint16_t); -unsigned short __imf_bfloat162ushort_rz(uint16_t); -short __imf_bfloat162short_rd(uint16_t); -short __imf_bfloat162short_rn(uint16_t); -short __imf_bfloat162short_ru(uint16_t); -short __imf_bfloat162short_rz(uint16_t); -unsigned int __imf_bfloat162uint_rd(uint16_t); -unsigned int __imf_bfloat162uint_rn(uint16_t); -unsigned int __imf_bfloat162uint_ru(uint16_t); -unsigned int __imf_bfloat162uint_rz(uint16_t); -int __imf_bfloat162int_rd(uint16_t); -int __imf_bfloat162int_rn(uint16_t); -int __imf_bfloat162int_ru(uint16_t); -int __imf_bfloat162int_rz(uint16_t); -unsigned long long __imf_bfloat162ull_rd(uint16_t); -unsigned long long __imf_bfloat162ull_rn(uint16_t); -unsigned long long __imf_bfloat162ull_ru(uint16_t); -unsigned long long __imf_bfloat162ull_rz(uint16_t); -long long __imf_bfloat162ll_rd(uint16_t); -long long __imf_bfloat162ll_rn(uint16_t); -long long __imf_bfloat162ll_ru(uint16_t); -long long __imf_bfloat162ll_rz(uint16_t); -float __imf_bfloat162float(uint16_t); -uint16_t __imf_float2bfloat16(float); -uint16_t __imf_float2bfloat16_rd(float); -uint16_t __imf_float2bfloat16_rn(float); -uint16_t __imf_float2bfloat16_ru(float); -uint16_t __imf_float2bfloat16_rz(float); -uint16_t __imf_ushort2bfloat16_rd(unsigned short); -uint16_t __imf_ushort2bfloat16_rn(unsigned short); -uint16_t __imf_ushort2bfloat16_ru(unsigned short); -uint16_t __imf_ushort2bfloat16_rz(unsigned short); -uint16_t __imf_uint2bfloat16_rd(unsigned int); -uint16_t __imf_uint2bfloat16_rn(unsigned int); -uint16_t __imf_uint2bfloat16_ru(unsigned int); -uint16_t __imf_uint2bfloat16_rz(unsigned int); -uint16_t __imf_ull2bfloat16_rd(unsigned long long); -uint16_t __imf_ull2bfloat16_rn(unsigned long long); -uint16_t __imf_ull2bfloat16_ru(unsigned long long); -uint16_t __imf_ull2bfloat16_rz(unsigned long long); -uint16_t __imf_short2bfloat16_rd(short); -uint16_t __imf_short2bfloat16_rn(short); -uint16_t __imf_short2bfloat16_ru(short); -uint16_t __imf_short2bfloat16_rz(short); -uint16_t __imf_int2bfloat16_rd(int); -uint16_t __imf_int2bfloat16_rn(int); -uint16_t __imf_int2bfloat16_ru(int); -uint16_t __imf_int2bfloat16_rz(int); -uint16_t __imf_ll2bfloat16_rd(long long); -uint16_t __imf_ll2bfloat16_rn(long long); -uint16_t __imf_ll2bfloat16_ru(long long); -uint16_t __imf_ll2bfloat16_rz(long long); -uint16_t __imf_double2bfloat16(double); -short __imf_bfloat16_as_short(uint16_t); -unsigned short __imf_bfloat16_as_ushort(uint16_t); -uint16_t __imf_short_as_bfloat16(short); -uint16_t __imf_ushort_as_bfloat16(unsigned short); -}; - namespace sycl { inline namespace _V1 { namespace ext::intel::math { +/// -------------------------------------------------------------------------- +/// float to integral conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float); +__DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float); +__DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float); +__DPCPP_SYCL_EXTERNAL int __imf_float2int_rz(float); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rd(float); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rn(float); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_ru(float); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_float2uint_rz(float); +__DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rd(float); +__DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rn(float); +__DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_ru(float); +__DPCPP_SYCL_EXTERNAL long long int __imf_float2ll_rz(float); +__DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rd(float); +__DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rn(float); +__DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_ru(float); +__DPCPP_SYCL_EXTERNAL unsigned long long int __imf_float2ull_rz(float); +}; + template To float2int_rd(From x) { return __imf_float2int_rd(x); } @@ -286,6 +118,29 @@ To float2ull_rz(From x) { return __imf_float2ull_rz(x); } +/// -------------------------------------------------------------------------- +/// integral to float conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_int2float_rd(int); +__DPCPP_SYCL_EXTERNAL float __imf_int2float_rn(int); +__DPCPP_SYCL_EXTERNAL float __imf_int2float_ru(int); +__DPCPP_SYCL_EXTERNAL float __imf_int2float_rz(int); +__DPCPP_SYCL_EXTERNAL float __imf_ll2float_rd(long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ll2float_rn(long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ll2float_ru(long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ll2float_rz(long long int); +__DPCPP_SYCL_EXTERNAL float __imf_uint2float_rd(unsigned int); +__DPCPP_SYCL_EXTERNAL float __imf_uint2float_rn(unsigned int); +__DPCPP_SYCL_EXTERNAL float __imf_uint2float_ru(unsigned int); +__DPCPP_SYCL_EXTERNAL float __imf_uint2float_rz(unsigned int); +__DPCPP_SYCL_EXTERNAL float __imf_ull2float_rd(unsigned long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ull2float_rn(unsigned long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ull2float_ru(unsigned long long int); +__DPCPP_SYCL_EXTERNAL float __imf_ull2float_rz(unsigned long long int); +}; + template To ll2float_rd(From x) { return __imf_ll2float_rd(x); @@ -362,6 +217,16 @@ To uint2float_rz(From x) { return __imf_uint2float_rz(x); } +/// -------------------------------------------------------------------------- +/// Reinterpret bits in float type as int/unsigned int type and vice versa. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_float_as_int(float); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_float_as_uint(float); +__DPCPP_SYCL_EXTERNAL float __imf_int_as_float(int); +__DPCPP_SYCL_EXTERNAL float __imf_uint_as_float(unsigned int); +}; + template To float_as_int(From x) { return __imf_float_as_int(x); } @@ -380,6 +245,17 @@ To uint_as_float(From x) { return __imf_uint_as_float(x); } +/// -------------------------------------------------------------------------- +/// double to float conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double); +__DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double); +__DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double); +__DPCPP_SYCL_EXTERNAL float __imf_double2float_rz(double); +}; + template To double2float_rd(From x) { return __imf_double2float_rd(x); @@ -400,6 +276,14 @@ To double2float_rz(From x) { return __imf_double2float_rz(x); } +/// -------------------------------------------------------------------------- +/// Reinterpret high/low 32 bits in a double as a signed integer. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_double2hiint(double); +__DPCPP_SYCL_EXTERNAL int __imf_double2loint(double); +}; + template To double2hiint(From x) { return __imf_double2hiint(x); } @@ -408,6 +292,29 @@ template To double2loint(From x) { return __imf_double2loint(x); } +/// -------------------------------------------------------------------------- +/// double to integral conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_double2int_rd(double); +__DPCPP_SYCL_EXTERNAL int __imf_double2int_rn(double); +__DPCPP_SYCL_EXTERNAL int __imf_double2int_ru(double); +__DPCPP_SYCL_EXTERNAL int __imf_double2int_rz(double); +__DPCPP_SYCL_EXTERNAL long long __imf_double2ll_rd(double); +__DPCPP_SYCL_EXTERNAL long long __imf_double2ll_rn(double); +__DPCPP_SYCL_EXTERNAL long long __imf_double2ll_ru(double); +__DPCPP_SYCL_EXTERNAL long long __imf_double2ll_rz(double); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rd(double); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rn(double); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_ru(double); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_double2uint_rz(double); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_double2ull_rd(double); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_double2ull_rn(double); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_double2ull_ru(double); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_double2ull_rz(double); +}; + template To double2int_rd(From x) { return __imf_double2int_rd(x); } @@ -484,6 +391,14 @@ To double2ull_rz(From x) { return __imf_double2ull_rz(x); } +/// -------------------------------------------------------------------------- +/// Reinterpret bits in double as long long and vice versa. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL long long __imf_double_as_longlong(double); +__DPCPP_SYCL_EXTERNAL double __imf_longlong_as_double(long long); +}; + template To double_as_longlong(From x) { return __imf_double_as_longlong(x); @@ -494,11 +409,26 @@ To longlong_as_double(From x) { return __imf_longlong_as_double(x); } +/// -------------------------------------------------------------------------- +/// Reinterpret 2 32-bit integers as high and low 32-bits in a double value. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_hiloint2double(int, int); +}; + template To hiloint2double(From x, From y) { return __imf_hiloint2double(x, y); } +/// -------------------------------------------------------------------------- +/// signed and unsigned int type to double conversions in rounding towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_int2double_rn(int); +__DPCPP_SYCL_EXTERNAL double __imf_uint2double_rn(unsigned); +}; + template To int2double_rn(From x) { return __imf_int2double_rn(x); } @@ -508,6 +438,21 @@ To uint2double_rn(From x) { return __imf_uint2double_rn(x); } +/// -------------------------------------------------------------------------- +/// signed and unsigned long long type to double conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_ll2double_rd(long long); +__DPCPP_SYCL_EXTERNAL double __imf_ll2double_rn(long long); +__DPCPP_SYCL_EXTERNAL double __imf_ll2double_ru(long long); +__DPCPP_SYCL_EXTERNAL double __imf_ll2double_rz(long long); +__DPCPP_SYCL_EXTERNAL double __imf_ull2double_rd(unsigned long long); +__DPCPP_SYCL_EXTERNAL double __imf_ull2double_rn(unsigned long long); +__DPCPP_SYCL_EXTERNAL double __imf_ull2double_ru(unsigned long long); +__DPCPP_SYCL_EXTERNAL double __imf_ull2double_rz(unsigned long long); +}; + template To ll2double_rd(From x) { return __imf_ll2double_rd(x); @@ -548,11 +493,29 @@ To ull2double_rz(From x) { return __imf_ull2double_rz(x); } +/// -------------------------------------------------------------------------- +/// half to float conversions +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_half2float(_iml_half_internal); +}; + template To half2float(From x) { return __imf_half2float(sycl::bit_cast<_iml_half_internal>(x)); } +/// -------------------------------------------------------------------------- +/// float to half conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_float2half_rd(float); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_float2half_rn(float); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_float2half_ru(float); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_float2half_rz(float); +}; + template To float2half_rn(From x) { return sycl::bit_cast(__imf_float2half_rn(x)); @@ -573,11 +536,49 @@ To float2half_rz(From x) { return sycl::bit_cast(__imf_float2half_rz(x)); } +/// -------------------------------------------------------------------------- +/// double to half conversions in rounding to-nearest-even. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_double2half(double); +}; + template To double2half(From x) { return sycl::bit_cast(__imf_double2half(x)); } +/// -------------------------------------------------------------------------- +/// half to integral type conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_half2int_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL int __imf_half2int_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL int __imf_half2int_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL int __imf_half2int_rz(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL long long __imf_half2ll_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL long long __imf_half2ll_rz(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL short __imf_half2short_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL short __imf_half2short_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL short __imf_half2short_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL short __imf_half2short_rz(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_half2uint_rz(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_half2ull_rz(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rd(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rn(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_ru(_iml_half_internal); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_half2ushort_rz(_iml_half_internal); +}; + template To half2int_rn(From x) { return __imf_half2int_rn(sycl::bit_cast<_iml_half_internal>(x)); @@ -698,6 +699,37 @@ To half2ull_rz(From x) { return __imf_half2ull_rz(sycl::bit_cast<_iml_half_internal>(x)); } +/// -------------------------------------------------------------------------- +/// integral type to half conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_int2half_rd(int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_int2half_rn(int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_int2half_ru(int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_int2half_rz(int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ll2half_rd(long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ll2half_rn(long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ll2half_ru(long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ll2half_rz(long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_short2half_rd(short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_short2half_rn(short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_short2half_ru(short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_short2half_rz(short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_uint2half_rd(unsigned int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_uint2half_rn(unsigned int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_uint2half_ru(unsigned int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_uint2half_rz(unsigned int); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ull2half_rd(unsigned long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ull2half_rn(unsigned long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ull2half_ru(unsigned long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ull2half_rz(unsigned long long); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ushort2half_rd(unsigned short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ushort2half_rn(unsigned short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ushort2half_ru(unsigned short); +__DPCPP_SYCL_EXTERNAL _iml_half_internal __imf_ushort2half_rz(unsigned short); +}; + template To int2half_rn(From x) { return sycl::bit_cast(__imf_int2half_rn(x)); @@ -818,6 +850,16 @@ To ull2half_rz(From x) { return sycl::bit_cast(__imf_ull2half_rz(x)); } +/// -------------------------------------------------------------------------- +/// Reinterpret bits in bfloat16 type as short/unsigned short and vice versa. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL short __imf_bfloat16_as_short(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_bfloat16_as_ushort(uint16_t); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_short_as_bfloat16(short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ushort_as_bfloat16(unsigned short); +}; + template To bfloat16_as_ushort(From x) { @@ -842,11 +884,29 @@ To short_as_bfloat16(From x) { __imf_short_as_bfloat16(x)); } +/// -------------------------------------------------------------------------- +/// bfloat16 to float conversions +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_bfloat162float(uint16_t); +}; + template To bfloat162float(From x) { return __imf_bfloat162float(sycl::bit_cast(x)); } +/// -------------------------------------------------------------------------- +/// float to bfloat16 conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rd(float); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rn(float); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_ru(float); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rz(float); +}; + template To float2bfloat16(From x) { return sycl::bit_cast( @@ -877,6 +937,37 @@ To float2bfloat16_rz(From x) { __imf_float2bfloat16_rz(x)); } +/// -------------------------------------------------------------------------- +/// integral to bfloat16 conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rd(unsigned short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rn(unsigned short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_ru(unsigned short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rz(unsigned short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rd(unsigned int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rn(unsigned int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_ru(unsigned int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rz(unsigned int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rd(unsigned long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rn(unsigned long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_ru(unsigned long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rz(unsigned long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rd(short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rn(short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_ru(short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rz(short); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rd(int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rn(int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_ru(int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_int2bfloat16_rz(int); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rd(long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rn(long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_ru(long long); +__DPCPP_SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rz(long long); +}; + template To ushort2bfloat16_rd(From x) { @@ -1017,11 +1108,49 @@ To ll2bfloat16_rz(From x) { return sycl::bit_cast(__imf_ll2bfloat16_rz(x)); } +/// -------------------------------------------------------------------------- +/// double to bfloat16 conversions in rounding to-nearest-even. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL uint16_t __imf_double2bfloat16(double); +}; + template To double2bfloat16(From x) { return sycl::bit_cast(__imf_double2bfloat16(x)); } +/// -------------------------------------------------------------------------- +/// bfloat16 to integral conversions +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rz(uint16_t); +__DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL short __imf_bfloat162short_rz(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rz(uint16_t); +__DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL int __imf_bfloat162int_rz(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rz(uint16_t); +__DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rd(uint16_t); +__DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rn(uint16_t); +__DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_ru(uint16_t); +__DPCPP_SYCL_EXTERNAL long long __imf_bfloat162ll_rz(uint16_t); +}; + template To bfloat162uint_rd(From x) { diff --git a/sycl/include/sycl/ext/intel/math/imf_integer_utils.hpp b/sycl/include/sycl/ext/intel/math/imf_integer_utils.hpp index 0a72281e1ea54..7d88cc5ca975e 100644 --- a/sycl/include/sycl/ext/intel/math/imf_integer_utils.hpp +++ b/sycl/include/sycl/ext/intel/math/imf_integer_utils.hpp @@ -10,52 +10,51 @@ #pragma once -extern "C" { -unsigned __imf_brev(unsigned); -unsigned long long __imf_brevll(unsigned long long); -unsigned __imf_byte_perm(unsigned, unsigned, unsigned); -long long __imf_llmax(long long x, long long y); -long long __imf_llmin(long long x, long long y); -int __imf_max(int x, int y); -int __imf_min(int x, int y); -unsigned long long __imf_ullmax(unsigned long long x, unsigned long long y); -unsigned long long __imf_ullmin(unsigned long long x, unsigned long long y); -unsigned __imf_umax(unsigned x, unsigned y); -unsigned __imf_umin(unsigned x, unsigned y); -int __imf_clz(int); -int __imf_clzll(long long); -int __imf_ffs(int); -int __imf_ffsll(long long); -int __imf_mul24(int, int); -int __imf_mulhi(int, int); -long long __imf_mul64hi(long long, long long); -int __imf_popc(unsigned); -int __imf_popcll(unsigned long long); -int __imf_rhadd(int, int); -int __imf_hadd(int, int); -unsigned __imf_sad(int, int, unsigned); -unsigned __imf_uhadd(unsigned, unsigned); -unsigned __imf_umul24(unsigned, unsigned); -unsigned __imf_umulhi(unsigned, unsigned); -unsigned long long __imf_umul64hi(unsigned long long, unsigned long long); -unsigned __imf_urhadd(unsigned, unsigned); -unsigned __imf_usad(unsigned, unsigned, unsigned); -} - namespace sycl { inline namespace _V1 { namespace ext::intel::math { +/// -------------------------------------------------------------------------- +/// Reverse the bit order of unsigned integral type. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned __imf_brev(unsigned); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_brevll(unsigned long long); +}; + template unsigned brev(Tp x) { return __imf_brev(x); } template unsigned long long brevll(Tp x) { return __imf_brevll(x); } +/// -------------------------------------------------------------------------- +/// Return selected bytes from two 32-bit unsigned integers. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned __imf_byte_perm(unsigned, unsigned, unsigned); +}; + template unsigned byte_perm(Tp x, Tp y, Tp z) { return __imf_byte_perm(x, y, z); } +/// -------------------------------------------------------------------------- +/// Return maximum/minimum of the integral type input values. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL long long __imf_llmax(long long x, long long y); +__DPCPP_SYCL_EXTERNAL long long __imf_llmin(long long x, long long y); +__DPCPP_SYCL_EXTERNAL int __imf_max(int x, int y); +__DPCPP_SYCL_EXTERNAL int __imf_min(int x, int y); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_ullmax(unsigned long long x, + unsigned long long y); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_ullmin(unsigned long long x, + unsigned long long y); +__DPCPP_SYCL_EXTERNAL unsigned __imf_umax(unsigned x, unsigned y); +__DPCPP_SYCL_EXTERNAL unsigned __imf_umin(unsigned x, unsigned y); +}; + template int max(Tp x, Tp y) { return __imf_max(x, y); } template int min(Tp x, Tp y) { return __imf_min(x, y); } @@ -86,14 +85,45 @@ unsigned long long ullmin(Tp x, Tp y) { return __imf_ullmin(x, y); } +/// -------------------------------------------------------------------------- +/// Return the number of consecutive leading 0 bits in 32/64-bit integer +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_clz(int); +__DPCPP_SYCL_EXTERNAL int __imf_clzll(long long); +}; + template int clz(Tp x) { return __imf_clz(x); } template int clzll(Tp x) { return __imf_clzll(x); } +/// -------------------------------------------------------------------------- +/// Find the position of the LSB set to 1 in a 32/64-bit integer +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_ffs(int); +__DPCPP_SYCL_EXTERNAL int __imf_ffsll(long long); +}; + template int ffs(Tp x) { return __imf_ffs(x); } template int ffsll(Tp x) { return __imf_ffsll(x); } +/// -------------------------------------------------------------------------- +/// hadd(x), uhadd(x) +/// Return average of signed/unsigned int type, avoiding overflow in +/// intermediate sum. +/// rhadd(x), urhadd(x) +/// Return rounded average of signed/unsigned int type, avoiding overflow in +/// intermediate sum. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_rhadd(int, int); +__DPCPP_SYCL_EXTERNAL int __imf_hadd(int, int); +__DPCPP_SYCL_EXTERNAL unsigned __imf_uhadd(unsigned, unsigned); +__DPCPP_SYCL_EXTERNAL unsigned __imf_urhadd(unsigned, unsigned); +}; + template int hadd(Tp x, Tp y) { return __imf_hadd(x, y); } template int rhadd(Tp x, Tp y) { return __imf_rhadd(x, y); } @@ -102,6 +132,31 @@ template unsigned urhadd(Tp x, Tp y) { return __imf_urhadd(x, y); } +template unsigned uhadd(Tp x, Tp y) { + return __imf_uhadd(x, y); +} + +/// -------------------------------------------------------------------------- +/// mul24(x), umul24(x) +/// Return the least significant 32 bits of the product of the least +/// significant 24 bits of two signed/unsigned integers. +/// mulhi(x), umulhi(x) +/// Return the most significant 32 bits of the product of the two 32-bit +/// signed/unsigned integers. +/// mul64hi(x), umul64hi(x) +/// Return the most significant 64 bits of the product of the two 64-bit +/// signed/unsigned integers. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_mul24(int, int); +__DPCPP_SYCL_EXTERNAL int __imf_mulhi(int, int); +__DPCPP_SYCL_EXTERNAL long long __imf_mul64hi(long long, long long); +__DPCPP_SYCL_EXTERNAL unsigned __imf_umul24(unsigned, unsigned); +__DPCPP_SYCL_EXTERNAL unsigned __imf_umulhi(unsigned, unsigned); +__DPCPP_SYCL_EXTERNAL unsigned long long __imf_umul64hi(unsigned long long, + unsigned long long); +}; + template int mul24(Tp x, Tp y) { return __imf_mul24(x, y); } template unsigned umul24(Tp x, Tp y) { @@ -123,12 +178,28 @@ unsigned long long umul64hi(Tp x, Tp y) { return __imf_umul64hi(x, y); } +/// -------------------------------------------------------------------------- +/// Count the number of bits that are set to 1 in a 32-bit integer. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL int __imf_popc(unsigned); +__DPCPP_SYCL_EXTERNAL int __imf_popcll(unsigned long long); +}; + template int popc(Tp x) { return __imf_popc(x); } template int popcll(Tp x) { return __imf_popcll(x); } +/// -------------------------------------------------------------------------- +/// Return |x - y| + z for unsigned/signed integers +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned __imf_sad(int, int, unsigned); +__DPCPP_SYCL_EXTERNAL unsigned __imf_usad(unsigned, unsigned, unsigned); +}; + template unsigned sad(Tp1 x, Tp1 y, Tp2 z) { return __imf_sad(x, y, z); @@ -137,11 +208,6 @@ unsigned sad(Tp1 x, Tp1 y, Tp2 z) { template unsigned usad(Tp x, Tp y, Tp z) { return __imf_usad(x, y, z); } - -template unsigned uhadd(Tp x, Tp y) { - return __imf_uhadd(x, y); -} - } // namespace ext::intel::math } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp index d895d587fc987..166ec0285f8bd 100644 --- a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp +++ b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp @@ -10,70 +10,21 @@ #pragma once -extern "C" { -float __imf_fadd_rz(float, float); -float __imf_fadd_rn(float, float); -float __imf_fadd_ru(float, float); -float __imf_fadd_rd(float, float); -float __imf_fsub_rz(float, float); -float __imf_fsub_rn(float, float); -float __imf_fsub_ru(float, float); -float __imf_fsub_rd(float, float); -float __imf_fmul_rz(float, float); -float __imf_fmul_rn(float, float); -float __imf_fmul_ru(float, float); -float __imf_fmul_rd(float, float); -float __imf_fdiv_rz(float, float); -float __imf_fdiv_rn(float, float); -float __imf_fdiv_ru(float, float); -float __imf_fdiv_rd(float, float); -float __imf_frcp_rz(float); -float __imf_frcp_rn(float); -float __imf_frcp_ru(float); -float __imf_frcp_rd(float); -float __imf_fmaf_rz(float, float, float); -float __imf_fmaf_rn(float, float, float); -float __imf_fmaf_ru(float, float, float); -float __imf_fmaf_rd(float, float, float); -float __imf_sqrtf_rz(float); -float __imf_sqrtf_rn(float); -float __imf_sqrtf_ru(float); -float __imf_sqrtf_rd(float); - -double __imf_dadd_rz(double, double); -double __imf_dadd_rn(double, double); -double __imf_dadd_ru(double, double); -double __imf_dadd_rd(double, double); -double __imf_dsub_rz(double, double); -double __imf_dsub_rn(double, double); -double __imf_dsub_ru(double, double); -double __imf_dsub_rd(double, double); -double __imf_dmul_rz(double, double); -double __imf_dmul_rn(double, double); -double __imf_dmul_ru(double, double); -double __imf_dmul_rd(double, double); -double __imf_ddiv_rz(double, double); -double __imf_ddiv_rn(double, double); -double __imf_ddiv_ru(double, double); -double __imf_ddiv_rd(double, double); -double __imf_drcp_rz(double); -double __imf_drcp_rn(double); -double __imf_drcp_ru(double); -double __imf_drcp_rd(double); -double __imf_fma_rz(double, double, double); -double __imf_fma_rn(double, double, double); -double __imf_fma_ru(double, double, double); -double __imf_fma_rd(double, double, double); -double __imf_sqrt_rz(double); -double __imf_sqrt_rn(double); -double __imf_sqrt_ru(double); -double __imf_sqrt_rd(double); -}; - namespace sycl { inline namespace _V1 { namespace ext::intel::math { +/// -------------------------------------------------------------------------- +/// Add 2 float values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_fadd_rz(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fadd_rn(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fadd_ru(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fadd_rd(float, float); +}; + template Tp fadd_rd(Tp x, Tp y) { return __imf_fadd_rd(x, y); } @@ -90,6 +41,17 @@ template Tp fadd_rz(Tp x, Tp y) { return __imf_fadd_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Substract 2 float values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_fsub_rz(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fsub_rn(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fsub_ru(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fsub_rd(float, float); +}; + template Tp fsub_rd(Tp x, Tp y) { return __imf_fsub_rd(x, y); } @@ -106,6 +68,17 @@ template Tp fsub_rz(Tp x, Tp y) { return __imf_fsub_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Multiply 2 float values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_fmul_rz(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmul_rn(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmul_ru(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmul_rd(float, float); +}; + template Tp fmul_rd(Tp x, Tp y) { return __imf_fmul_rd(x, y); } @@ -122,6 +95,17 @@ template Tp fmul_rz(Tp x, Tp y) { return __imf_fmul_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Divide 2 float values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float, float); +}; + template Tp fdiv_rd(Tp x, Tp y) { return __imf_fdiv_rd(x, y); } @@ -138,6 +122,17 @@ template Tp fdiv_rz(Tp x, Tp y) { return __imf_fdiv_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Return reciprocal of a float value +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float); +__DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float); +__DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float); +__DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float); +}; + template Tp frcp_rd(Tp x) { return __imf_frcp_rd(x); } template Tp frcp_rn(Tp x) { return __imf_frcp_rn(x); } @@ -146,6 +141,17 @@ template Tp frcp_ru(Tp x) { return __imf_frcp_ru(x); } template Tp frcp_rz(Tp x) { return __imf_frcp_rz(x); } +/// -------------------------------------------------------------------------- +/// Return result of 'x * y + z' for float values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_fmaf_rz(float, float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmaf_rn(float, float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmaf_ru(float, float, float); +__DPCPP_SYCL_EXTERNAL float __imf_fmaf_rd(float, float, float); +}; + template Tp fmaf_rd(Tp x, Tp y, Tp z) { return __imf_fmaf_rd(x, y, z); } @@ -162,6 +168,17 @@ template Tp fmaf_rz(Tp x, Tp y, Tp z) { return __imf_fmaf_rz(x, y, z); } +/// -------------------------------------------------------------------------- +/// Square root of a float value +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rz(float); +__DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rn(float); +__DPCPP_SYCL_EXTERNAL float __imf_sqrtf_ru(float); +__DPCPP_SYCL_EXTERNAL float __imf_sqrtf_rd(float); +}; + template Tp fsqrt_rd(Tp x) { return __imf_sqrtf_rd(x); } template Tp fsqrt_rn(Tp x) { return __imf_sqrtf_rn(x); } @@ -170,6 +187,18 @@ template Tp fsqrt_ru(Tp x) { return __imf_sqrtf_ru(x); } template Tp fsqrt_rz(Tp x) { return __imf_sqrtf_rz(x); } + +/// -------------------------------------------------------------------------- +/// Add 2 double values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_dadd_rz(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dadd_rn(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dadd_ru(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dadd_rd(double, double); +}; + template Tp dadd_rd(Tp x, Tp y) { return __imf_dadd_rd(x, y); } @@ -186,6 +215,17 @@ template Tp dadd_rz(Tp x, Tp y) { return __imf_dadd_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Substract 2 double values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_dsub_rz(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dsub_rn(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dsub_ru(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dsub_rd(double, double); +}; + template Tp dsub_rd(Tp x, Tp y) { return __imf_dsub_rd(x, y); } @@ -202,6 +242,17 @@ template Tp dsub_rz(Tp x, Tp y) { return __imf_dsub_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Multiply 2 double values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_dmul_rz(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dmul_rn(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dmul_ru(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_dmul_rd(double, double); +}; + template Tp dmul_rd(Tp x, Tp y) { return __imf_dmul_rd(x, y); } @@ -218,6 +269,17 @@ template Tp dmul_rz(Tp x, Tp y) { return __imf_dmul_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Divide 2 double values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double, double); +__DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double, double); +}; + template Tp ddiv_rd(Tp x, Tp y) { return __imf_ddiv_rd(x, y); } @@ -234,6 +296,17 @@ template Tp ddiv_rz(Tp x, Tp y) { return __imf_ddiv_rz(x, y); } +/// -------------------------------------------------------------------------- +/// Return reciprocal of a double value +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_drcp_rz(double); +__DPCPP_SYCL_EXTERNAL double __imf_drcp_rn(double); +__DPCPP_SYCL_EXTERNAL double __imf_drcp_ru(double); +__DPCPP_SYCL_EXTERNAL double __imf_drcp_rd(double); +}; + template Tp drcp_rd(Tp x) { return __imf_drcp_rd(x); } template Tp drcp_rn(Tp x) { return __imf_drcp_rn(x); } @@ -242,6 +315,17 @@ template Tp drcp_ru(Tp x) { return __imf_drcp_ru(x); } template Tp drcp_rz(Tp x) { return __imf_drcp_rz(x); } +/// -------------------------------------------------------------------------- +/// Return result of 'x * y + z' for double values +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_fma_rz(double, double, double); +__DPCPP_SYCL_EXTERNAL double __imf_fma_rn(double, double, double); +__DPCPP_SYCL_EXTERNAL double __imf_fma_ru(double, double, double); +__DPCPP_SYCL_EXTERNAL double __imf_fma_rd(double, double, double); +}; + template Tp fma_rd(Tp x, Tp y, Tp z) { return __imf_fma_rd(x, y, z); } @@ -258,6 +342,17 @@ template Tp fma_rz(Tp x, Tp y, Tp z) { return __imf_fma_rz(x, y, z); } +/// -------------------------------------------------------------------------- +/// Square root of a double value +/// Support rounding down/up/to-nearest-even/towards-zero +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL double __imf_sqrt_rz(double); +__DPCPP_SYCL_EXTERNAL double __imf_sqrt_rn(double); +__DPCPP_SYCL_EXTERNAL double __imf_sqrt_ru(double); +__DPCPP_SYCL_EXTERNAL double __imf_sqrt_rd(double); +}; + template Tp dsqrt_rd(Tp x) { return __imf_sqrt_rd(x); } template Tp dsqrt_rn(Tp x) { return __imf_sqrt_rn(x); } diff --git a/sycl/include/sycl/ext/intel/math/imf_simd.hpp b/sycl/include/sycl/ext/intel/math/imf_simd.hpp index effcf460b88b8..cbb7b0ef3cc32 100644 --- a/sycl/include/sycl/ext/intel/math/imf_simd.hpp +++ b/sycl/include/sycl/ext/intel/math/imf_simd.hpp @@ -13,20 +13,6 @@ #include extern "C" { -unsigned int __imf_vabs2(unsigned int); -unsigned int __imf_vabs4(unsigned int); -unsigned int __imf_vneg2(unsigned int); -unsigned int __imf_vneg4(unsigned int); -unsigned int __imf_vnegss2(unsigned int); -unsigned int __imf_vnegss4(unsigned int); -unsigned int __imf_vabsdiffs2(unsigned int, unsigned int); -unsigned int __imf_vabsdiffs4(unsigned int, unsigned int); -unsigned int __imf_vabsdiffu2(unsigned int, unsigned int); -unsigned int __imf_vabsdiffu4(unsigned int, unsigned int); -unsigned int __imf_vabsss2(unsigned int); -unsigned int __imf_vabsss4(unsigned int); -unsigned int __imf_vadd2(unsigned int, unsigned int); -unsigned int __imf_vadd4(unsigned int, unsigned int); unsigned int __imf_vaddss2(unsigned int, unsigned int); unsigned int __imf_vaddss4(unsigned int, unsigned int); unsigned int __imf_vaddus2(unsigned int, unsigned int); @@ -140,6 +126,17 @@ namespace sycl { inline namespace _V1 { namespace ext::intel::math { +/// -------------------------------------------------------------------------- +/// vabs2(x) +/// Return per-halfword absolute value. +/// vabs4(x) +/// Return per-byte absolute value. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabs2(unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabs4(unsigned int); +}; + template std::enable_if_t, unsigned int> vabs2(Tp x) { return __imf_vabs2(x); @@ -150,6 +147,17 @@ std::enable_if_t, unsigned int> vabs4(Tp x) { return __imf_vabs4(x); } +/// -------------------------------------------------------------------------- +/// vneg2(x) +/// Return per-halfword negation. +/// vneg4(x) +/// Return per-byte negation. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vneg2(unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vneg4(unsigned int); +}; + template std::enable_if_t, unsigned int> vneg2(Tp x) { return __imf_vneg2(x); @@ -160,6 +168,17 @@ std::enable_if_t, unsigned int> vneg4(Tp x) { return __imf_vneg4(x); } +/// -------------------------------------------------------------------------- +/// vnegss2(x) +/// Return per-halfword negation with signed saturation. +/// vnegss4(x) +/// Return per-byte negation with signed saturation. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vnegss2(unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vnegss4(unsigned int); +}; + template std::enable_if_t, unsigned int> vnegss2(Tp x) { return __imf_vnegss2(x); @@ -170,6 +189,19 @@ std::enable_if_t, unsigned int> vnegss4(Tp x) { return __imf_vnegss4(x); } +/// -------------------------------------------------------------------------- +/// vabsdiffs2(x), vabsdiffu2(x) +/// Return per-halfword absolute difference of signed/unsigned integer. +/// vabsdiffs4(x), vabsdiffu4(x) +/// Return per-byte absolute difference of signed/unsigned integer. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffs2(unsigned int, unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffs4(unsigned int, unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffu2(unsigned int, unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsdiffu4(unsigned int, unsigned int); +}; + template std::enable_if_t, unsigned int> vabsdiffs2(Tp x, Tp y) { @@ -194,6 +226,17 @@ vabsdiffu4(Tp x, Tp y) { return __imf_vabsdiffu4(x, y); } +/// -------------------------------------------------------------------------- +/// vabsss2(x) +/// Return per-halfword absolute value with signed saturation. +/// vabsss4(x) +/// Return per-byte absolute value with signed saturation. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsss2(unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vabsss4(unsigned int); +}; + template std::enable_if_t, unsigned int> vabsss2(Tp x) { return __imf_vabsss2(x); @@ -204,6 +247,17 @@ std::enable_if_t, unsigned int> vabsss4(Tp x) { return __imf_vabsss4(x); } +/// -------------------------------------------------------------------------- +/// vadd2(x, y) +/// Return per-halfword unsigned addition ignoring overflow. +/// vadd4(x, y) +/// Return per-byte unsigned addition ignoring overflow. +/// -------------------------------------------------------------------------- +extern "C" { +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vadd2(unsigned int, unsigned int); +__DPCPP_SYCL_EXTERNAL unsigned int __imf_vadd4(unsigned int, unsigned int); +}; + template std::enable_if_t, unsigned int> vadd2(Tp x, Tp y) {