diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp
index a1547bb540449..9c55dfb7ad8d4 100644
--- a/libdevice/device_imf.hpp
+++ b/libdevice/device_imf.hpp
@@ -488,6 +488,86 @@ static inline double __trunc(double x) {
 #endif
 }
 
+// Fast float math helpers. Host builds use compiler builtins (or plain
+// division); SPIR device builds call the __spirv_ocl_native_* functions.
+
+// 10^x computed as e^(x * ln(10)); 0x1.26bb1cp1f is ln(10).
+static inline float __fast_exp10f(float x) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_expf(0x1.26bb1cp1f * x);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_exp(0x1.26bb1cp1f * x);
+#endif
+}
+
+static inline float __fast_expf(float x) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_expf(x);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_exp(x);
+#endif
+}
+
+static inline float __fast_logf(float x) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_logf(x);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_log(x);
+#endif
+}
+
+// On device, log2(x) is computed as ln(x) / ln(2); 0x1.62e43p-1f is ln(2).
+static inline float __fast_log2f(float x) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_log2f(x);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_log(x) / 0x1.62e43p-1f;
+#endif
+}
+
+// On device, log10(x) is ln(x) / ln(10); 0x1.26bb1cp1f is ln(10).
+static inline float __fast_log10f(float x) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_log10f(x);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_log(x) / 0x1.26bb1cp1f;
+#endif
+}
+
+static inline float __fast_powf(float x, float y) {
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return __builtin_powf(x, y);
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_powr(x, y);
+#endif
+}
+
+// Computes x / y, except that when |y| exceeds 2^126 (magnitude bits above
+// 0x7E80'0000, which also covers infinite and NaN y) no division is done:
+// the result is a quiet NaN if x is infinite and 0 otherwise.
+static inline float __fast_fdividef(float x, float y) {
+  // Clear the sign bits so only the magnitudes are inspected.
+  unsigned ybits = __builtin_bit_cast(unsigned, y);
+  unsigned xbits = __builtin_bit_cast(unsigned, x);
+  ybits &= 0x7FFF'FFFF;
+  xbits &= 0x7FFF'FFFF;
+  unsigned xexp_bits = (xbits >> 23) & 0xFF;
+  unsigned xman_bits = xbits & 0x7F'FFFF;
+  if (ybits > 0x7E80'0000) {
+    // x is +/-infinity: all-ones exponent and zero mantissa.
+    if ((xexp_bits == 0xFF) && (xman_bits == 0))
+      return __builtin_bit_cast(float, 0x7FC00000);
+    else
+      return 0;
+  }
+
+#if defined(__LIBDEVICE_HOST_IMPL__)
+  return x / y;
+#elif defined(__SPIR__)
+  return __spirv_ocl_native_divide(x, y);
+#endif
+}
+
 static inline _iml_half __trunc(_iml_half x) {
   _iml_half_internal x_i = x.get_internal();
 #if defined(__LIBDEVICE_HOST_IMPL__)
diff --git a/libdevice/imf/imf_fp32_dl.cpp b/libdevice/imf/imf_fp32_dl.cpp
index 48f5a39051ebe..6f77e30859944 100644
--- a/libdevice/imf/imf_fp32_dl.cpp
+++ b/libdevice/imf/imf_fp32_dl.cpp
@@ -25,4 +25,26 @@ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fabsf(float x) {
 DEVICE_EXTERN_C_INLINE int64_t __devicelib_imf_llabs(int64_t x) {
   return x >= 0 ? x : -x;
 }
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_exp10f(float x) { return __fast_exp10f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_expf(float x) { return __fast_expf(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_fdividef(float x, float y) {
+  return __fast_fdividef(x, y);
+}
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_logf(float x) { return __fast_logf(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_log2f(float x) { return __fast_log2f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_log10f(float x) { return __fast_log10f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_powf(float x, float y) { return __fast_powf(x, y); }
 #endif /*__LIBDEVICE_IMF_ENABLED__*/
diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp
index d8be6031b5af3..7c6571da1aab9 100644
--- a/libdevice/imf_wrapper.cpp
+++ b/libdevice/imf_wrapper.cpp
@@ -1788,4 +1788,50 @@ DEVICE_EXTERN_C_INLINE
 _iml_half_internal __imf_ushort_as_half(unsigned short x) {
   return __devicelib_imf_ushort_as_half(x);
 }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_exp10f(float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_exp10f(float x) { return __devicelib_imf_fast_exp10f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_expf(float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_expf(float x) { return __devicelib_imf_fast_expf(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_fdividef(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_fdividef(float x, float y) {
+  return __devicelib_imf_fast_fdividef(x, y);
+}
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_logf(float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_logf(float x) { return __devicelib_imf_fast_logf(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_log2f(float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_log2f(float x) { return __devicelib_imf_fast_log2f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_log10f(float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_log10f(float x) { return __devicelib_imf_fast_log10f(x); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fast_powf(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fast_powf(float x, float y) {
+  return __devicelib_imf_fast_powf(x, y);
+}
 #endif // __LIBDEVICE_IMF_ENABLED__
diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
index a8753e4de5cd5..4bf347d15f055 100644
--- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
+++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
@@ -213,6 +213,13 @@ SYCLDeviceLibFuncMap SDLMap = {
     {"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_exp10f", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_expf", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_logf", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_log2f", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_log10f", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_powf", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fast_fdividef", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf},
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index 5236e7018848c..6598f9dea1b48 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -3124,6 +3124,13 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_invf(float x);
 extern __DPCPP_SYCL_EXTERNAL float __imf_fmaxf(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_fminf(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_copysignf(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_exp10f(float x);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_expf(float x);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_logf(float x);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log2f(float x);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log10f(float x);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_powf(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fast_fdividef(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);