diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 8a71985488606..1d45fe081d03c 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -17,12 +17,20 @@ float logf(float x) { return __devicelib_logf(x); } DEVICE_EXTERN_C float expf(float x) { return __devicelib_expf(x); } - +// On Windows, the math.h includes wrapper definition for functions: +// frexpf, ldexpf, hypotf, so we can't provide these 3 functions in +// device libraries which may lead to multiple definition error. +// The "hypotf" on Windows will call an internal function "_hypotf" +// and frexpf, ldexpf will call corresponding double version: +// frexp and ldexp. frexpf and ldexpf can only be used on platforms +// with fp64 support currently. +#ifndef _WIN32 DEVICE_EXTERN_C float frexpf(float x, int *exp) { return __devicelib_frexpf(x, exp); } DEVICE_EXTERN_C float ldexpf(float x, int exp) { return __devicelib_ldexpf(x, exp); } +#endif DEVICE_EXTERN_C float log10f(float x) { return __devicelib_log10f(x); } @@ -54,8 +62,13 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); } DEVICE_EXTERN_C float cbrtf(float x) { return __devicelib_cbrtf(x); } +#ifndef _WIN32 DEVICE_EXTERN_C float hypotf(float x, float y) { return __devicelib_hypotf(x, y); } +#else +DEVICE_EXTERN_C +float _hypotf(float x, float y) { return __devicelib_hypotf(x, y); } +#endif DEVICE_EXTERN_C float erff(float x) { return __devicelib_erff(x); } @@ -128,3 +141,230 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C float atanhf(float x) { return __devicelib_atanhf(x); } + +#if defined(_WIN32) +#include +union _Fval { // pun floating type as integer array + unsigned short _Sh[8]; + float _Val; +}; + +union _Dconst { // pun float types as integer array + unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary. + float _Float; +}; + +#define _F0 1 // little-endian +#define _F1 0 + +// IEEE 754 float properties +#define FHUGE_EXP (int)(_FMAX * 900L / 1000) + +#define NBITS (16 + _FOFF) +#define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN) + +#define INIT(w0) \ + { 0, w0 } + +#define _FXbig (float)((NBITS + 1) * 347L / 1000) +DEVICE_EXTERN_C +short _FDtest(float *px) { // categorize *px + _Fval *ps = (_Fval *)(char *)px; + short ret = 0; + if ((ps->_Sh[_F0] & _FMASK) == _FMAX << _FOFF) + ret = (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0 ? _NANCODE + : _INFCODE); + else if ((ps->_Sh[_F0] & ~_FSIGN) != 0 || ps->_Sh[_F1] != 0) + ret = (ps->_Sh[_F0] & _FMASK) == 0 ? _DENORM : _FINITE; + + return ret; +} + +DEVICE_EXTERN_C +short _FDnorm(_Fval *ps) { // normalize float fraction + short xchar; + unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN); + + xchar = 1; + if ((ps->_Sh[_F0] &= _FFRAC) != 0 || ps->_Sh[_F1]) { // nonzero, scale + if (ps->_Sh[_F0] == 0) { + ps->_Sh[_F0] = ps->_Sh[_F1]; + ps->_Sh[_F1] = 0; + xchar -= 16; + } + + for (; ps->_Sh[_F0] < 1 << _FOFF; --xchar) { // shift left by 1 + ps->_Sh[_F0] = (unsigned short)(ps->_Sh[_F0] << 1 | ps->_Sh[_F1] >> 15); + ps->_Sh[_F1] <<= 1; + } + for (; 1 << (_FOFF + 1) <= ps->_Sh[_F0]; ++xchar) { // shift right by 1 + ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> 1 | ps->_Sh[_F0] << 15); + ps->_Sh[_F0] >>= 1; + } + ps->_Sh[_F0] &= _FFRAC; + } + ps->_Sh[_F0] |= sign; + return xchar; +} + +DEVICE_EXTERN_C +short _FDscale(float *px, long lexp) { // scale *px by 2^xexp with checking + _Dconst _FInf = {INIT(_FMAX << _FOFF)}; + _Fval *ps = (_Fval *)(char *)px; + short xchar = (short)((ps->_Sh[_F0] & _FMASK) >> _FOFF); + + if (xchar == _FMAX) + return (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0 + ? _NANCODE + : _INFCODE); + if (xchar == 0 && 0 < (xchar = _FDnorm(ps))) + return 0; + + short ret = 0; + if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF + *px = ps->_Sh[_F0] & _FSIGN ? -_FInf._Float : _FInf._Float; + ret = _INFCODE; + } else if (-xchar < lexp) { // finite result, repack + ps->_Sh[_F0] = + (unsigned short)(ps->_Sh[_F0] & ~_FMASK | (lexp + xchar) << _FOFF); + ret = _FINITE; + } else { // denormalized, scale + unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN); + + ps->_Sh[_F0] = (unsigned short)(1 << _FOFF | ps->_Sh[_F0] & _FFRAC); + lexp += xchar - 1; + if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0 + ps->_Sh[_F0] = sign; + ps->_Sh[_F1] = 0; + ret = 0; + } else { // nonzero, align fraction + ret = _FINITE; + short xexp = (short)lexp; + unsigned short psx = 0; + + if (xexp <= -16) { // scale by words + psx = ps->_Sh[_F1] | (psx != 0 ? 1 : 0); + ps->_Sh[_F1] = ps->_Sh[_F0]; + ps->_Sh[_F0] = 0; + xexp += 16; + } + if ((xexp = (short)-xexp) != 0) { // scale by bits + psx = (ps->_Sh[_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0); + ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> xexp | + ps->_Sh[_F0] << (16 - xexp)); + ps->_Sh[_F0] >>= xexp; + } + + ps->_Sh[_F0] |= sign; + if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_F1] & 0x0001) != 0) && + (++ps->_Sh[_F1] & 0xffff) == 0) + ++ps->_Sh[_F0]; // round up + else if (ps->_Sh[_F0] == sign && ps->_Sh[_F1] == 0) + ret = 0; + } + } + + return ret; +} + +DEVICE_EXTERN_C +short _FExp(float *px, float y, + short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge + static const float hugexp = FHUGE_EXP; + _Dconst _FInf = {INIT(_FMAX << _FOFF)}; + static const float p[] = {1.0F, 60.09114349F}; + static const float q[] = {12.01517514F, 120.18228722F}; + static const float c1 = (22713.0F / 32768.0F); + static const float c2 = 1.4286068203094172321214581765680755e-6F; + static const float invln2 = 1.4426950408889634073599246810018921F; + short ret = 0; + if (*px < -hugexp || y == 0.0F) { // certain underflow + *px = 0.0F; + } else if (hugexp < *px) { // certain overflow + *px = _FInf._Float; + ret = _INFCODE; + } else { // xexp won't overflow + float g = *px * invln2; + short xexp = (short)(g + (g < 0.0F ? -0.5F : +0.5F)); + + g = xexp; + g = (float)((*px - g * c1) - g * c2); + if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) { + *px = y; + } else { // g * g worth computing + const float z = g * g; + const float w = q[0] * z + q[1]; + + g *= z + p[1]; + *px = (w + g) / (w - g) * 2.0F * y; + --xexp; + } + ret = _FDscale(px, (long)xexp + eoff); + } + return ret; +} + +DEVICE_EXTERN_C +float _FCosh(float x, float y) { // compute y * cosh(x), |y| <= 1 + switch (_FDtest(&x)) { // test for special codes + case _NANCODE: + case _INFCODE: + return x; + case 0: + return y; + default: // finite + if (y == 0.0F) + return y; + + if (x < 0.0F) + x = -x; + + if (x < _FXbig) { // worth adding in exp(-x) + _FExp(&x, 1.0F, -1); + return y * (x + 0.25F / x); + } + _FExp(&x, y, -1); + return x; + } +} + +DEVICE_EXTERN_C +float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1 + _Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)}; + static const float p[] = {0.00020400F, 0.00832983F, 0.16666737F, 0.99999998F}; + short neg; + + switch (_FDtest(&x)) { // test for special codes + case _NANCODE: + return x; + case _INFCODE: + return y != 0.0F ? x : FSIGN(x) ? -y : y; + case 0: + return x * y; + default: // finite + if (y == 0.0F) + return x < 0.0F ? -y : y; + + if (x < 0.0F) { + x = -x; + neg = 1; + } else + neg = 0; + + if (x < _FRteps._Float) { + x *= y; // x tiny + } else if (x < 1.0F) { + float w = x * x; + + x += ((p[0] * w + p[1]) * w + p[2]) * w * x; + x *= y; + } else if (x < _FXbig) { // worth adding in exp(-x) + _FExp(&x, 1.0F, -1); + x = y * (x - 0.25F / x); + } else + _FExp(&x, y, -1); + + return neg ? -x : x; + } +} +#endif diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index ef8d8e6c50504..84e6e8ab035ac 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -131,3 +131,270 @@ double asinh(double x) { return __devicelib_asinh(x); } DEVICE_EXTERN_C double atanh(double x) { return __devicelib_atanh(x); } + +#if defined(_WIN32) +#include +// FLOAT PROPERTIES +#define _D0 3 // little-endian, small long doubles +#define _D1 2 +#define _D2 1 +#define _D3 0 + +// IEEE 754 double properties +#define HUGE_EXP (int)(_DMAX * 900L / 1000) + +#define NBITS (48 + _DOFF) + +#define INIT(w0) \ + { 0, 0, 0, w0 } + +// double declarations +union _Dval { // pun floating type as integer array + unsigned short _Sh[8]; + double _Val; +}; + +union _Dconst { // pun float types as integer array + unsigned short _Word[4]; // TRANSITION, ABI: Twice as large as necessary. + double _Double; +}; +#define DSIGN(x) (((_Dval *)(char *)&(x))->_Sh[_D0] & _DSIGN) + +#define _Xbig (double)((NBITS + 1) * 347L / 1000) + +DEVICE_EXTERN_C +short _Dtest(double *px) { // categorize *px + _Dval *ps = (_Dval *)(char *)px; + + short ret = 0; + if ((ps->_Sh[_D0] & _DMASK) == _DMAX << _DOFF) { + ret = (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 + ? _NANCODE + : _INFCODE); + } else if ((ps->_Sh[_D0] & ~_DSIGN) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0) + ret = (ps->_Sh[_D0] & _DMASK) == 0 ? _DENORM : _FINITE; + + return ret; +} + +DEVICE_EXTERN_C +short _Dnorm(_Dval *ps) { // normalize double fraction + short xchar; + unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); + + xchar = 1; + if ((ps->_Sh[_D0] &= _DFRAC) != 0 || ps->_Sh[_D1] || ps->_Sh[_D2] || + ps->_Sh[_D3]) { // nonzero, scale + for (; ps->_Sh[_D0] == 0; xchar -= 16) { // shift left by 16 + ps->_Sh[_D0] = ps->_Sh[_D1]; + ps->_Sh[_D1] = ps->_Sh[_D2]; + ps->_Sh[_D2] = ps->_Sh[_D3]; + ps->_Sh[_D3] = 0; + } + for (; ps->_Sh[_D0] < 1 << _DOFF; --xchar) { // shift left by 1 + ps->_Sh[_D0] = (unsigned short)(ps->_Sh[_D0] << 1 | ps->_Sh[_D1] >> 15); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] << 1 | ps->_Sh[_D2] >> 15); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] << 1 | ps->_Sh[_D3] >> 15); + ps->_Sh[_D3] <<= 1; + } + for (; 1 << (_DOFF + 1) <= ps->_Sh[_D0]; ++xchar) { // shift right by 1 + ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> 1 | ps->_Sh[_D2] << 15); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> 1 | ps->_Sh[_D1] << 15); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> 1 | ps->_Sh[_D0] << 15); + ps->_Sh[_D0] >>= 1; + } + ps->_Sh[_D0] &= _DFRAC; + } + ps->_Sh[_D0] |= sign; + return xchar; +} + +DEVICE_EXTERN_C +short _Dscale(double *px, long lexp) { // scale *px by 2^xexp with checking + _Dval *ps = (_Dval *)(char *)px; + _Dconst _Inf = {INIT(_DMAX << _DOFF)}; + short xchar = (short)((ps->_Sh[_D0] & _DMASK) >> _DOFF); + + if (xchar == _DMAX) + return (short)((ps->_Sh[_D0] & _DFRAC) != 0 || ps->_Sh[_D1] != 0 || + ps->_Sh[_D2] != 0 || ps->_Sh[_D3] != 0 + ? _NANCODE + : _INFCODE); + if (xchar == 0 && 0 < (xchar = _Dnorm(ps))) + return 0; + + short ret = 0; + if (0 < lexp && _DMAX - xchar <= lexp) { // overflow, return +/-INF + *px = ps->_Sh[_D0] & _DSIGN ? -_Inf._Double : _Inf._Double; + ret = _INFCODE; + } else if (-xchar < lexp) { // finite result, repack + ps->_Sh[_D0] = + (unsigned short)(ps->_Sh[_D0] & ~_DMASK | (lexp + xchar) << _DOFF); + ret = _FINITE; + } else { // denormalized, scale + unsigned short sign = (unsigned short)(ps->_Sh[_D0] & _DSIGN); + + ps->_Sh[_D0] = (unsigned short)(1 << _DOFF | ps->_Sh[_D0] & _DFRAC); + lexp += xchar - 1; + if (lexp < -(48 + 1 + _DOFF) || + 0 <= lexp) { // certain underflow, return +/-0 + ps->_Sh[_D0] = sign; + ps->_Sh[_D1] = 0; + ps->_Sh[_D2] = 0; + ps->_Sh[_D3] = 0; + ret = 0; + } else { // nonzero, align fraction + short xexp = (short)lexp; + unsigned short psx = 0; + ret = _FINITE; + + for (; xexp <= -16; xexp += 16) { // scale by words + psx = ps->_Sh[_D3] | (psx != 0 ? 1 : 0); + ps->_Sh[_D3] = ps->_Sh[_D2]; + ps->_Sh[_D2] = ps->_Sh[_D1]; + ps->_Sh[_D1] = ps->_Sh[_D0]; + ps->_Sh[_D0] = 0; + } + if ((xexp = (short)-xexp) != 0) { // scale by bits + psx = (ps->_Sh[_D3] << (16 - xexp)) | (psx != 0 ? 1 : 0); + ps->_Sh[_D3] = (unsigned short)(ps->_Sh[_D3] >> xexp | + ps->_Sh[_D2] << (16 - xexp)); + ps->_Sh[_D2] = (unsigned short)(ps->_Sh[_D2] >> xexp | + ps->_Sh[_D1] << (16 - xexp)); + ps->_Sh[_D1] = (unsigned short)(ps->_Sh[_D1] >> xexp | + ps->_Sh[_D0] << (16 - xexp)); + ps->_Sh[_D0] >>= xexp; + } + + ps->_Sh[_D0] |= sign; + if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_D3] & 0x0001) != 0) && + (++ps->_Sh[_D3] & 0xffff) == 0 && (++ps->_Sh[_D2] & 0xffff) == 0 && + (++ps->_Sh[_D1] & 0xffff) == 0) + ++ps->_Sh[_D0]; // round up + else if (ps->_Sh[_D0] == sign && ps->_Sh[_D1] == 0 && ps->_Sh[_D2] == 0 && + ps->_Sh[_D3] == 0) + ret = 0; + } + } + return ret; +} + +DEVICE_EXTERN_C +short _Exp(double *px, double y, + short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge + static const double invln2 = 1.4426950408889634073599246810018921; + static const double c1 = 22713.0 / 32768.0; + static const double c2 = 1.4286068203094172321214581765680755e-6; + static const double p[] = {1.0, 420.30235984910635, 15132.70094680474802}; + static const double q[] = {30.01511290683317, 3362.72154416553028, + 30265.40189360949691}; + + _Dconst _Eps = {INIT((_DBIAS - NBITS - 1) << _DOFF)}; + _Dconst _Inf = {INIT(_DMAX << _DOFF)}; + short ret = 0; + if (*px < -HUGE_EXP || y == 0.0) // certain underflow + *px = 0.0; + else if (HUGE_EXP < *px) { // certain overflow + *px = _Inf._Double; + ret = _INFCODE; + } else { // xexp won't overflow + double g = *px * invln2; + short xexp = (short)(g + (g < 0.0 ? -0.5 : +0.5)); + g = xexp; + g = (*px - g * c1) - g * c2; + if (-_Eps._Double < g && g < _Eps._Double) + *px = y; + else { // g * g worth computing + const double z = g * g; + const double w = (q[0] * z + q[1]) * z + q[2]; + + g *= (z + p[1]) * z + p[2]; + *px = (w + g) / (w - g) * 2.0 * y; + --xexp; + } + ret = _Dscale(px, (long)xexp + eoff); + } + + return ret; +} + +DEVICE_EXTERN_C +double _Cosh(double x, double y) { // compute y * cosh(x), |y| <= 1 + switch (_Dtest(&x)) { // test for special codes + case _NANCODE: + case _INFCODE: + return x; + case 0: + return y; + default: // finite + if (y == 0.0) + return y; + + if (x < 0.0) + x = -x; + + if (x < _Xbig) { // worth adding in exp(-x) + _Exp(&x, 1.0, -1); + return y * (x + 0.25 / x); + } + _Exp(&x, y, -1); + return x; + } +} + +DEVICE_EXTERN_C +double _Poly(double x, const double *tab, int n) { // compute polynomial + double y; + + for (y = *tab; 0 <= --n;) + y = y * x + *++tab; + + return y; +} + +DEVICE_EXTERN_C +double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 + + short neg; + // coefficients + static const double p[] = {0.0000000001632881, 0.0000000250483893, + 0.0000027557344615, 0.0001984126975233, + 0.0083333333334816, 0.1666666666666574, + 1.0000000000000001}; + _Dconst _Rteps = {INIT((_DBIAS - NBITS / 2) << _DOFF)}; + switch (_Dtest(&x)) { // test for special codes + case _NANCODE: + return x; + case _INFCODE: + return y != 0.0 ? x : DSIGN(x) ? -y : y; + case 0: + return x * y; + default: // finite + if (y == 0.0) + return x < 0.0 ? -y : y; + + if (x < 0.0) { + x = -x; + neg = 1; + } else + neg = 0; + + if (x < _Rteps._Double) + x *= y; // x tiny + else if (x < 1.0) { + double w = x * x; + + x += x * w * _Poly(w, p, 5); + x *= y; + } else if (x < _Xbig) { // worth adding in exp(-x) + _Exp(&x, 1.0, -1); + x = y * (x - 0.25 / x); + } else + _Exp(&x, y, -1); + + return neg ? -x : x; + } +} +#endif diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 784dcfbac20df..9671987643f41 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1551,6 +1551,15 @@ extern SYCL_EXTERNAL void __assert_fail(const char *expr, const char *file, } #elif defined(_WIN32) extern "C" { +extern SYCL_EXTERNAL double _Cosh(double x, double y); +extern SYCL_EXTERNAL short _Dtest(double *px); +extern SYCL_EXTERNAL short _Exp(double *px, double y, short eoff); +extern SYCL_EXTERNAL float _FCosh(float x, float y); +extern SYCL_EXTERNAL short _FDtest(float *px); +extern SYCL_EXTERNAL short _FExp(float *px, float y, short eoff); +extern SYCL_EXTERNAL float _FSinh(float x, float y); +extern SYCL_EXTERNAL double _Sinh(double x, double y); +extern SYCL_EXTERNAL float _hypotf(float x, float y); extern SYCL_EXTERNAL void _wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line); } diff --git a/sycl/test/devicelib/math_fp64_test.cpp b/sycl/test/devicelib/math_fp64_test.cpp index 45fef78494bcd..dec0bf7341d92 100644 --- a/sycl/test/devicelib/math_fp64_test.cpp +++ b/sycl/test/devicelib/math_fp64_test.cpp @@ -1,13 +1,13 @@ -// UNSUPPORTED: windows +// REQUIRES: cpu, linux // RUN: %clangxx -fsycl -c %s -o %t.o // RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +#include "math_utils.hpp" #include #include #include -#include "math_utils.hpp" namespace s = cl::sycl; constexpr s::access::mode sycl_read = s::access::mode::read; @@ -15,11 +15,11 @@ constexpr s::access::mode sycl_write = s::access::mode::write; #define TEST_NUM 38 -double ref[TEST_NUM] = { -1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, -0, 2, 0, 0, 1, 0, 2, 0, 0, 0, -0, 0, 1, 0, 1, 2, 0, 1, 2, 5, -0, 0, 0, 0, 0.5, 0.5, NAN, NAN,}; +double ref_val[TEST_NUM] = { + 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, + 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, + 0, 0, 0, 0, 0.5, 0.5, NAN, NAN}; double refIptr = 1; @@ -46,53 +46,53 @@ void device_math_test(s::queue &deviceQueue) { auto iptr_access = buffer3.template get_access(cgh); auto quo_access = buffer4.template get_access(cgh); cgh.single_task([=]() { - int i = 0; - res_access[i++] = std::cos(0.0); - res_access[i++] = std::sin(0.0); - res_access[i++] = std::log(1.0); - res_access[i++] = std::acos(1.0); - res_access[i++] = std::asin(0.0); - res_access[i++] = std::atan(0.0); - res_access[i++] = std::atan2(0.0, 1.0); - res_access[i++] = std::cosh(0.0); - res_access[i++] = std::exp(0.0); - res_access[i++] = std::fmod(1.5, 1.0); - res_access[i++] = std::frexp(0.0, &exp_access[0]); - res_access[i++] = std::ldexp(1.0, 1); - res_access[i++] = std::log10(1.0); - res_access[i++] = std::modf(1.0, &iptr_access[0]); - res_access[i++] = std::pow(1.0, 1.0); - res_access[i++] = std::sinh(0.0); - res_access[i++] = std::sqrt(4.0); - res_access[i++] = std::tan(0.0); - res_access[i++] = std::tanh(0.0); - res_access[i++] = std::acosh(1.0); - res_access[i++] = std::asinh(0.0); - res_access[i++] = std::atanh(0.0); - res_access[i++] = std::cbrt(1.0); - res_access[i++] = std::erf(0.0); - res_access[i++] = std::erfc(0.0); - res_access[i++] = std::exp2(1.0); - res_access[i++] = std::expm1(0.0); - res_access[i++] = std::fdim(1.0, 0.0); - res_access[i++] = std::fma(1.0, 1.0, 1.0); - res_access[i++] = std::hypot(3.0, 4.0); - res_access[i++] = std::ilogb(1.0); - res_access[i++] = std::log1p(0.0); - res_access[i++] = std::log2(1.0); - res_access[i++] = std::logb(1.0); - res_access[i++] = std::remainder(0.5, 1.0); - res_access[i++] = std::remquo(0.5, 1.0, &quo_access[0]); + int i = 0; + res_access[i++] = cos(0.0); + res_access[i++] = sin(0.0); + res_access[i++] = log(1.0); + res_access[i++] = acos(1.0); + res_access[i++] = asin(0.0); + res_access[i++] = atan(0.0); + res_access[i++] = atan2(0.0, 1.0); + res_access[i++] = cosh(0.0); + res_access[i++] = exp(0.0); + res_access[i++] = fmod(1.5, 1.0); + res_access[i++] = frexp(0.0, &exp_access[0]); + res_access[i++] = ldexp(1.0, 1); + res_access[i++] = log10(1.0); + res_access[i++] = modf(1.0, &iptr_access[0]); + res_access[i++] = pow(1.0, 1.0); + res_access[i++] = sinh(0.0); + res_access[i++] = sqrt(4.0); + res_access[i++] = tan(0.0); + res_access[i++] = tanh(0.0); + res_access[i++] = acosh(1.0); + res_access[i++] = asinh(0.0); + res_access[i++] = atanh(0.0); + res_access[i++] = cbrt(1.0); + res_access[i++] = erf(0.0); + res_access[i++] = erfc(0.0); + res_access[i++] = exp2(1.0); + res_access[i++] = expm1(0.0); + res_access[i++] = fdim(1.0, 0.0); + res_access[i++] = fma(1.0, 1.0, 1.0); + res_access[i++] = hypot(3.0, 4.0); + res_access[i++] = ilogb(1.0); + res_access[i++] = log1p(0.0); + res_access[i++] = log2(1.0); + res_access[i++] = logb(1.0); + res_access[i++] = remainder(0.5, 1.0); + res_access[i++] = remquo(0.5, 1.0, &quo_access[0]); double a = NAN; - res_access[i++] = std::tgamma(a); - res_access[i++] = std::lgamma(a); + res_access[i++] = tgamma(a); + res_access[i++] = lgamma(a); }); }); } // Compare result with reference for (int i = 0; i < TEST_NUM; ++i) { - assert(approx_equal_fp(result[i], ref[i])); + assert(approx_equal_fp(result[i], ref_val[i])); } // Test modf integral part diff --git a/sycl/test/devicelib/math_fp64_windows_test.cpp b/sycl/test/devicelib/math_fp64_windows_test.cpp new file mode 100644 index 0000000000000..64d4e000c5ff5 --- /dev/null +++ b/sycl/test/devicelib/math_fp64_windows_test.cpp @@ -0,0 +1,132 @@ +// REQUIRES: cpu, windows +// RUN: %clangxx -fsycl -c %s -o %t.o +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/../bin/libsycl-cmath-fp64.o -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +#include "math_utils.hpp" +#include +#include +#include + +namespace s = cl::sycl; +constexpr s::access::mode sycl_read = s::access::mode::read; +constexpr s::access::mode sycl_write = s::access::mode::write; + +#define TEST_NUM 41 + +double ref_val[TEST_NUM] = { + 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, + 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, + 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 1, 2, 0}; + +double refIptr = 1; + +void device_math_test(s::queue &deviceQueue) { + s::range<1> numOfItems{TEST_NUM}; + double result[TEST_NUM] = {-1}; + + // Variable exponent is an integer value to store the exponent in frexp function + int exponent = -1; + + // Variable iptr stores the integral part of float point in modf function + double iptr = -1; + + // Variable quo stores the sign and some bits of x/y in remquo function + int quo = -1; + + // Varaible enm stores the enum value retured by MSVC function + short enm[2] = {10, 10}; + { + s::buffer buffer1(result, numOfItems); + s::buffer buffer2(&exponent, s::range<1>{1}); + s::buffer buffer3(&iptr, s::range<1>{1}); + s::buffer buffer4(&quo, s::range<1>{1}); + s::buffer buffer5(enm, s::range<1>{2}); + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto res_access = buffer1.template get_access(cgh); + auto exp_access = buffer2.template get_access(cgh); + auto iptr_access = buffer3.template get_access(cgh); + auto quo_access = buffer4.template get_access(cgh); + auto enm_access = buffer5.template get_access(cgh); + cgh.single_task([=]() { + int i = 0; + res_access[i++] = cos(0.0); + res_access[i++] = sin(0.0); + res_access[i++] = log(1.0); + res_access[i++] = acos(1.0); + res_access[i++] = asin(0.0); + res_access[i++] = atan(0.0); + res_access[i++] = atan2(0.0, 1.0); + res_access[i++] = cosh(0.0); + res_access[i++] = exp(0.0); + res_access[i++] = fmod(1.5, 1.0); + res_access[i++] = frexp(0.0, &exp_access[0]); + res_access[i++] = ldexp(1.0, 1); + res_access[i++] = log10(1.0); + res_access[i++] = modf(1.0, &iptr_access[0]); + res_access[i++] = pow(1.0, 1.0); + res_access[i++] = sinh(0.0); + res_access[i++] = sqrt(4.0); + res_access[i++] = tan(0.0); + res_access[i++] = tanh(0.0); + res_access[i++] = acosh(1.0); + res_access[i++] = asinh(0.0); + res_access[i++] = atanh(0.0); + res_access[i++] = cbrt(1.0); + res_access[i++] = erf(0.0); + res_access[i++] = erfc(0.0); + res_access[i++] = exp2(1.0); + res_access[i++] = expm1(0.0); + res_access[i++] = fdim(1.0, 0.0); + res_access[i++] = fma(1.0, 1.0, 1.0); + res_access[i++] = hypot(3.0, 4.0); + res_access[i++] = ilogb(1.0); + res_access[i++] = log1p(0.0); + res_access[i++] = log2(1.0); + res_access[i++] = logb(1.0); + res_access[i++] = remainder(0.5, 1.0); + res_access[i++] = remquo(0.5, 1.0, &quo_access[0]); + double a = NAN; + res_access[i++] = tgamma(a); + res_access[i++] = lgamma(a); + enm_access[0] = _Dtest(&a); + a = 0.0; + enm_access[1] = _Exp(&a, 1.0, 0); + res_access[i++] = a; + res_access[i++] = _Cosh(0.0, 2.0); + res_access[i++] = _Sinh(0.0, 1.0); + }); + }); + } + + // Compare result with reference + for (int i = 0; i < TEST_NUM; ++i) { + assert(approx_equal_fp(result[i], ref_val[i])); + } + + // Test modf integral part + assert(approx_equal_fp(iptr, refIptr)); + + // Test frexp exponent + assert(exponent == 0); + + // Test remquo sign + assert(quo == 0); + + // Test enum value returned by _Dtest + assert(enm[0] == _NANCODE); + + // Test enum value returned by _Exp + assert(enm[1] == _FINITE); +} + +int main() { + s::queue deviceQueue; + if (deviceQueue.get_device().has_extension("cl_khr_fp64")) { + device_math_test(deviceQueue); + std::cout << "Pass" << std::endl; + } + return 0; +} diff --git a/sycl/test/devicelib/math_test.cpp b/sycl/test/devicelib/math_test.cpp index 0d77a2251caba..1e3885960b2c5 100644 --- a/sycl/test/devicelib/math_test.cpp +++ b/sycl/test/devicelib/math_test.cpp @@ -1,13 +1,13 @@ -// UNSUPPORTED: windows +// REQUIRES: cpu, linux // RUN: %clangxx -fsycl -c %s -o %t.o // RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +#include "math_utils.hpp" #include #include #include -#include "math_utils.hpp" namespace s = cl::sycl; constexpr s::access::mode sycl_read = s::access::mode::read; @@ -15,11 +15,11 @@ constexpr s::access::mode sycl_write = s::access::mode::write; #define TEST_NUM 38 -float ref[TEST_NUM] = { -1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, -0, 2, 0, 0, 1, 0, 2, 0, 0, 0, -0, 0, 1, 0, 1, 2, 0, 1, 2, 5, -0, 0, 0, 0, 0.5, 0.5, NAN, NAN,}; +float ref_val[TEST_NUM] = { + 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, + 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, + 0, 0, 0, 0, 0.5, 0.5, NAN, NAN}; float refIptr = 1; @@ -47,52 +47,52 @@ void device_math_test(s::queue &deviceQueue) { auto quo_access = buffer4.template get_access(cgh); cgh.single_task([=]() { int i = 0; - res_access[i++] = std::cos(0.0f); - res_access[i++] = std::sin(0.0f); - res_access[i++] = std::log(1.0f); - res_access[i++] = std::acos(1.0f); - res_access[i++] = std::asin(0.0f); - res_access[i++] = std::atan(0.0f); - res_access[i++] = std::atan2(0.0f, 1.0f); - res_access[i++] = std::cosh(0.0f); - res_access[i++] = std::exp(0.0f); - res_access[i++] = std::fmod(1.5f, 1.0f); - res_access[i++] = std::frexp(0.0f, &exp_access[0]); - res_access[i++] = std::ldexp(1.0f, 1); - res_access[i++] = std::log10(1.0f); - res_access[i++] = std::modf(1.0f, &iptr_access[0]); - res_access[i++] = std::pow(1.0f, 1.0f); - res_access[i++] = std::sinh(0.0f); - res_access[i++] = std::sqrt(4.0f); - res_access[i++] = std::tan(0.0f); - res_access[i++] = std::tanh(0.0f); - res_access[i++] = std::acosh(1.0f); - res_access[i++] = std::asinh(0.0f); - res_access[i++] = std::atanh(0.0f); - res_access[i++] = std::cbrt(1.0f); - res_access[i++] = std::erf(0.0f); - res_access[i++] = std::erfc(0.0f); - res_access[i++] = std::exp2(1.0f); - res_access[i++] = std::expm1(0.0f); - res_access[i++] = std::fdim(1.0f, 0.0f); - res_access[i++] = std::fma(1.0f, 1.0f, 1.0f); - res_access[i++] = std::hypot(3.0f, 4.0f); - res_access[i++] = std::ilogb(1.0f); - res_access[i++] = std::log1p(0.0f); - res_access[i++] = std::log2(1.0f); - res_access[i++] = std::logb(1.0f); - res_access[i++] = std::remainder(0.5f, 1.0f); - res_access[i++] = std::remquo(0.5f, 1.0f, &quo_access[0]); + res_access[i++] = cosf(0.0f); + res_access[i++] = sinf(0.0f); + res_access[i++] = logf(1.0f); + res_access[i++] = acosf(1.0f); + res_access[i++] = asinf(0.0f); + res_access[i++] = atanf(0.0f); + res_access[i++] = atan2f(0.0f, 1.0f); + res_access[i++] = coshf(0.0f); + res_access[i++] = expf(0.0f); + res_access[i++] = fmodf(1.5f, 1.0f); + res_access[i++] = frexpf(0.0f, &exp_access[0]); + res_access[i++] = ldexpf(1.0f, 1); + res_access[i++] = log10f(1.0f); + res_access[i++] = modff(1.0f, &iptr_access[0]); + res_access[i++] = powf(1.0f, 1.0f); + res_access[i++] = sinhf(0.0f); + res_access[i++] = sqrtf(4.0f); + res_access[i++] = tanf(0.0f); + res_access[i++] = tanhf(0.0f); + res_access[i++] = acoshf(1.0f); + res_access[i++] = asinhf(0.0f); + res_access[i++] = atanhf(0.0f); + res_access[i++] = cbrtf(1.0f); + res_access[i++] = erff(0.0f); + res_access[i++] = erfcf(0.0f); + res_access[i++] = exp2f(1.0f); + res_access[i++] = expm1f(0.0f); + res_access[i++] = fdimf(1.0f, 0.0f); + res_access[i++] = fmaf(1.0f, 1.0f, 1.0f); + res_access[i++] = hypotf(3.0f, 4.0f); + res_access[i++] = ilogbf(1.0f); + res_access[i++] = log1pf(0.0f); + res_access[i++] = log2f(1.0f); + res_access[i++] = logbf(1.0f); + res_access[i++] = remainderf(0.5f, 1.0f); + res_access[i++] = remquof(0.5f, 1.0f, &quo_access[0]); float a = NAN; - res_access[i++] = std::tgamma(a); - res_access[i++] = std::lgamma(a); + res_access[i++] = tgammaf(a); + res_access[i++] = lgammaf(a); }); }); } // Compare result with reference for (int i = 0; i < TEST_NUM; ++i) { - assert(approx_equal_fp(result[i], ref[i])); + assert(approx_equal_fp(result[i], ref_val[i])); } // Test modf integral part diff --git a/sycl/test/devicelib/math_windows_test.cpp b/sycl/test/devicelib/math_windows_test.cpp new file mode 100644 index 0000000000000..fa295c8bff84c --- /dev/null +++ b/sycl/test/devicelib/math_windows_test.cpp @@ -0,0 +1,121 @@ +// REQUIRES: cpu, windows +// RUN: %clangxx -fsycl -c %s -o %t.o +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/../bin/libsycl-cmath.o -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +#include "math_utils.hpp" +#include +#include +#include + +namespace s = cl::sycl; +constexpr s::access::mode sycl_read = s::access::mode::read; +constexpr s::access::mode sycl_write = s::access::mode::write; + +#define TEST_NUM 39 + +float ref_val[TEST_NUM] = { + 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, + 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, + 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, + 0, 0, 0.5, 0.5, NAN, NAN, 1, 2, 0}; + +float refIptr = 1; + +void device_math_test(s::queue &deviceQueue) { + s::range<1> numOfItems{TEST_NUM}; + float result[TEST_NUM] = {-1}; + + // Variable iptr stores the integral part of float point in modf function + float iptr = -1; + + // Variable quo stores the sign and some bits of x/y in remquo function + int quo = -1; + + // Varaible enm stores the enum value retured by MSVC function + short enm[2] = {10, 10}; + + { + s::buffer buffer1(result, numOfItems); + s::buffer buffer2(&iptr, s::range<1>{1}); + s::buffer buffer3(&quo, s::range<1>{1}); + s::buffer buffer4(enm, s::range<1>{2}); + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto res_access = buffer1.template get_access(cgh); + auto iptr_access = buffer2.template get_access(cgh); + auto quo_access = buffer3.template get_access(cgh); + auto enm_access = buffer4.template get_access(cgh); + cgh.single_task([=]() { + int i = 0; + res_access[i++] = cosf(0.0f); + res_access[i++] = sinf(0.0f); + res_access[i++] = logf(1.0f); + res_access[i++] = acosf(1.0f); + res_access[i++] = asinf(0.0f); + res_access[i++] = atanf(0.0f); + res_access[i++] = atan2f(0.0f, 1.0f); + res_access[i++] = coshf(0.0f); + res_access[i++] = expf(0.0f); + res_access[i++] = fmodf(1.5f, 1.0f); + res_access[i++] = log10f(1.0f); + res_access[i++] = modff(1.0f, &iptr_access[0]); + res_access[i++] = powf(1.0f, 1.0f); + res_access[i++] = sinhf(0.0f); + res_access[i++] = sqrtf(4.0f); + res_access[i++] = tanf(0.0f); + res_access[i++] = tanhf(0.0f); + res_access[i++] = acoshf(1.0f); + res_access[i++] = asinhf(0.0f); + res_access[i++] = atanhf(0.0f); + res_access[i++] = cbrtf(1.0f); + res_access[i++] = erff(0.0f); + res_access[i++] = erfcf(0.0f); + res_access[i++] = exp2f(1.0f); + res_access[i++] = expm1f(0.0f); + res_access[i++] = fdimf(1.0f, 0.0f); + res_access[i++] = fmaf(1.0f, 1.0f, 1.0f); + res_access[i++] = hypotf(3.0f, 4.0f); + res_access[i++] = ilogbf(1.0f); + res_access[i++] = log1pf(0.0f); + res_access[i++] = log2f(1.0f); + res_access[i++] = logbf(1.0f); + res_access[i++] = remainderf(0.5f, 1.0f); + res_access[i++] = remquof(0.5f, 1.0f, &quo_access[0]); + float a = NAN; + res_access[i++] = tgammaf(a); + res_access[i++] = lgammaf(a); + enm_access[0] = _FDtest(&a); + a = 0.0f; + enm_access[1] = _FExp(&a, 1.0f, 0); + res_access[i++] = a; + res_access[i++] = _FCosh(0.0f, 2.0f); + res_access[i++] = _FSinh(0.0f, 1.0f); + }); + }); + } + + // Compare result with reference + for (int i = 0; i < TEST_NUM; ++i) { + assert(approx_equal_fp(result[i], ref_val[i])); + } + + // Test modf integral part + assert(approx_equal_fp(iptr, refIptr)); + + // Test remquo sign + assert(quo == 0); + + // Test enum value returned by _FDtest + assert(enm[0] == _NANCODE); + + // Test enum value returned by _FExp + assert(enm[1] == _FINITE); +} + +int main() { + s::queue deviceQueue; + device_math_test(deviceQueue); + std::cout << "Pass" << std::endl; + return 0; +}