diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f39233..241e4905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. can be carried out using 256, 512, 1024 and 2048-bit wide vectors according to runtime availability of vector registers and operators. https://github.com/shibatch/sleef/pull/182 +- 3.5-ULP versions of sinh, cosh, tanh, sinhf, coshf, tanhf, and the + corresponding testing functionalities are added. + https://github.com/shibatch/sleef/pull/192 ## 3.2 - 2018-02-26 ### Added - The whole build system of the project migrated from makefiles to diff --git a/src/libm-tester/iut.c b/src/libm-tester/iut.c index 2946b9a0..abc6c489 100644 --- a/src/libm-tester/iut.c +++ b/src/libm-tester/iut.c @@ -175,6 +175,21 @@ int main(int argc, char **argv) { sscanf(buf, "tanh %" PRIx64, &u); u = d2u(xtanh(u2d(u))); printf("%" PRIx64 "\n", u); + } else if (startsWith(buf, "sinh_u35 ")) { + uint64_t u; + sscanf(buf, "sinh_u35 %" PRIx64, &u); + u = d2u(xsinh_u35(u2d(u))); + printf("%" PRIx64 "\n", u); + } else if (startsWith(buf, "cosh_u35 ")) { + uint64_t u; + sscanf(buf, "cosh_u35 %" PRIx64, &u); + u = d2u(xcosh_u35(u2d(u))); + printf("%" PRIx64 "\n", u); + } else if (startsWith(buf, "tanh_u35 ")) { + uint64_t u; + sscanf(buf, "tanh_u35 %" PRIx64, &u); + u = d2u(xtanh_u35(u2d(u))); + printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asinh ")) { uint64_t u; sscanf(buf, "asinh %" PRIx64, &u); @@ -454,6 +469,21 @@ int main(int argc, char **argv) { sscanf(buf, "tanhf %x", &u); u = f2u(xtanhf(u2f(u))); printf("%x\n", u); + } else if (startsWith(buf, "sinhf_u35 ")) { + uint32_t u; + sscanf(buf, "sinhf_u35 %x", &u); + u = f2u(xsinhf_u35(u2f(u))); + printf("%x\n", u); + } else if (startsWith(buf, "coshf_u35 ")) { + uint32_t u; + sscanf(buf, "coshf_u35 %x", &u); + u = f2u(xcoshf_u35(u2f(u))); + printf("%x\n", u); + } else if (startsWith(buf, "tanhf_u35 ")) { + uint32_t u; + sscanf(buf, "tanhf_u35 %x", &u); + u = f2u(xtanhf_u35(u2f(u))); + printf("%x\n", u); } else if (startsWith(buf, "asinhf ")) { uint32_t u; sscanf(buf, "asinhf %x", &u); diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index fa54f034..8cbf9eb5 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -417,6 +417,9 @@ int do_test(int argc, char **argv) { func_d_d("sinh", xsinh); func_d_d("cosh", xcosh); func_d_d("tanh", xtanh); + func_d_d("sinh_u35", xsinh_u35); + func_d_d("cosh_u35", xcosh_u35); + func_d_d("tanh_u35", xtanh_u35); func_d_d("asinh", xasinh); func_d_d("acosh", xacosh); func_d_d("atanh", xatanh); @@ -496,6 +499,9 @@ int do_test(int argc, char **argv) { func_f_f("sinhf", xsinhf); func_f_f("coshf", xcoshf); func_f_f("tanhf", xtanhf); + func_f_f("sinhf_u35", xsinhf_u35); + func_f_f("coshf_u35", xcoshf_u35); + func_f_f("tanhf_u35", xtanhf_u35); func_f_f("asinhf", xasinhf); func_f_f("acoshf", xacoshf); func_f_f("atanhf", xatanhf); diff --git a/src/libm-tester/tester.c b/src/libm-tester/tester.c index 6d2fcbac..9dd5dcd2 100644 --- a/src/libm-tester/tester.c +++ b/src/libm-tester/tester.c @@ -157,6 +157,9 @@ double child_sqrt_u35(double x) { child_d_d("sqrt_u35", x); } double child_sinh(double x) { child_d_d("sinh", x); } double child_cosh(double x) { child_d_d("cosh", x); } double child_tanh(double x) { child_d_d("tanh", x); } +double child_sinh_u35(double x) { child_d_d("sinh_u35", x); } +double child_cosh_u35(double x) { child_d_d("cosh_u35", x); } +double child_tanh_u35(double x) { child_d_d("tanh_u35", x); } double child_asinh(double x) { child_d_d("asinh", x); } double child_acosh(double x) { child_d_d("acosh", x); } double child_atanh(double x) { child_d_d("atanh", x); } @@ -285,6 +288,9 @@ float child_sqrtf_u35(float x) { child_f_f("sqrtf_u35", x); } float child_sinhf(float x) { child_f_f("sinhf", x); } float child_coshf(float x) { child_f_f("coshf", x); } float child_tanhf(float x) { child_f_f("tanhf", x); } +float child_sinhf_u35(float x) { child_f_f("sinhf_u35", x); } +float child_coshf_u35(float x) { child_f_f("coshf_u35", x); } +float child_tanhf_u35(float x) { child_f_f("tanhf_u35", x); } float child_asinhf(float x) { child_f_f("asinhf", x); } float child_acoshf(float x) { child_f_f("acoshf", x); } float child_atanhf(float x) { child_f_f("atanhf", x); } @@ -2402,6 +2408,27 @@ void do_test() { showResult(success); } + { + fprintf(stderr, "sinh_u35 denormal/nonnumber test : "); + double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN }; + for(i=0;i 3.5) || + (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || + (d < -709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { + printf("Pure C sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_cosh(frx, frx, GMP_RNDN); + + double u0 = countULPdp(t = xcosh_u35(d), frx); + + if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { + printf("Pure C cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_tanh(frx, frx, GMP_RNDN); + + double u0 = countULPdp(t = xtanh_u35(d), frx); + + if (u0 > 3.5) { + printf("Pure C tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } { mpfr_set_d(frx, d, GMP_RNDN); diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c index c1744a93..1c11e73a 100644 --- a/src/libm-tester/tester2simddp.c +++ b/src/libm-tester/tester2simddp.c @@ -677,6 +677,47 @@ int main(int argc,char **argv) fflush(stdout); ecnt++; } } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_sinh(frx, frx, GMP_RNDN); + + double u0 = countULPdp(t = vget(xsinh_u35(vd), e), frx); + + if ((fabs(d) <= 709 && u0 > 3.5) || + (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || + (d < -709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { + printf(ISANAME " sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); + printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_cosh(frx, frx, GMP_RNDN); + + double u0 = countULPdp(t = vget(xcosh_u35(vd), e), frx); + + if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { + printf(ISANAME " cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); + printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_tanh(frx, frx, GMP_RNDN); + + double u0 = countULPdp(t = vget(xtanh_u35(vd), e), frx); + + if (u0 > 3.5) { + printf(ISANAME " tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); + printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); + fflush(stdout); ecnt++; + } + } { mpfr_set_d(frx, d, GMP_RNDN); diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c index 88096268..527f9c0c 100644 --- a/src/libm-tester/tester2simdsp.c +++ b/src/libm-tester/tester2simdsp.c @@ -643,6 +643,44 @@ int main(int argc,char **argv) } } + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_sinh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = vget(xsinhf_u35(vd), e), frx); + + if ((fabs(d) <= 88 && u0 > 3.5) || + (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || + (d < -88 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { + printf(ISANAME " sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_cosh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = vget(xcoshf_u35(vd), e), frx); + + if ((fabs(d) <= 88 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { + printf(ISANAME " coshf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_tanh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = vget(xtanhf_u35(vd), e), frx); + + if (u0 > 3.5) { + printf(ISANAME " tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); diff --git a/src/libm-tester/tester2sp.c b/src/libm-tester/tester2sp.c index 64ffcf40..54395b93 100644 --- a/src/libm-tester/tester2sp.c +++ b/src/libm-tester/tester2sp.c @@ -538,6 +538,44 @@ int main(int argc,char **argv) fflush(stdout); ecnt++; } } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_sinh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = xsinhf_u35(d), frx); + + if ((fabs(d) <= 88 && u0 > 3.5) || + (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || + (d < -88 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { + printf("Pure C sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_cosh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = xcoshf_u35(d), frx); + + if ((fabs(d) <= 88 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { + printf("Pure C coshf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } + + { + mpfr_set_d(frx, d, GMP_RNDN); + mpfr_tanh(frx, frx, GMP_RNDN); + + double u0 = countULPsp(t = xtanhf_u35(d), frx); + + if (u0 > 3.5) { + printf("Pure C tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0); + fflush(stdout); ecnt++; + } + } { mpfr_set_d(frx, d, GMP_RNDN); diff --git a/src/libm/funcproto.h b/src/libm/funcproto.h index 13ea274c..3639ba62 100644 --- a/src/libm/funcproto.h +++ b/src/libm/funcproto.h @@ -62,6 +62,10 @@ funcSpec funcList[] = { { "sinh", 10, 0, 0, 0 }, { "cosh", 10, 0, 0, 0 }, { "tanh", 10, 0, 0, 0 }, + { "sinh", 35, 3, 0, 0 }, + { "cosh", 35, 3, 0, 0 }, + { "tanh", 35, 3, 0, 0 }, + { "asinh", 10, 0, 0, 0 }, { "acosh", 10, 0, 0, 0 }, { "atanh", 10, 0, 0, 0 }, diff --git a/src/libm/norename.h b/src/libm/norename.h index d76c514a..969bbe87 100755 --- a/src/libm/norename.h +++ b/src/libm/norename.h @@ -35,6 +35,9 @@ vdouble xpow(vdouble x, vdouble y); vdouble xsinh(vdouble d); vdouble xcosh(vdouble d); vdouble xtanh(vdouble d); +vdouble xsinh_u35(vdouble d); +vdouble xcosh_u35(vdouble d); +vdouble xtanh_u35(vdouble d); vdouble xasinh(vdouble s); vdouble xacosh(vdouble s); vdouble xatanh(vdouble s); @@ -130,6 +133,9 @@ vfloat xpowf(vfloat x, vfloat y); vfloat xsinhf(vfloat x); vfloat xcoshf(vfloat x); vfloat xtanhf(vfloat x); +vfloat xsinhf_u35(vfloat x); +vfloat xcoshf_u35(vfloat x); +vfloat xtanhf_u35(vfloat x); vfloat xasinhf(vfloat x); vfloat xacoshf(vfloat x); vfloat xatanhf(vfloat x); diff --git a/src/libm/rename.h b/src/libm/rename.h index 62c10441..258275e4 100644 --- a/src/libm/rename.h +++ b/src/libm/rename.h @@ -30,6 +30,9 @@ #define xsinh Sleef_sinh_u10 #define xcosh Sleef_cosh_u10 #define xtanh Sleef_tanh_u10 +#define xsinh_u35 Sleef_sinh_u35 +#define xcosh_u35 Sleef_cosh_u35 +#define xtanh_u35 Sleef_tanh_u35 #define xasinh Sleef_asinh_u10 #define xacosh Sleef_acosh_u10 #define xatanh Sleef_atanh_u10 @@ -106,6 +109,9 @@ #define xsinhf Sleef_sinhf_u10 #define xcoshf Sleef_coshf_u10 #define xtanhf Sleef_tanhf_u10 +#define xsinhf_u35 Sleef_sinhf_u35 +#define xcoshf_u35 Sleef_coshf_u35 +#define xtanhf_u35 Sleef_tanhf_u35 #define xasinhf Sleef_asinhf_u10 #define xacoshf Sleef_acoshf_u10 #define xatanhf Sleef_atanhf_u10 diff --git a/src/libm/sleefdp.c b/src/libm/sleefdp.c index 21fad775..5db9491a 100644 --- a/src/libm/sleefdp.c +++ b/src/libm/sleefdp.c @@ -1343,6 +1343,31 @@ EXPORT CONST double xexp(double d) { return u; } +static INLINE CONST double expm1k(double d) { + int q = (int)rintk(d * R_LN2); + double s, u; + + s = mla(q, -L2U, d); + s = mla(q, -L2L, s); + + u = 2.08860621107283687536341e-09; + u = mla(u, s, 2.51112930892876518610661e-08); + u = mla(u, s, 2.75573911234900471893338e-07); + u = mla(u, s, 2.75572362911928827629423e-06); + u = mla(u, s, 2.4801587159235472998791e-05); + u = mla(u, s, 0.000198412698960509205564975); + u = mla(u, s, 0.00138888888889774492207962); + u = mla(u, s, 0.00833333333331652721664984); + u = mla(u, s, 0.0416666666666665047591422); + u = mla(u, s, 0.166666666666666851703837); + u = mla(u, s, 0.5); + u = s * s * u + s; + + if (q != 0) u = ldexp2k(u + 1, q) - 1; + + return u; +} + static INLINE CONST Sleef_double2 logk(double d) { Sleef_double2 x, x2, s; double m, t; @@ -1539,6 +1564,42 @@ EXPORT CONST double xtanh(double x) { return y; } +EXPORT CONST double xsinh_u35(double x) { + double e = expm1k(fabsk(x)); + double y = (e + 2) / (e + 1) * (0.5 * e); + + y = fabsk(x) > 709 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = mulsign(y, x); + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xcosh_u35(double x) { + double e = xexp(fabsk(x)); + double y = 0.5 / e + 0.5 * e; + + y = fabsk(x) > 709 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xtanh_u35(double x) { + double y = fabsk(x); + double d = expm1k(2*y); + y = d / (d + 2); + + y = fabsk(x) > 18.714973875 ? 1.0 : y; + y = xisnan(y) ? 1.0 : y; + y = mulsign(y, x); + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + static INLINE CONST Sleef_double2 logk2(Sleef_double2 d) { Sleef_double2 x, x2, m, s; double t; diff --git a/src/libm/sleeflibm_header.h.org b/src/libm/sleeflibm_header.h.org index d0a4e297..f8267ca8 100644 --- a/src/libm/sleeflibm_header.h.org +++ b/src/libm/sleeflibm_header.h.org @@ -125,6 +125,9 @@ IMPORT CONST double Sleef_pow_u10(double, double); IMPORT CONST double Sleef_sinh_u10(double); IMPORT CONST double Sleef_cosh_u10(double); IMPORT CONST double Sleef_tanh_u10(double); +IMPORT CONST double Sleef_sinh_u35(double); +IMPORT CONST double Sleef_cosh_u35(double); +IMPORT CONST double Sleef_tanh_u35(double); IMPORT CONST double Sleef_asinh_u10(double); IMPORT CONST double Sleef_acosh_u10(double); IMPORT CONST double Sleef_atanh_u10(double); @@ -194,6 +197,9 @@ IMPORT CONST float Sleef_powf_u10(float, float); IMPORT CONST float Sleef_sinhf_u10(float); IMPORT CONST float Sleef_coshf_u10(float); IMPORT CONST float Sleef_tanhf_u10(float); +IMPORT CONST float Sleef_sinhf_u35(float); +IMPORT CONST float Sleef_coshf_u35(float); +IMPORT CONST float Sleef_tanhf_u35(float); IMPORT CONST float Sleef_asinhf_u10(float); IMPORT CONST float Sleef_acoshf_u10(float); IMPORT CONST float Sleef_atanhf_u10(float); diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index fc630977..e61e273e 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -1589,6 +1589,32 @@ EXPORT CONST vdouble xexp(vdouble d) { return u; } +static INLINE CONST vdouble expm1k(vdouble d) { + vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; + vint q = vrint_vi_vd(u); + + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); + + u = vcast_vd_d(2.08860621107283687536341e-09); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.51112930892876518610661e-08)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573911234900471893338e-07)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75572362911928827629423e-06)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.4801587159235472998791e-05)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000198412698960509205564975)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00138888888889774492207962)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333331652721664984)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665047591422)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.166666666666666851703837)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.5)); + u = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s); + + u = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(q, vcast_vi_i(0))), u, + vsub_vd_vd_vd(vldexp2_vd_vd_vi(vadd_vd_vd_vd(u, vcast_vd_d(1)), q), vcast_vd_d(1))); + + return u; +} + static INLINE CONST vdouble2 logk(vdouble d) { vdouble2 x, x2, s; vdouble t, m; @@ -1826,6 +1852,40 @@ EXPORT CONST vdouble xtanh(vdouble x) { return y; } +EXPORT CONST vdouble xsinh_u35(vdouble x) { + vdouble e = expm1k(vabs_vd_vd(x)); + + vdouble y = vdiv_vd_vd_vd(vadd_vd_vd_vd(e, vcast_vd_d(2)), vadd_vd_vd_vd(e, vcast_vd_d(1))); + y = vmul_vd_vd_vd(y, vmul_vd_vd_vd(vcast_vd_d(0.5), e)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST vdouble xcosh_u35(vdouble x) { + vdouble e = xexp(vabs_vd_vd(x)); + vdouble y = vmla_vd_vd_vd_vd(vcast_vd_d(0.5), e, vdiv_vd_vd_vd(vcast_vd_d(0.5), e)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST vdouble xtanh_u35(vdouble x) { + vdouble d = expm1k(vmul_vd_vd_vd(vcast_vd_d(2), vabs_vd_vd(x))); + vdouble y = vdiv_vd_vd_vd(d, vadd_vd_vd_vd(vcast_vd_d(2), d)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + static INLINE CONST vdouble2 logk2(vdouble2 d) { vdouble2 x, x2, m, s; vdouble t; diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 9c28fc56..932fbbf5 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -1125,6 +1125,28 @@ EXPORT CONST vfloat xexpf(vfloat d) { return u; } +static INLINE CONST vfloat expm1fk(vfloat d) { + vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f))); + vfloat s, u; + + s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d); + s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s); + + u = vcast_vf_f(0.000198527617612853646278381); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5)); + + u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s); + + u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), u, + vsub_vf_vf_vf(vldexp2_vf_vf_vi2(vadd_vf_vf_vf(u, vcast_vf_f(1)), q), vcast_vf_f(1))); + + return u; +} + #ifdef ENABLE_NEON32 EXPORT CONST vfloat xsqrtf_u35(vfloat d) { vfloat e = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x20000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x7f000000), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1)))); @@ -1464,6 +1486,42 @@ EXPORT CONST vfloat xtanhf(vfloat x) { return y; } +EXPORT CONST vfloat xsinhf_u35(vfloat x) { + vfloat e = expm1fk(vabs_vf_vf(x)); + vfloat y = vdiv_vf_vf_vf(vadd_vf_vf_vf(e, vcast_vf_f(2)), vadd_vf_vf_vf(e, vcast_vf_f(1))); + y = vmul_vf_vf_vf(y, vmul_vf_vf_vf(vcast_vf_f(0.5f), e)); + + y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), + visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); + y = vmulsign_vf_vf_vf(y, x); + y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); + + return y; +} + +EXPORT CONST vfloat xcoshf_u35(vfloat x) { + vfloat e = xexpf(vabs_vf_vf(x)); + vfloat y = vmla_vf_vf_vf_vf(vcast_vf_f(0.5f), e, vdiv_vf_vf_vf(vcast_vf_f(0.5), e)); + + y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), + visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); + y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); + + return y; +} + +EXPORT CONST vfloat xtanhf_u35(vfloat x) { + vfloat d = expm1fk(vmul_vf_vf_vf(vcast_vf_f(2), vabs_vf_vf(x))); + vfloat y = vdiv_vf_vf_vf(d, vadd_vf_vf_vf(vcast_vf_f(2), d)); + + y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)), + visnan_vo_vf(y)), vcast_vf_f(1.0f), y); + y = vmulsign_vf_vf_vf(y, x); + y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); + + return y; +} + static INLINE CONST vfloat2 logk2f(vfloat2 d) { vfloat2 x, x2, m, s; vfloat t; diff --git a/src/libm/sleefsp.c b/src/libm/sleefsp.c index 593bf093..65756667 100644 --- a/src/libm/sleefsp.c +++ b/src/libm/sleefsp.c @@ -1111,6 +1111,26 @@ static INLINE CONST float expkf(Sleef_float2 d) { return u; } +static INLINE CONST float expm1kf(float d) { + int q = (int)rintfk(d * R_LN2f); + float s, u; + + s = mlaf(q, -L2Uf, d); + s = mlaf(q, -L2Lf, s); + + u = 0.000198527617612853646278381; + u = mlaf(u, s, 0.00139304355252534151077271); + u = mlaf(u, s, 0.00833336077630519866943359); + u = mlaf(u, s, 0.0416664853692054748535156); + u = mlaf(u, s, 0.166666671633720397949219); + u = mlaf(u, s, 0.5); + u = s * s * u + s; + + if (q != 0) u = ldexp2kf(u + 1, q) - 1; + + return u; +} + static INLINE CONST Sleef_float2 logkf(float d) { Sleef_float2 x, x2, s; float m, t; @@ -1257,6 +1277,42 @@ EXPORT CONST float xtanhf(float x) { return y; } +EXPORT CONST float xsinhf_u35(float x) { + float e = expm1kf(fabsfk(x)); + float y = (e + 2) / (e + 1) * (0.5f * e); + + y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = mulsignf(y, x); + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xcoshf_u35(float x) { + float e = xexpf(fabsfk(x)); + float y = 0.5f * e + 0.5f / e; + + y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xtanhf_u35(float x) { + float y = fabsfk(x); + float d = expm1kf(2*y); + y = d / (d + 2); + + y = fabsfk(x) > 18.714973875f ? 1.0f : y; + y = xisnanf(y) ? 1.0f : y; + y = mulsignf(y, x); + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + static INLINE CONST Sleef_float2 logk2f(Sleef_float2 d) { Sleef_float2 x, x2, m, s; float t;