Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Libm] Use sqrt intrinsic function in xsqrt if available #169

Merged
merged 5 commits into from
Feb 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ endif()

# The library currently supports the following SIMD architectures
set(SLEEF_SUPPORTED_EXTENSIONS
SSE2 SSE4 AVX FMA4 AVX2 AVX2128 AVX512F # x86
AVX512F AVX2 AVX2128 FMA4 AVX SSE4 SSE2 # x86
ADVSIMD # Aarch64
NEON32 # Aarch32
CACHE STRING "List of SIMD architectures supported by libsleef."
Expand Down Expand Up @@ -156,6 +156,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(FLAGS_STRICTMATH "-ffp-contract=off")
set(FLAGS_FASTMATH "-ffast-math")

# Without the options below, GCC emits calls to libm functions such as sqrt
# (so that errno can be set) instead of inlining the hardware instruction.
set(FLAGS_NO_ERRNO "-fno-math-errno -fno-trapping-math")

# Enable flags for every supported SIMD extension (x86, AArch64 and AArch32).
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
set(FLAGS_ENABLE_${SIMD} ${CLANG_FLAGS_ENABLE_${SIMD}})
Expand All @@ -180,6 +183,7 @@ elseif(MSVC)
set(FLAGS_ENABLE_AVX2128 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2)
set(FLAGS_ENABLE_AVX512F /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__AVX512F__ /arch:AVX2)
set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS")
set(FLAGS_NO_ERRNO "")
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
set(FLAGS_ENABLE_SSE2 "-msse2")
set(FLAGS_ENABLE_SSE4 "-msse4.1")
Expand All @@ -190,9 +194,10 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
set(FLAGS_STRICTMATH "-fp-model strict -Qoption,cpp,--extended_float_type -qoverride-limits")
set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type -qoverride-limits")
set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes")
set(FLAGS_NO_ERRNO "")
endif()

set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH}")
set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}")
if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99)
set(DFT_C_FLAGS "${FLAGS_WALL}")
else()
Expand Down
4 changes: 2 additions & 2 deletions doc/html/aarch32.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -514,8 +514,8 @@ This is the vectorized function of <a href="purec.xhtml#Sleef_expm1f_u10"><b cla
<p class="synopsis">
#include &lt;sleef.h&gt;<br/>
<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_u05</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_u05neon</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_neon</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<br/>
<span class="normal">Link with</span> -lsleef.
</p>
Expand Down
8 changes: 4 additions & 4 deletions doc/html/aarch64.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -969,8 +969,8 @@ This is the vectorized function of <a href="purec.xhtml#Sleef_expm1f_u10"><b cla
<p class="synopsis">
#include &lt;sleef.h&gt;<br/>
<br/>
<b class="type">float64x2_t</b> <b class="func">Sleef_sqrtd2_u05</b>(<b class="type">float64x2_t</b> <i class="var">a</i>);<br/>
<b class="type">float64x2_t</b> <b class="func">Sleef_sqrtd2_u05advsimd</b>(<b class="type">float64x2_t</b> <i class="var">a</i>);<br/>
<b class="type">float64x2_t</b> <b class="func">Sleef_sqrtd2</b>(<b class="type">float64x2_t</b> <i class="var">a</i>);<br/>
<b class="type">float64x2_t</b> <b class="func">Sleef_sqrtd2_advsimd</b>(<b class="type">float64x2_t</b> <i class="var">a</i>);<br/>
<br/>
<span class="normal">Link with</span> -lsleef.
</p>
Expand All @@ -989,8 +989,8 @@ This is the vectorized function of <a href="purec.xhtml#Sleef_sqrt_u05"><b class
<p class="synopsis">
#include &lt;sleef.h&gt;<br/>
<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_u05</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_u05advsimd</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<b class="type">float32x4_t</b> <b class="func">Sleef_sqrtf4_advsimd</b>(<b class="type">float32x4_t</b> <i class="var">a</i>);<br/>
<br/>
<span class="normal">Link with</span> -lsleef.
</p>
Expand Down
40 changes: 20 additions & 20 deletions doc/html/x86.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -1513,18 +1513,18 @@ These are the vectorized functions of <a href="purec.xhtml#Sleef_expm1f_u10"><b
<p class="synopsis">
#include &lt;sleef.h&gt;<br/>
<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_u05</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_u05sse2</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_u05sse4</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_u05avx2128</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_sse2</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_sse4</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<b class="type">__m128d</b> <b class="func">Sleef_sqrtd2_avx2128</b>(<b class="type">__m128d</b> <i class="var">a</i>);<br/>
<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_u05</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_u05avx</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_u05fma4</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_u05avx2</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_avx</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_fma4</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<b class="type">__m256d</b> <b class="func">Sleef_sqrtd4_avx2</b>(<b class="type">__m256d</b> <i class="var">a</i>);<br/>
<br/>
<b class="type">__m512d</b> <b class="func">Sleef_sqrtd8_u05</b>(<b class="type">__m512d</b> <i class="var">a</i>);<br/>
<b class="type">__m512d</b> <b class="func">Sleef_sqrtd8_u05avx512f</b>(<b class="type">__m512d</b> <i class="var">a</i>);<br/>
<b class="type">__m512d</b> <b class="func">Sleef_sqrtd8</b>(<b class="type">__m512d</b> <i class="var">a</i>);<br/>
<b class="type">__m512d</b> <b class="func">Sleef_sqrtd8_avx512f</b>(<b class="type">__m512d</b> <i class="var">a</i>);<br/>
<br/>
<span class="normal">Link with</span> -lsleef.
</p>
Expand All @@ -1543,18 +1543,18 @@ These are the vectorized functions of <a href="purec.xhtml#Sleef_sqrt_u05"><b cl
<p class="synopsis">
#include &lt;sleef.h&gt;<br/>
<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_u05</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_u05sse2</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_u05sse4</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_u05avx2128</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_sse2</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_sse4</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<b class="type">__m128</b> <b class="func">Sleef_sqrtf4_avx2128</b>(<b class="type">__m128</b> <i class="var">a</i>);<br/>
<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_u05</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_u05avx</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_u05fma4</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_u05avx2</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_avx</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_fma4</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<b class="type">__m256</b> <b class="func">Sleef_sqrtf8_avx2</b>(<b class="type">__m256</b> <i class="var">a</i>);<br/>
<br/>
<b class="type">__m512</b> <b class="func">Sleef_sqrtf16_u05</b>(<b class="type">__m512</b> <i class="var">a</i>);<br/>
<b class="type">__m512</b> <b class="func">Sleef_sqrtf16_u05avx512f</b>(<b class="type">__m512</b> <i class="var">a</i>);<br/>
<b class="type">__m512</b> <b class="func">Sleef_sqrtf16</b>(<b class="type">__m512</b> <i class="var">a</i>);<br/>
<b class="type">__m512</b> <b class="func">Sleef_sqrtf16_avx512f</b>(<b class="type">__m512</b> <i class="var">a</i>);<br/>
<br/>
<span class="normal">Link with</span> -lsleef.
</p>
Expand Down
1 change: 1 addition & 0 deletions src/arch/helperadvsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define ENABLE_FMA_SP

#define FULL_FP_ROUNDING
#define ACCURATE_SQRT

#define ISANAME "AArch64 AdvSIMD"

Expand Down
1 change: 1 addition & 0 deletions src/arch/helperavx.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#define VECTLENSP (1 << LOG2VECTLENSP)

#define FULL_FP_ROUNDING
#define ACCURATE_SQRT

#if defined(_MSC_VER)
#include <intrin.h>
Expand Down
1 change: 1 addition & 0 deletions src/arch/helperavx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#define FULL_FP_ROUNDING
#define SPLIT_KERNEL
#define ACCURATE_SQRT

#if defined(_MSC_VER)
#include <intrin.h>
Expand Down
1 change: 1 addition & 0 deletions src/arch/helperavx2_128.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#define FULL_FP_ROUNDING
#define SPLIT_KERNEL
#define ACCURATE_SQRT

#if defined(_MSC_VER)
#include <intrin.h>
Expand Down
1 change: 1 addition & 0 deletions src/arch/helperavx512f.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#define FULL_FP_ROUNDING
#define SPLIT_KERNEL
#define ACCURATE_SQRT

#if defined(_MSC_VER)
#include <intrin.h>
Expand Down
2 changes: 2 additions & 0 deletions src/arch/helperpurec.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#define LOG2VECTLENSP (LOG2VECTLENDP+1)
#define VECTLENSP (1 << LOG2VECTLENSP)

#define ACCURATE_SQRT

#define DFTPRIORITY LOG2VECTLENDP
#define ISANAME "Pure C Array"

Expand Down
2 changes: 2 additions & 0 deletions src/arch/helpersse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#define LOG2VECTLENSP (LOG2VECTLENDP+1)
#define VECTLENSP (1 << LOG2VECTLENSP)

#define ACCURATE_SQRT

#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand Down
15 changes: 15 additions & 0 deletions src/libm-tester/iut.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,21 @@ int main(int argc, char **argv) {
sscanf(buf, "fma %" PRIx64 " %" PRIx64 " %" PRIx64, &u, &v, &w);
u = d2u(xfma(u2d(u), u2d(v), u2d(w)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "sqrt ")) {
uint64_t u;
sscanf(buf, "sqrt %" PRIx64, &u);
u = d2u(xsqrt(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "sqrt_u05 ")) {
uint64_t u;
sscanf(buf, "sqrt_u05 %" PRIx64, &u);
u = d2u(xsqrt_u05(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "sqrt_u35 ")) {
uint64_t u;
sscanf(buf, "sqrt_u35 %" PRIx64, &u);
u = d2u(xsqrt_u35(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "cbrt ")) {
uint64_t u;
sscanf(buf, "cbrt %" PRIx64, &u);
Expand Down Expand Up @@ -404,6 +414,11 @@ int main(int argc, char **argv) {
sscanf(buf, "cbrtf %x", &u);
u = f2u(xcbrtf(u2f(u)));
printf("%x\n", u);
} else if (startsWith(buf, "sqrtf ")) {
uint32_t u;
sscanf(buf, "sqrtf %x", &u);
u = f2u(xsqrtf(u2f(u)));
printf("%x\n", u);
} else if (startsWith(buf, "sqrtf_u05 ")) {
uint32_t u;
sscanf(buf, "sqrtf_u05 %x", &u);
Expand Down
3 changes: 3 additions & 0 deletions src/libm-tester/iutsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,9 @@ int do_test(int argc, char **argv) {
func_d_d("log", xlog);
func_d_d("exp", xexp);

func_d_d("sqrt", xsqrt);
func_d_d("sqrt_u05", xsqrt_u05);
func_d_d("sqrt_u35", xsqrt_u35);
func_d_d("cbrt", xcbrt);
func_d_d("cbrt_u1", xcbrt_u1);

Expand Down Expand Up @@ -475,6 +477,7 @@ int do_test(int argc, char **argv) {
func_f_f("logf", xlogf);
func_f_f("expf", xexpf);

func_f_f("sqrtf", xsqrtf);
func_f_f("sqrtf_u05", xsqrtf_u05);
func_f_f("sqrtf_u35", xsqrtf_u35);
func_f_f("cbrtf", xcbrtf);
Expand Down
55 changes: 51 additions & 4 deletions src/libm-tester/tester.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ double child_atan2_u1(double y, double x) { child_d_d_d("atan2_u1", y, x); }
Sleef_double2 child_sincos_u1(double x) { child_d2_d("sincos_u1", x); }

double child_pow(double x, double y) { child_d_d_d("pow", x, y); }
double child_sqrt(double x) { child_d_d("sqrt", x); }
double child_sqrt_u05(double x) { child_d_d("sqrt_u05", x); }
double child_sqrt_u35(double x) { child_d_d("sqrt_u35", x); }

double child_sinh(double x) { child_d_d("sinh", x); }
double child_cosh(double x) { child_d_d("cosh", x); }
Expand Down Expand Up @@ -276,6 +278,7 @@ float child_atan2f_u1(float y, float x) { child_f_f_f("atan2f_u1", y, x); }
Sleef_float2 child_sincosf_u1(float x) { child_f2_f("sincosf_u1", x); }

float child_powf(float x, float y) { child_f_f_f("powf", x, y); }
float child_sqrtf(float x) { child_f_f("sqrtf", x); }
float child_sqrtf_u05(float x) { child_f_f("sqrtf_u05", x); }
float child_sqrtf_u35(float x) { child_f_f("sqrtf_u35", x); }

Expand Down Expand Up @@ -2420,13 +2423,27 @@ void do_test() {
showResult(success);
}

{
fprintf(stderr, "sqrt denormal/nonnumber test : ");
double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_sqrt, child_sqrt, xa[i]);
showResult(success);
}

{
fprintf(stderr, "sqrt_u05 denormal/nonnumber test : ");
double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_sqrt, child_sqrt_u05, xa[i]);
showResult(success);
}

{
fprintf(stderr, "sqrt_u35 denormal/nonnumber test : ");
double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_sqrt, child_sqrt_u35, xa[i]);
showResult(success);
}

{
fprintf(stderr, "cbrt denormal/nonnumber test : ");
double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
Expand Down Expand Up @@ -2944,6 +2961,13 @@ void do_test() {
showResult(success);
}

{
fprintf(stderr, "sqrtf denormal/nonnumber test : ");
float xa[] = { +0.0, -0.0, +1, -1, +1e+7, -1e+7, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) cmpDenorm_f(mpfr_sqrt, child_sqrtf, xa[i]);
showResult(success);
}

{
fprintf(stderr, "sqrtf_u05 denormal/nonnumber test : ");
float xa[] = { +0.0, -0.0, +1, -1, +1e+7, -1e+7, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
Expand Down Expand Up @@ -3677,13 +3701,27 @@ void do_test() {

//

// Accuracy test for sqrt: compares child_sqrt against mpfr_sqrt with bound 1.0.
fprintf(stderr, "sqrt : ");
// Sweep 1: linearly spaced arguments in [-10000, 10000) with step 2.1.
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt, d, 1.0);
// Sweep 2: geometrically spaced arguments 2.1^i for i in [-1000, 1000].
// The exponent has to be the loop counter i; using d here would re-test the
// single value left over from sweep 1 on every iteration.
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt, pow(2.1, i), 1.0);
showResult(success);

//

// Accuracy test for sqrt_u05: compares child_sqrt_u05 against mpfr_sqrt with bound 0.506.
fprintf(stderr, "sqrt_u05 : ");
// Sweep 1: linearly spaced arguments in [-10000, 10000) with step 2.1.
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, d, 0.506);
// Sweep 2: geometrically spaced arguments 2.1^i for i in [-1000, 1000].
// The exponent has to be the loop counter i; using d here would re-test the
// single value left over from sweep 1 on every iteration.
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, pow(2.1, i), 0.506);
showResult(success);

//

// Accuracy test for sqrt_u35: compares child_sqrt_u35 against mpfr_sqrt with bound 3.5.
fprintf(stderr, "sqrt_u35 : ");
// Sweep 1: linearly spaced arguments in [-10000, 10000) with step 2.1.
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, d, 3.5);
// Sweep 2: geometrically spaced arguments 2.1^i for i in [-1000, 1000].
// The exponent has to be the loop counter i; using d here would re-test the
// single value left over from sweep 1 on every iteration.
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, pow(2.1, i), 3.5);
showResult(success);

//

fprintf(stderr, "cbrt : ");
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_cbrt, child_cbrt, d, 3.5);
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_cbrt, child_cbrt, pow(2.1, d), 3.5);
Expand Down Expand Up @@ -4391,13 +4429,13 @@ void do_test() {

//

fprintf(stderr, "sqrtf_u35 : ");
fprintf(stderr, "sqrtf : ");
if (!enableFlushToZero) {
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, d, 3.5);
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf, d, 1.0);
}
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, pow(2.1, d), 3.5);
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf, pow(2.1, d), 1.0);
showResult(success);

//

fprintf(stderr, "sqrtf_u05 : ");
Expand All @@ -4409,6 +4447,15 @@ void do_test() {

//

// Accuracy test for sqrtf_u35: compares child_sqrtf_u35 against mpfr_sqrt with bound 3.5.
fprintf(stderr, "sqrtf_u35 : ");
// The linear sweep over [-10000, 10000) only runs when flush-to-zero is disabled.
if (!enableFlushToZero) {
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, d, 3.5);
}
// NOTE(review): this loop counts on i but passes pow(2.1, d); d is not modified
// here, so every iteration tests the same argument (and when the sweep above is
// skipped, d holds whatever an earlier test left in it). pow(2.1, i) was
// presumably intended - confirm, and check how checkAccuracy_f treats denormal
// inputs in flush-to-zero mode before changing it.
for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, pow(2.1, d), 3.5);
showResult(success);

//

fprintf(stderr, "cbrtf : ");
if (!enableFlushToZero) {
for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_cbrt, child_cbrtf, d, 3.5);
Expand Down
Loading