From c0f95f91c27d8d3d9ded0fa9f2970c969d1e80a8 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Wed, 17 Jul 2024 01:40:04 +0000 Subject: [PATCH 1/4] [libc][math] Implement double precision cbrt correctly rounded to all rounding modes. --- libc/config/darwin/arm/entrypoints.txt | 1 + libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/arm/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/config/windows/entrypoints.txt | 1 + libc/spec/stdc.td | 1 + libc/src/math/CMakeLists.txt | 1 + libc/src/math/cbrt.h | 18 ++ libc/src/math/generic/CMakeLists.txt | 16 + libc/src/math/generic/cbrt.cpp | 340 ++++++++++++++++++++++ libc/test/src/math/CMakeLists.txt | 12 + libc/test/src/math/cbrt_test.cpp | 104 +++++++ libc/test/src/math/smoke/CMakeLists.txt | 10 + libc/test/src/math/smoke/cbrt_test.cpp | 35 +++ 15 files changed, 543 insertions(+) create mode 100644 libc/src/math/cbrt.h create mode 100644 libc/src/math/generic/cbrt.cpp create mode 100644 libc/test/src/math/cbrt_test.cpp create mode 100644 libc/test/src/math/smoke/cbrt_test.cpp diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 383118dc781e5..32a08f20b328f 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.copysign libc.src.math.copysignf diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index dee6ac673643e..9b718c3f81151 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -345,6 +345,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf diff --git 
a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index b0ee0e989b5ed..a72f8668808a5 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -216,6 +216,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 516a4b6ce3433..266c94d54a9df 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -347,6 +347,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index b6c55e7aa3033..4d19a28f4a2b3 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -370,6 +370,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.canonicalize libc.src.math.canonicalizef libc.src.math.canonicalizel + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 499c6bfe3a229..afc9ca87ff094 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -121,6 +121,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.copysign libc.src.math.copysignf diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index aa56152aee141..a4c6b40b98388 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -382,6 +382,7 @@ def StdC : StandardSpec<"stdc"> { ], [], // Enumerations [ + FunctionSpec<"cbrt", RetValSpec, [ArgSpec]>, FunctionSpec<"cbrtf", RetValSpec, [ArgSpec]>, 
FunctionSpec<"copysign", RetValSpec, [ArgSpec, ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 6462afbc54a4f..dc2339896f2bb 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -65,6 +65,7 @@ add_math_entrypoint_object(canonicalizel) add_math_entrypoint_object(canonicalizef16) add_math_entrypoint_object(canonicalizef128) +add_math_entrypoint_object(cbrt) add_math_entrypoint_object(cbrtf) add_math_entrypoint_object(ceil) diff --git a/libc/src/math/cbrt.h b/libc/src/math/cbrt.h new file mode 100644 index 0000000000000..a7d5fe80e57b3 --- /dev/null +++ b/libc/src/math/cbrt.h @@ -0,0 +1,18 @@ +//===-- Implementation header for cbrt --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_CBRT_H +#define LLVM_LIBC_SRC_MATH_CBRT_H + +namespace LIBC_NAMESPACE { + +double cbrt(double x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_CBRT_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index c2f58fb1a4f71..318728d6e315c 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4180,3 +4180,19 @@ add_entrypoint_object( libc.src.__support.FPUtil.multiply_add libc.src.__support.macros.optimization ) + +add_entrypoint_object( + cbrt + SRCS + cbrt.cpp + HDRS + ../cbrt.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.hdr.fenv_macros + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization +) diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp new file mode 100644 index 
0000000000000..a60e2ea44b6e9 --- /dev/null +++ b/libc/src/math/generic/cbrt.cpp @@ -0,0 +1,340 @@ +//===-- Implementation of cbrt function -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cbrt.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/common.h" +#include "src/__support/integer_literals.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) +#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS +#endif + +namespace LIBC_NAMESPACE_DECL { + +using DoubleDouble = fputil::DoubleDouble; +using Float128 = typename fputil::DyadicFloat<128>; + +namespace { + +// Initial approximation of x^(-2/3) for 1 <= x < 2. 
+// Polynomial generated by Sollya with: +// > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); +// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); +// 0x1.28...p-21 +constexpr double intial_approximation(double x) { + constexpr double COEFFS[8] = { + 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, + -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, + 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, + }; + + double x_sq = x * x; + + double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]); + double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]); + double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]); + double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]); + + double x_4 = x_sq * x_sq; + double d0 = fputil::multiply_add(x_sq, c1, c0); + double d1 = fputil::multiply_add(x_sq, c3, c2); + + return fputil::multiply_add(x_4, d1, d0); +} + +// Get the error term for Newton iteration: +// h(x) = x^3 * a^2 - 1, with x^3 (x_3) and a^2 (a_sq) given as double-double. +#ifdef LIBC_TARGET_CPU_HAS_FMA +constexpr double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { + return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + + fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); +} +#else +constexpr double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { + DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); + return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; +} +#endif + +} // anonymous namespace + +// Correctly rounded cbrt algorithm: +// +// === Step 1 - Range reduction === +// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as: +// x_r = 1.m +// a = (-1)^s * 2^(e % 3) * (1.m) +// Then cbrt(x) = x^(1/3) can be computed as: +// x^(1/3) = 2^(e / 3) * a^(1/3). +// +// In order to avoid division, we compute a^(-2/3) using Newton's method and +// then multiply the result by a: +// a^(1/3) = a * a^(-2/3). 
+// +// === Step 2 - First approximation to a^(-2/3) === +// First, we use a degree-7 minimax polynomial generated by Sollya to +// approximate x_r^(-2/3) for 1 <= x_r < 2. +// p = P(x_r) ~ x_r^(-2/3), +// with relative errors bounded by: +// | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21. +// +// Then we multiply with 2^(-2*(e % 3)/3) from a small lookup table to get: +// x_0 = 2^(-2*(e % 3)/3) * p +// ~ 2^(-2*(e % 3)/3) * x_r^(-2/3) +// = a^(-2/3) +// With relative errors: +// | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21. +// This step is done in double precision. +// +// === Step 3 - First Newton iteration === +// We follow the method described in: +// Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation +// in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf +// to derive multiplicative Newton iterations as below: +// Let x_n be the nth approximation to a^(-2/3). Define the n^th error as: +// h_n = x_n^3 * a^2 - 1 +// Then: +// a^(-2/3) = x_n / (1 + h_n)^(1/3) +// = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...) +// using the Taylor series expansion of (1 + h_n)^(-1/3). +// +// Apply to x_0 above: +// h_0 = x_0^3 * a^2 - 1 +// = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)), +// it's bounded by: +// |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17. +// So in the first iteration step, we use: +// x_1 = x_0 * (1 - (1/3) * h_0 + (2/9) * h_0^2 - (14/81) * h_0^3) +// Its relative error is bounded by: +// | x_1 / a^(-2/3) - 1 | < 35/243 * |h_0|^4 < 2^-70. +// Then we perform Ziv's rounding test and check if the answer is exact. +// This step is done in double-double precision. +// +// === Step 4 - Second Newton iteration === +// If the Ziv's rounding test from the previous step fails, we define the error +// term: +// h_1 = x_1^3 * a^2 - 1, +// And perform another iteration: +// x_2 = x_1 * (1 - h_1 / 3) +// with relative errors that exceed the precision of double-double. 
+// We then check the Ziv's accuracy test with relative errors < 2^-102 to +// compensate for rounding errors. +// +// === Step 5 - Final iteration === +// If the Ziv's accuracy test from the previous step fails, we perform another +// iteration in 128-bit precision and check for exact outputs. +// +// TODO: It is possible to replace this costly computation step with special +// exceptional handling, similar to what was done in the CORE-MATH project: +// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c + +LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { + using FPBits = typename fputil::FPBits; + + uint64_t x_u = FPBits(x).uintval(); + uint64_t x_abs = x_u & 0x7fff'ffff'ffff'ffff; + + unsigned exp_bias_correction = 682; // 1023 * 2/3 + + if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || + x_abs >= FPBits::inf().uintval())) { + if (x_abs == 0 || x_abs >= FPBits::inf().uintval()) + // x is 0, Inf, or NaN. + return x; + + // x is non-zero denormal number. + // Normalize x by 2^60; the extra 2^20 in cbrt is removed via the bias. + x *= 0x1.0p60; + exp_bias_correction -= 20; + } + + FPBits x_bits(x); + + // When using biased exponent of x in double precision, + // x_e = real_exponent_of_x + 1023 + // Then: + // x_e / 3 = real_exponent_of_x / 3 + 1023/3 + // = real_exponent_of_x / 3 + 341 + // So to make it the correct biased exponent of x^(1/3), we add + // 1023 - 341 = 682 + // to the quotient x_e / 3. + unsigned x_e = static_cast(x_bits.get_biased_exponent()); + unsigned out_e = (x_e / 3 + exp_bias_correction); + unsigned shift_e = x_e % 3; + + // Set x_r = 1.mantissa + double x_r = + FPBits(x_bits.get_mantissa() | + (static_cast(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) + .get_val(); + + // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa) + uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; + a_bits |= + (static_cast(shift_e + static_cast(FPBits::EXP_BIAS)) + << FPBits::FRACTION_LEN); + double a = FPBits(a_bits).get_val(); + + // Initial approximation of x_r^(-2/3). 
+ double p = intial_approximation(x_r); + + // Look up for 2^(-2*n/3) used for first approximation step. + constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, + 0x1.965fea53d6e3dp-2}; + + // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8. + // Relative error: < 1.16 * 2^(-21). + double x0 = static_cast(EXP2_M2_OVER_3[shift_e] * p); + + // First iteration in double precision. + DoubleDouble a_sq = fputil::exact_mult(a, a); + + // h0 = x0^3 * a^2 - 1 + DoubleDouble x0_sq = fputil::exact_mult(x0, x0); + DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq); + + double h0 = get_error(x0_3, a_sq); + +#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + constexpr double REL_ERROR = 0; +#else + constexpr double REL_ERROR = 0x1.0p-51; +#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + + // Taylor polynomial of (1 + h)^(-1/3): + // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ... + constexpr double ERR_COEFFS[3] = { + -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error + 0x1.c71c71c71c71cp-3, // 2/9 + -0x1.61f9add3c0ca4p-3, // -14/81 + }; + // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error. + double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]); + double x0_h0 = x0 * h0; + + // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81) + // x1 approximates a^(-2/3) with relative errors bounded by: + // | x1 / a^(-2/3) - 1 | < (35/243) h0^4 < h0 * REL_ERROR + DoubleDouble x1_dd{x0_h0 * e0, x0}; + + // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3). + DoubleDouble r1 = fputil::quick_mult(a, x1_dd); + + // Lambda function to update the exponent of the result. + auto update_exponent = [=](double r) -> double { + uint64_t r_m = FPBits(r).uintval() & 0x800F'FFFF'FFFF'FFFF; + // Adjust exponent and sign. 
+ uint64_t r_bits = + r_m | (static_cast(out_e) << FPBits::FRACTION_LEN); + return FPBits(r_bits).get_val(); + }; + +#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + // TODO: We probably don't need to use double-double if accurate tests and + // passes are skipped. + return update_exponent(r1.hi + r1.lo); +#else + // Accurate checks and passes. + double r1_lower = r1.hi + r1.lo; + double r1_upper = + r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo); + + // Ziv's accuracy test. + if (LIBC_LIKELY(r1_upper == r1_lower)) { + // Test for exact outputs. + // Check if lower (52 - 17 = 35) bits are 0's. NOTE(review): the 0x1.0p69 bound below looks vacuous; was 0x1.0p-69 intended? Confirm. + if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == + 0)) { + double r1_err = (r1_lower - r1.hi) - r1.lo; + if (FPBits(r1_err).abs().get_val() < 0x1.0p69) + fputil::clear_except_if_required(FE_INEXACT); + } + + return update_exponent(r1_lower); + } + + // Accuracy test failed, perform another Newton iteration. + double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; + + // Second iteration in double-double precision. + // h1 = x1^3 * a^2 - 1. + DoubleDouble x1_sq = fputil::exact_mult(x1, x1); + DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq); + double h1 = get_error(x1_3, a_sq); + + // e1 = -x1*h1/3. + double e1 = h1 * (x1 * -0x1.5555555555555p-2); + // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101. + DoubleDouble x2 = fputil::exact_add(x1, e1); + // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100. + DoubleDouble r2 = fputil::quick_mult(a, x2); + + double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo); + double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo); + + // Ziv's accuracy test. + if (LIBC_LIKELY(r2_upper == r2_lower)) + return update_exponent(r2_upper); + + // TODO: Investigate removing float128 and just list exceptional cases. + // Apply another Newton iteration with ~126-bit accuracy. 
+ Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo)); + // x2^3 + Float128 x2_3 = + fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128); + // a^2 + Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a)); + // x2^3 * a^2 + Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128); + // h2 = x2^3 * a^2 - 1 + Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0)); + double h2 = static_cast(h2_f128); + // t2 = 1 - h2 / 3 + Float128 t2 = + fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2))); + // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3) + Float128 x3 = fputil::quick_mul(x2_f128, t2); + // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3) + Float128 r3 = fputil::quick_mul(Float128(a), x3); + + // Check for exact cases: + Float128::MantissaType rounding_bits = + r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; + + double result = static_cast(r3); + if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || + (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { + // Output is exact. 
+ r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; + + if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { + Float128 tmp{r3.sign, r3.exponent - 123, + 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; + Float128 r4 = fputil::quick_add(r3, tmp); + result = static_cast(r4); + } else { + result = static_cast(r3); + } + + fputil::clear_except_if_required(FE_INEXACT); + } + + return update_exponent(result); +#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 0dc7ae6aae2df..64b4d2c58fb6a 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2225,6 +2225,18 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + cbrt_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + cbrt_test.cpp + DEPENDS + libc.src.math.cbrt + libc.src.__support.FPUtil.fp_bits +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/cbrt_test.cpp b/libc/test/src/math/cbrt_test.cpp new file mode 100644 index 0000000000000..123351496118b --- /dev/null +++ b/libc/test/src/math/cbrt_test.cpp @@ -0,0 +1,104 @@ +//===-- Unittests for cbrt ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/math_macros.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/cbrt.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcCbrtTest = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +using LIBC_NAMESPACE::testing::tlog; + +TEST_F(LlvmLibcCbrtTest, InDoubleRange) { + constexpr uint64_t COUNT = 123'451; + uint64_t START = LIBC_NAMESPACE::fputil::FPBits(1.0).uintval(); + uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits(8.0).uintval(); + uint64_t STEP = (STOP - START) / COUNT; + + auto test = [&](mpfr::RoundingMode rounding_mode) { + mpfr::ForceRoundingMode force_rounding(rounding_mode); + if (!force_rounding.success) + return; + + uint64_t fails = 0; + uint64_t tested = 0; + uint64_t total = 0; + double worst_input, worst_output = 0.0; + double ulp = 0.5; + + for (uint64_t i = 0, v = START; i <= COUNT; ++i, v += STEP) { + double x = FPBits(v).get_val(); + if (isnan(x) || isinf(x)) + continue; + + double result = LIBC_NAMESPACE::cbrt(x); + ++total; + if (isnan(result) || isinf(result)) + continue; + + ++tested; + + if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Cbrt, x, result, + 0.5, rounding_mode)) { + ++fails; + while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Cbrt, x, + result, ulp, rounding_mode)) { + worst_input = x; + worst_output = result; + + if (ulp > 1000.0) + break; + + ulp *= 2.0; + } + } + } + if (fails) { + tlog << " Cbrt failed: " << fails << "/" << tested << "/" << total + << " tests.\n"; + tlog << " Max ULPs is at most: " << static_cast(ulp) << ".\n"; + EXPECT_MPFR_MATCH(mpfr::Operation::Cbrt, worst_input, worst_output, 0.5, + rounding_mode); + } + }; + + tlog << " Test Rounding To Nearest...\n"; + test(mpfr::RoundingMode::Nearest); + + tlog << " 
Test Rounding Downward...\n"; + test(mpfr::RoundingMode::Downward); + + tlog << " Test Rounding Upward...\n"; + test(mpfr::RoundingMode::Upward); + + tlog << " Test Rounding Toward Zero...\n"; + test(mpfr::RoundingMode::TowardZero); +} + +TEST_F(LlvmLibcCbrtTest, SpecialValues) { + constexpr double INPUTS[] = { + 0x1.4f61672324c8p-1028, 0x1.00152f57068b7p-1, 0x1.006509cda9886p-1, + 0x1.018369b92e523p-1, 0x1.10af932ef2bf9p-1, 0x1.1a41117939fdbp-1, + 0x1.2ae8076520d9ap-1, 0x1.a202bfc89ddffp-1, 0x1.a6bb8c803147bp-1, + 0x1.000197b499b1bp+0, 0x1.00065ed266c6cp+0, 0x1.d4306c202c4c2p+0, + 0x1.8fd409efe4851p+1, 0x1.95fd0eb31cc4p+1, 0x1.7cef1d276e335p+2, + 0x1.94910c4fc98p+2, 0x1.a0cc1327bb4c4p+2, 0x1.e7d6ebed549c4p+2, + }; + for (double v : INPUTS) { + double x = FPBits(v).get_val(); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, x, + LIBC_NAMESPACE::cbrt(x), 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cbrt, -x, + LIBC_NAMESPACE::cbrt(-x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 7f1bc0c204c68..76d5919ad9156 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3971,3 +3971,13 @@ add_fp_unittest( DEPENDS libc.src.math.cbrtf ) + +add_fp_unittest( + cbrt_test + SUITE + libc-math-smoke-tests + SRCS + cbrt_test.cpp + DEPENDS + libc.src.math.cbrt +) diff --git a/libc/test/src/math/smoke/cbrt_test.cpp b/libc/test/src/math/smoke/cbrt_test.cpp new file mode 100644 index 0000000000000..724e0e979decc --- /dev/null +++ b/libc/test/src/math/smoke/cbrt_test.cpp @@ -0,0 +1,35 @@ +//===-- Unittests for cbrt ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cbrt.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcCbrtTest = LIBC_NAMESPACE::testing::FPTest; + +using LIBC_NAMESPACE::testing::tlog; + +TEST_F(LlvmLibcCbrtTest, SpecialNumbers) { + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::cbrt(aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::cbrt(inf)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, LIBC_NAMESPACE::cbrt(neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::cbrt(zero)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::cbrt(neg_zero)); + EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::cbrt(1.0)); + EXPECT_FP_EQ_ALL_ROUNDING(-1.0, LIBC_NAMESPACE::cbrt(-1.0)); + EXPECT_FP_EQ_ALL_ROUNDING(2.0, LIBC_NAMESPACE::cbrt(8.0)); + EXPECT_FP_EQ_ALL_ROUNDING(-2.0, LIBC_NAMESPACE::cbrt(-8.0)); + EXPECT_FP_EQ_ALL_ROUNDING(3.0, LIBC_NAMESPACE::cbrt(27.0)); + EXPECT_FP_EQ_ALL_ROUNDING(-3.0, LIBC_NAMESPACE::cbrt(-27.0)); + EXPECT_FP_EQ_ALL_ROUNDING(5.0, LIBC_NAMESPACE::cbrt(125.0)); + EXPECT_FP_EQ_ALL_ROUNDING(-5.0, LIBC_NAMESPACE::cbrt(-125.0)); + EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p42, LIBC_NAMESPACE::cbrt(0x1.0p126)); + EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42, LIBC_NAMESPACE::cbrt(-0x1.0p126)); + EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p341, LIBC_NAMESPACE::cbrt(0x1.0p1023)); + EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p341, LIBC_NAMESPACE::cbrt(-0x1.0p1023)); +} From 68677bd3a6d1857a7369f6daff4d0f31d0324a1e Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Wed, 17 Jul 2024 02:02:48 +0000 Subject: [PATCH 2/4] Add GPU entry point. 
--- libc/config/gpu/entrypoints.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index b0c4652c6b8ee..3c6a92d279e50 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -245,6 +245,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.atanf libc.src.math.atanh libc.src.math.atanhf + libc.src.math.cbrt libc.src.math.cbrtf libc.src.math.ceil libc.src.math.ceilf From d50a1cd31dd21bbb99b247e01626b34db9ad820d Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Wed, 17 Jul 2024 02:59:54 +0000 Subject: [PATCH 3/4] Update status page. --- libc/docs/math/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 70412e4ed203d..205d14946535e 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -266,7 +266,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | atanpi | | | | | | 7.12.4.10 | F.10.1.10 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| cbrt | |check| | | | | | 7.12.7.1 | F.10.4.1 | +| cbrt | |check| | |check| | | | | 7.12.7.1 | F.10.4.1 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | compoundn | | | | | | 7.12.7.2 | F.10.4.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ From 8564fed339b9d6abde9dc87c84498cb58d5073b0 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Wed, 17 Jul 2024 16:06:58 +0000 Subject: [PATCH 4/4] Address comments. 
--- libc/src/math/generic/CMakeLists.txt | 4 ++++ libc/src/math/generic/cbrt.cpp | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 318728d6e315c..415ca3fbce796 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4191,8 +4191,12 @@ add_entrypoint_object( -O3 DEPENDS libc.hdr.fenv_macros + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization + libc.src.__support.integer_literals ) diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp index a60e2ea44b6e9..e226054332dfa 100644 --- a/libc/src/math/generic/cbrt.cpp +++ b/libc/src/math/generic/cbrt.cpp @@ -26,7 +26,7 @@ namespace LIBC_NAMESPACE_DECL { using DoubleDouble = fputil::DoubleDouble; -using Float128 = typename fputil::DyadicFloat<128>; +using Float128 = fputil::DyadicFloat<128>; namespace { @@ -143,10 +143,9 @@ constexpr double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { - using FPBits = typename fputil::FPBits; + using FPBits = fputil::FPBits; - uint64_t x_u = FPBits(x).uintval(); - uint64_t x_abs = x_u & 0x7fff'ffff'ffff'ffff; + uint64_t x_abs = FPBits(x).abs().uintval(); unsigned exp_bias_correction = 682; // 1023 * 2/3