-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc][math] Implement powf function correctly rounded to all rounding modes. #71188
Conversation
@llvm/pr-subscribers-libc Author: None (lntue) ChangesWe compute
We follow similar steps as in We add some speedup for common use-cases:
Patch is 71.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71188.diff 19 Files Affected:
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index ecdd2d0624fcf5d..362138c92a6869f 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -199,6 +199,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
+ libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 52e9e8036a3fae3..096e2afb06e5a9d 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -316,6 +316,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
+ libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 52ca12121812d12..7dadbc7f8285477 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -325,6 +325,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
+ libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index ab105e1d6344725..31ea282edb59126 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -329,6 +329,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
+ libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 1ee088e399438b7..5611b399e8bca8b 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -198,6 +198,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextafter
libc.src.math.nextafterf
libc.src.math.nextafterl
+ libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
libc.src.math.remainderl
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 77b3c8b99a0c2bc..7c83691c0d9c801 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -420,7 +420,7 @@ Higher Math Functions
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| pow | | | | | | | | | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
-| powf | | | | | | | | | | | | |
+| powf | |check| | |check| | | |check| | |check| | | | |check| | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| powl | | | | | | | | | | | | |
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
@@ -493,6 +493,7 @@ log |check| |check|
log10 |check| |check|
log1p |check| |check|
log2 |check| |check|
+pow |check|
sin |check| large
sincos |check| large
sinh |check|
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 1e9043b841ff1d8..239d8f9843cfc0f 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -491,6 +491,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"nextafterl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+ FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"coshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 843349ccdc196a7..8fc2b0850fbc077 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -747,6 +747,36 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ powf
+ SRCS
+ powf.cpp
+ HDRS
+ ../powf.h
+ DEPENDS
+ .common_constants
+ .explogxf
+ .exp2f
+ .exp10f
+ libc.src.__support.builtin_wrappers
+ libc.src.__support.CPP.bit
+ libc.src.__support.CPP.optional
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.FPUtil.triple_double
+ libc.src.__support.macros.optimization
+ libc.include.errno
+ libc.src.errno.errno
+ libc.include.math
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
copysign
SRCS
diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp
index 737d2ab77121adb..1a23d00297105f7 100644
--- a/libc/src/math/generic/common_constants.cpp
+++ b/libc/src/math/generic/common_constants.cpp
@@ -227,6 +227,51 @@ alignas(64) const double LOG_R[128] = {
0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1,
0x1.5ee82aa24192p-1, 0x0.000000000000p0};
+alignas(64) const double LOG2_R[128] = {
+ 0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
+ 0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
+ 0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
+ 0x1.960caf9abb7cap-4, 0x1.c7b528b70f1c5p-4, 0x1.f9c95dc1d1165p-4,
+ 0x1.097e38ce60649p-3, 0x1.22dadc2ab3497p-3, 0x1.3c6fb650cde51p-3,
+ 0x1.494f863b8df35p-3, 0x1.633a8bf437ce1p-3, 0x1.7046031c79f85p-3,
+ 0x1.8a8980abfbd32p-3, 0x1.97c1cb13c7ec1p-3, 0x1.b2602497d5346p-3,
+ 0x1.bfc67a7fff4ccp-3, 0x1.dac22d3e441d3p-3, 0x1.e857d3d361368p-3,
+ 0x1.01d9bbcfa61d4p-2, 0x1.08bce0d95fa38p-2, 0x1.169c05363f158p-2,
+ 0x1.1d982c9d52708p-2, 0x1.249cd2b13cd6cp-2, 0x1.32bfee370ee68p-2,
+ 0x1.39de8e1559f6fp-2, 0x1.4106017c3eca3p-2, 0x1.4f6fbb2cec598p-2,
+ 0x1.56b22e6b578e5p-2, 0x1.5dfdcf1eeae0ep-2, 0x1.6552b49986277p-2,
+ 0x1.6cb0f6865c8eap-2, 0x1.7b89f02cf2aadp-2, 0x1.8304d90c11fd3p-2,
+ 0x1.8a8980abfbd32p-2, 0x1.921800924dd3bp-2, 0x1.99b072a96c6b2p-2,
+ 0x1.a8ff971810a5ep-2, 0x1.b0b67f4f4681p-2, 0x1.b877c57b1b07p-2,
+ 0x1.c043859e2fdb3p-2, 0x1.c819dc2d45fe4p-2, 0x1.cffae611ad12bp-2,
+ 0x1.d7e6c0abc3579p-2, 0x1.dfdd89d586e2bp-2, 0x1.e7df5fe538ab3p-2,
+ 0x1.efec61b011f85p-2, 0x1.f804ae8d0cd02p-2, 0x1.0014332be0033p-1,
+ 0x1.042bd4b9a7c99p-1, 0x1.08494c66b8efp-1, 0x1.0c6caaf0c5597p-1,
+ 0x1.1096015dee4dap-1, 0x1.14c560fe68af9p-1, 0x1.18fadb6e2d3c2p-1,
+ 0x1.1d368296b5255p-1, 0x1.217868b0c37e8p-1, 0x1.25c0a0463bebp-1,
+ 0x1.2a0f3c340705cp-1, 0x1.2e644fac04fd8p-1, 0x1.2e644fac04fd8p-1,
+ 0x1.32bfee370ee68p-1, 0x1.37222bb70747cp-1, 0x1.3b8b1c68fa6edp-1,
+ 0x1.3ffad4e74f1d6p-1, 0x1.44716a2c08262p-1, 0x1.44716a2c08262p-1,
+ 0x1.48eef19317991p-1, 0x1.4d7380dcc422dp-1, 0x1.51ff2e30214bcp-1,
+ 0x1.5692101d9b4a6p-1, 0x1.5b2c3da19723bp-1, 0x1.5b2c3da19723bp-1,
+ 0x1.5fcdce2727ddbp-1, 0x1.6476d98ad990ap-1, 0x1.6927781d932a8p-1,
+ 0x1.6927781d932a8p-1, 0x1.6ddfc2a78fc63p-1, 0x1.729fd26b707c8p-1,
+ 0x1.7767c12967a45p-1, 0x1.7767c12967a45p-1, 0x1.7c37a9227e7fbp-1,
+ 0x1.810fa51bf65fdp-1, 0x1.810fa51bf65fdp-1, 0x1.85efd062c656dp-1,
+ 0x1.8ad846cf369a4p-1, 0x1.8ad846cf369a4p-1, 0x1.8fc924c89ac84p-1,
+ 0x1.94c287492c4dbp-1, 0x1.94c287492c4dbp-1, 0x1.99c48be2063c8p-1,
+ 0x1.9ecf50bf43f13p-1, 0x1.9ecf50bf43f13p-1, 0x1.a3e2f4ac43f6p-1,
+ 0x1.a8ff971810a5ep-1, 0x1.a8ff971810a5ep-1, 0x1.ae255819f022dp-1,
+ 0x1.b35458761d479p-1, 0x1.b35458761d479p-1, 0x1.b88cb9a2ab521p-1,
+ 0x1.b88cb9a2ab521p-1, 0x1.bdce9dcc96187p-1, 0x1.c31a27dd00b4ap-1,
+ 0x1.c31a27dd00b4ap-1, 0x1.c86f7b7ea4a89p-1, 0x1.c86f7b7ea4a89p-1,
+ 0x1.cdcebd2373995p-1, 0x1.d338120a6dd9dp-1, 0x1.d338120a6dd9dp-1,
+ 0x1.d8aba045b01c8p-1, 0x1.d8aba045b01c8p-1, 0x1.de298ec0bac0dp-1,
+ 0x1.de298ec0bac0dp-1, 0x1.e3b20546f554ap-1, 0x1.e3b20546f554ap-1,
+ 0x1.e9452c8a71028p-1, 0x1.e9452c8a71028p-1, 0x1.eee32e2aeccbfp-1,
+ 0x1.eee32e2aeccbfp-1, 0x1.f48c34bd1e96fp-1, 0x1.f48c34bd1e96fp-1,
+ 0x1.fa406bd2443dfp-1, 0x1.0000000000000p0};
+
// Generated by Sollya with:
// for i from 0 to 127 do {
// r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
@@ -395,6 +440,48 @@ alignas(64) const int S2[193] = {
-0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
-0x1f0, -0x1f4, -0x1f8, -0x1fc};
+alignas(64) const double R2[193] = {
+ 0x1.0101p0, 0x1.00fdp0, 0x1.00f9p0, 0x1.00f5p0, 0x1.00f1p0,
+ 0x1.00edp0, 0x1.00e9p0, 0x1.00e5p0, 0x1.00e1p0, 0x1.00ddp0,
+ 0x1.00d9p0, 0x1.00d5p0, 0x1.00d1p0, 0x1.00cdp0, 0x1.00c9p0,
+ 0x1.00c5p0, 0x1.00c1p0, 0x1.00bdp0, 0x1.00b9p0, 0x1.00b4p0,
+ 0x1.00bp0, 0x1.00acp0, 0x1.00a8p0, 0x1.00a4p0, 0x1.00ap0,
+ 0x1.009cp0, 0x1.0098p0, 0x1.0094p0, 0x1.009p0, 0x1.008cp0,
+ 0x1.0088p0, 0x1.0084p0, 0x1.008p0, 0x1.007cp0, 0x1.0078p0,
+ 0x1.0074p0, 0x1.007p0, 0x1.006cp0, 0x1.0068p0, 0x1.0064p0,
+ 0x1.006p0, 0x1.005cp0, 0x1.0058p0, 0x1.0054p0, 0x1.005p0,
+ 0x1.004cp0, 0x1.0048p0, 0x1.0044p0, 0x1.004p0, 0x1.003cp0,
+ 0x1.0038p0, 0x1.0034p0, 0x1.003p0, 0x1.002cp0, 0x1.0028p0,
+ 0x1.0024p0, 0x1.002p0, 0x1.001cp0, 0x1.0018p0, 0x1.0014p0,
+ 0x1.001p0, 0x1.000cp0, 0x1.0008p0, 0x1.0004p0, 0x1p0,
+ 0x1.fff8p-1, 0x1.fffp-1, 0x1.ffe8p-1, 0x1.ffep-1, 0x1.ffd8p-1,
+ 0x1.ffdp-1, 0x1.ffc8p-1, 0x1.ffcp-1, 0x1.ffb8p-1, 0x1.ffbp-1,
+ 0x1.ffa8p-1, 0x1.ffap-1, 0x1.ff98p-1, 0x1.ff9p-1, 0x1.ff88p-1,
+ 0x1.ff8p-1, 0x1.ff78p-1, 0x1.ff7p-1, 0x1.ff68p-1, 0x1.ff6p-1,
+ 0x1.ff58p-1, 0x1.ff5p-1, 0x1.ff48p-1, 0x1.ff4p-1, 0x1.ff38p-1,
+ 0x1.ff3p-1, 0x1.ff28p-1, 0x1.ff2p-1, 0x1.ff18p-1, 0x1.ff1p-1,
+ 0x1.ff08p-1, 0x1.ffp-1, 0x1.fef8p-1, 0x1.fefp-1, 0x1.fee8p-1,
+ 0x1.feep-1, 0x1.fed8p-1, 0x1.fedp-1, 0x1.fec8p-1, 0x1.fecp-1,
+ 0x1.feb8p-1, 0x1.febp-1, 0x1.fea8p-1, 0x1.feap-1, 0x1.fe98p-1,
+ 0x1.fe92p-1, 0x1.fe8ap-1, 0x1.fe82p-1, 0x1.fe7ap-1, 0x1.fe72p-1,
+ 0x1.fe6ap-1, 0x1.fe62p-1, 0x1.fe5ap-1, 0x1.fe52p-1, 0x1.fe4ap-1,
+ 0x1.fe42p-1, 0x1.fe3ap-1, 0x1.fe32p-1, 0x1.fe2ap-1, 0x1.fe22p-1,
+ 0x1.fe1ap-1, 0x1.fe12p-1, 0x1.fe0ap-1, 0x1.fe02p-1, 0x1.fdfap-1,
+ 0x1.fdf2p-1, 0x1.fdeap-1, 0x1.fde2p-1, 0x1.fddap-1, 0x1.fdd2p-1,
+ 0x1.fdcap-1, 0x1.fdc2p-1, 0x1.fdbap-1, 0x1.fdb2p-1, 0x1.fdaap-1,
+ 0x1.fda2p-1, 0x1.fd9ap-1, 0x1.fd92p-1, 0x1.fd8cp-1, 0x1.fd84p-1,
+ 0x1.fd7cp-1, 0x1.fd74p-1, 0x1.fd6cp-1, 0x1.fd64p-1, 0x1.fd5cp-1,
+ 0x1.fd54p-1, 0x1.fd4cp-1, 0x1.fd44p-1, 0x1.fd3cp-1, 0x1.fd34p-1,
+ 0x1.fd2cp-1, 0x1.fd24p-1, 0x1.fd1cp-1, 0x1.fd14p-1, 0x1.fd0cp-1,
+ 0x1.fd04p-1, 0x1.fcfcp-1, 0x1.fcf4p-1, 0x1.fcecp-1, 0x1.fce4p-1,
+ 0x1.fcdcp-1, 0x1.fcd6p-1, 0x1.fccep-1, 0x1.fcc6p-1, 0x1.fcbep-1,
+ 0x1.fcb6p-1, 0x1.fcaep-1, 0x1.fca6p-1, 0x1.fc9ep-1, 0x1.fc96p-1,
+ 0x1.fc8ep-1, 0x1.fc86p-1, 0x1.fc7ep-1, 0x1.fc76p-1, 0x1.fc6ep-1,
+ 0x1.fc66p-1, 0x1.fc5ep-1, 0x1.fc56p-1, 0x1.fc4ep-1, 0x1.fc46p-1,
+ 0x1.fc4p-1, 0x1.fc38p-1, 0x1.fc3p-1, 0x1.fc28p-1, 0x1.fc2p-1,
+ 0x1.fc18p-1, 0x1.fc1p-1, 0x1.fc08p-1,
+};
+
// Logarithm range reduction - Step 3:
// r(k) = 2^-21 round(2^21 / (1 + k*2^-21)) for k = -80 .. 80.
// Output range:
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index df9d7828bf1609d..13fc8d757cb8ca3 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -34,6 +34,9 @@ extern const double CD[128];
extern const double LOG_R[128];
extern const NumberPair<double> LOG_R_DD[128];
+// Lookup table for -log2(r)
+extern const double LOG2_R[128];
+
// Minimax polynomial for (log(1 + x) - x)/x^2, generated by sollya with:
// > P = fpminimax((log(1 + x) - x)/x^2, 5, [|D...|], [-2^-8, 2^-7]);
constexpr double LOG_COEFFS[6] = {-0x1.fffffffffffffp-2, 0x1.5555555554a9bp-2,
@@ -45,6 +48,8 @@ extern const int S2[193];
extern const int S3[161];
extern const int S4[130];
+extern const double R2[193];
+
// log(2) generated by Sollya with:
// > a = 2^-43 * nearestint(2^43*log(2));
// LSB = 2^-43 is chosen so that e_x * LOG_2_HI is exact for -1075 < e_x < 1024.
diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp
index cbb71fb91b10705..7665a90f092359f 100644
--- a/libc/src/math/generic/log2f.cpp
+++ b/libc/src/math/generic/log2f.cpp
@@ -53,52 +53,6 @@
namespace LIBC_NAMESPACE {
-// Lookup table for log2(f) = log2(1 + n*2^(-7)) where n = 0..127.
-static constexpr double LOG2_R[128] = {
- 0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
- 0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
- 0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
- 0x1.960caf9abb7cap-4, 0x1.c7b528b70f1c5p-4, 0x1.f9c95dc1d1165p-4,
- 0x1.097e38ce60649p-3, 0x1.22dadc2ab3497p-3, 0x1.3c6fb650cde51p-3,
- 0x1.494f863b8df35p-3, 0x1.633a8bf437ce1p-3, 0x1.7046031c79f85p-3,
- 0x1.8a8980abfbd32p-3, 0x1.97c1cb13c7ec1p-3, 0x1.b2602497d5346p-3,
- 0x1.bfc67a7fff4ccp-3, 0x1.dac22d3e441d3p-3, 0x1.e857d3d361368p-3,
- 0x1.01d9bbcfa61d4p-2, 0x1.08bce0d95fa38p-2, 0x1.169c05363f158p-2,
- 0x1.1d982c9d52708p-2, 0x1.249cd2b13cd6cp-2, 0x1.32bfee370ee68p-2,
- 0x1.39de8e1559f6fp-2, 0x1.4106017c3eca3p-2, 0x1.4f6fbb2cec598p-2,
- 0x1.56b22e6b578e5p-2, 0x1.5dfdcf1eeae0ep-2, 0x1.6552b49986277p-2,
- 0x1.6cb0f6865c8eap-2, 0x1.7b89f02cf2aadp-2, 0x1.8304d90c11fd3p-2,
- 0x1.8a8980abfbd32p-2, 0x1.921800924dd3bp-2, 0x1.99b072a96c6b2p-2,
- 0x1.a8ff971810a5ep-2, 0x1.b0b67f4f4681p-2, 0x1.b877c57b1b07p-2,
- 0x1.c043859e2fdb3p-2, 0x1.c819dc2d45fe4p-2, 0x1.cffae611ad12bp-2,
- 0x1.d7e6c0abc3579p-2, 0x1.dfdd89d586e2bp-2, 0x1.e7df5fe538ab3p-2,
- 0x1.efec61b011f85p-2, 0x1.f804ae8d0cd02p-2, 0x1.0014332be0033p-1,
- 0x1.042bd4b9a7c99p-1, 0x1.08494c66b8efp-1, 0x1.0c6caaf0c5597p-1,
- 0x1.1096015dee4dap-1, 0x1.14c560fe68af9p-1, 0x1.18fadb6e2d3c2p-1,
- 0x1.1d368296b5255p-1, 0x1.217868b0c37e8p-1, 0x1.25c0a0463bebp-1,
- 0x1.2a0f3c340705cp-1, 0x1.2e644fac04fd8p-1, 0x1.2e644fac04fd8p-1,
- 0x1.32bfee370ee68p-1, 0x1.37222bb70747cp-1, 0x1.3b8b1c68fa6edp-1,
- 0x1.3ffad4e74f1d6p-1, 0x1.44716a2c08262p-1, 0x1.44716a2c08262p-1,
- 0x1.48eef19317991p-1, 0x1.4d7380dcc422dp-1, 0x1.51ff2e30214bcp-1,
- 0x1.5692101d9b4a6p-1, 0x1.5b2c3da19723bp-1, 0x1.5b2c3da19723bp-1,
- 0x1.5fcdce2727ddbp-1, 0x1.6476d98ad990ap-1, 0x1.6927781d932a8p-1,
- 0x1.6927781d932a8p-1, 0x1.6ddfc2a78fc63p-1, 0x1.729fd26b707c8p-1,
- 0x1.7767c12967a45p-1, 0x1.7767c12967a45p-1, 0x1.7c37a9227e7fbp-1,
- 0x1.810fa51bf65fdp-1, 0x1.810fa51bf65fdp-1, 0x1.85efd062c656dp-1,
- 0x1.8ad846cf369a4p-1, 0x1.8ad846cf369a4p-1, 0x1.8fc924c89ac84p-1,
- 0x1.94c287492c4dbp-1, 0x1.94c287492c4dbp-1, 0x1.99c48be2063c8p-1,
- 0x1.9ecf50bf43f13p-1, 0x1.9ecf50bf43f13p-1, 0x1.a3e2f4ac43f6p-1,
- 0x1.a8ff971810a5ep-1, 0x1.a8ff971810a5ep-1, 0x1.ae255819f022dp-1,
- 0x1.b35458761d479p-1, 0x1.b35458761d479p-1, 0x1.b88cb9a2ab521p-1,
- 0x1.b88cb9a2ab521p-1, 0x1.bdce9dcc96187p-1, 0x1.c31a27dd00b4ap-1,
- 0x1.c31a27dd00b4ap-1, 0x1.c86f7b7ea4a89p-1, 0x1.c86f7b7ea4a89p-1,
- 0x1.cdcebd2373995p-1, 0x1.d338120a6dd9dp-1, 0x1.d338120a6dd9dp-1,
- 0x1.d8aba045b01c8p-1, 0x1.d8aba045b01c8p-1, 0x1.de298ec0bac0dp-1,
- 0x1.de298ec0bac0dp-1, 0x1.e3b20546f554ap-1, 0x1.e3b20546f554ap-1,
- 0x1.e9452c8a71028p-1, 0x1.e9452c8a71028p-1, 0x1.eee32e2aeccbfp-1,
- 0x1.eee32e2aeccbfp-1, 0x1.f48c34bd1e96fp-1, 0x1.f48c34bd1e96fp-1,
- 0x1.fa406bd2443dfp-1, 0x1.0000000000000p0};
-
LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
diff --git a/libc/src/math/generic/powf.cpp b/libc/src/math/generic/powf.cpp
new file mode 100644
index 000000000000000..c432a4fa372f24b
--- /dev/null
+++ b/libc/src/math/generic/powf.cpp
@@ -0,0 +1,840 @@
+//===-- Single-precision x^y function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/powf.h"
+#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/builtin_wrappers.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/math/exp10f.h"
+#include "src/math/exp2f.h"
+
+#include <errno.h>
+
+namespace LIBC_NAMESPACE {
+
+using fputil::DoubleDouble;
+using fputil::TripleDouble;
+
+namespace {
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+constexpr uint64_t ERR = 64;
+#else
+constexpr uint64_t ERR = 128;
+#endif // LIBC_TARGET_CPU_HAS_FMA
+
+// We choose the precision of the high part to be 53 - 24 - 8, so that when
+// y * (e_x + LOG2_R_DD[i].hi) is exact.
+// Generated by Sollya with:
+// > for i from 0 to 127 do {
+// r = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + i * 2^-7) );
+// a = -log2(r);
+// b = round(1 + a, 53 - 24 - 8, RN) - 1;
+// c = round(a - b, D, RN);
+// d = round(a - b - c, D, RN);
+// print("{", d, ",", c, ", ", b, "},");
+// };
+static constexpr TripleDouble LOG2_R_TD[128] = {
+ {0.0, 0.0, 0.0},
+ {0x1.84a2c615b70adp-79, -0x1.177c23362928cp-25, 0x1.72c8p-7},
+ {-0x1.f27b820fd03eap-76, -0x1.179e0caa9c9abp-22, 0x1.744p-6},
+ {-0x1.f27ef487c8f34p-77, -0x1.c6cea541f5b7p-23, 0x1.184cp-5},
+ {-0x1.e3f80fbc71454p-76, -0x1.66c4d4e554434p-22, 0x1.773ap-5},
+ {-0x1.9f8ef14d5f6eep-79, -0x1.70700a00fdd55p-24, 0x1.d6ecp-5},
+ {0x1.452bbce7398c1p-77, 0x1.53002a4e86631p-23, 0x1.1bb3p-4},
+ {-0x1.990555535afdp-81, 0x1.fcd15f101c142p-25, 0x1.4c56p-4},
+ {0x1.447e30ad393eep-78, 0x1.25b3eed319cedp-22, 0x1.7d6p-4},
+ {0x1.b7759da88a2dap-76, -0x1.4195120d8486fp-22, 0x1.960dp-4},
+ {0x1.cee7766ece702p-78, 0x1.45b878e27d0d9p-23, 0x1.c7b5p-4},
+ {-0x1.a55c745ecdc2fp-77, 0x1.770744593a4cbp-22, 0x1.f9c9p-4},
+ {0x1.f7ec992caa67fp-77, 0x1.c673032495d24p-22, 0x1.097ep-3},
+ {-0x1.433638c6ece3ep-77, -0x1.1eaa65b49696ep-22, 0x1.22dbp-3},
+ {0x1.58f27b6518824p-76, 0x1.b2866f2850b22p-22, 0x1.3c6f8p-3},
+ {-0x1.86bdcfdfd4a4cp-79, 0x1.8ee37cd2ea9d3p-25, 0x1.494f8p-3},
+ {-0x1.ff7044a68a7fap-80, 0x1.7e86f9c2154fbp-24, 0x1.633a8p-3},
+ {-0x1.aa21694561327p-81, 0x1.8e3cfc25f0ce6p-26, 0x1.7046p-3},
+ {-0x1.d209f2d4239c6p-87, 0x1.57f7a64ccd537p-28, 0x1.8a898p-3},
+ {-0x1.a55e97e60e632p-76, -0x1.a761c09fbd2aep-22, 0x1.97c2p-3},
+ {0x1.261179225541ep-76, 0x1.24bea9a2c66f3p-22, 0x1.b26p-3},
+ {-0x1.08fa30510fca9p-82, -0x1.60002ccfe43f5p-25, 0x1.bfc68p-3},
+ {-0x1.63ec8d56242f9p-76, 0x1.69f220e97f22cp-22, 0x1.dac2p-3}...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly LGTM with some design comments
// d = round(a - b - c, D, RN); | ||
// print("{", d, ",", c, ", ", b, "},"); | ||
// }; | ||
static constexpr TripleDouble LOG2_R_TD[128] = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should these tables be in common_constants.h
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently I only move tables to common_constants.h
if they are used by at least 2 different targets. Otherwise I only keeping it local to the only target using it. That would reduce the dependency and make common_constants.h
less clutter.
switch (x_u) { | ||
case 0x3f80'0000: // x = 1.0f | ||
return 1.0f; | ||
// TODO: Put these 2 entrypoint dependency under control flag. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The usual way to do this is to move the functionality into a shared utility, since we want to avoid depending on other entrypoints
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like with the current cmake setup, libc.src.errno.errno
was compiled differently between object_library
and entrypoint_object
. That forces any codes using libc_errno
to be either header_library
or entrypoint_object
. I move exp2f
and exp10f
implementations to exp2f_impl.h
and exp10f_impl.h
respectively. But I will revisit the cmake setup for object_library
so that bigger math functions implementations can be put in a common object_library
instead of header_library
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds like a good plan.
libc/src/math/generic/CMakeLists.txt
Outdated
DEPENDS | ||
.common_constants | ||
.explogxf | ||
.exp2f |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it possible to avoid these dependencies between entrypoints?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
sounds good to me, the performance is very similar to that of core-math on the machine I used |
…d with powf and remove dependency between entrypoints.
if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) | ||
return static_cast<float>(FPBits(FPBits::MAX_NORMAL)); | ||
|
||
fputil::set_errno_if_required(ERANGE); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The usual way we've handled needing to set errno
based on internal functions is by returning it in a struct (e.g. str_to_num_result
). That would let you make the internal implementations object libraries while also sharing the complicated logic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With that structure, the errno check-and-set logic will be delegated to the entrypoint implementations, which will insert this extra checks in the critical path (of exp2f, exp10f), instead of exceptional input code path. This reduces the throughputs of exp2f
and exp10f
substantially, since their reciprocal throughputs are only few clocks.
So for performance reason, we keep the logic in exp2f
and exp10f
unmodified, making them acting as entrypoint functions without registering as such, that will be delegated by powf
with special inputs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that makes sense, in that case I'm okay with this.
libc/src/math/generic/powf.cpp
Outdated
#include "src/__support/builtin_wrappers.h" | ||
#include "src/__support/common.h" | ||
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY | ||
#include "src/math/exp10f.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does this include need to be here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
We compute
pow(x, y)
using the formulaWe follow similar steps as in
log2f(x)
andexp2f(x)
, by breaking down intohi + mid + lo
parts, in whichhi
parts are computed using the exponent field directly,mid
parts will use look-up tables, andlo
parts are approximated by polynomials.We add some speedup for common use-cases: