-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc][math] Optimize generic nearest integer functions #98483
Conversation
Before:
|
2df2699
to
781700f
Compare
@llvm/pr-subscribers-libc
Author: OverMighty (overmighty)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/98483.diff
3 Files Affected:
diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h
index cff32938229d0..a9a0a97eebb5c 100644
--- a/libc/src/__support/FPUtil/NearestIntegerOperations.h
+++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h
@@ -75,15 +75,17 @@ LIBC_INLINE T ceil(T x) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
// If x is negative, the ceil operation is equivalent to the trunc operation.
if (is_neg)
return trunc_value;
@@ -130,15 +132,17 @@ LIBC_INLINE T round(T x) {
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
bool half_bit_set =
bool(bits.get_mantissa() & (StorageType(1) << (trim_size - 1)));
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
if (!half_bit_set) {
// Franctional part is less than 0.5 so round value is the
// same as the trunc value.
@@ -188,16 +192,17 @@ round_using_specific_rounding_mode(T x, int rnd) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- FPBits<T> new_bits = bits;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- new_bits.set_mantissa(trunc_mantissa);
- T trunc_value = new_bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ FPBits<T> new_bits(trunc_u);
+ T trunc_value = new_bits.get_val();
+
StorageType trim_value =
bits.get_mantissa() &
static_cast<StorageType>(((StorageType(1) << trim_size) - 1));
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 4ea78f9999e4d..bf88fbb85c5d7 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -366,3 +366,22 @@ add_perf_binary(
COMPILE_OPTIONS
-fno-builtin
)
+
+# Benchmark comparing LLVM libc's float and float16 nearest integer functions
+# against the system libc (or a placeholder where no counterpart is used).
+add_perf_binary(
+  nearest_integer_funcs_perf
+  SRCS
+    nearest_integer_funcs_perf.cpp
+  DEPENDS
+    # nearest_integer_funcs_perf.cpp includes src/__support/FPUtil/FPBits.h.
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.math.ceilf
+    libc.src.math.ceilf16
+    libc.src.math.floorf
+    libc.src.math.floorf16
+    libc.src.math.roundevenf
+    libc.src.math.roundevenf16
+    libc.src.math.roundf
+    libc.src.math.roundf16
+    libc.src.math.truncf
+    libc.src.math.truncf16
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
new file mode 100644
index 0000000000000..24176a377e9d4
--- /dev/null
+++ b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
@@ -0,0 +1,168 @@
+//===-- Performance test for nearest integer functions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/ceilf.h"
+#include "src/math/ceilf16.h"
+#include "src/math/floorf.h"
+#include "src/math/floorf16.h"
+#include "src/math/roundevenf.h"
+#include "src/math/roundevenf16.h"
+#include "src/math/roundf.h"
+#include "src/math/roundf16.h"
+#include "src/math/truncf.h"
+#include "src/math/truncf16.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <fstream>
+#include <math.h>
+
+namespace LIBC_NAMESPACE::testing {
+
+// Benchmark harness that times a nearest integer function of type T(T)
+// against a reference function over several ranges of input bit patterns and
+// writes the measurements to a log file.
+template <typename T> class NearestIntegerPerf {
+  using FPBits = fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+
+public:
+  typedef T Func(T);
+
+  // Calls my_func and then other_func on the value encoded by every bit
+  // pattern from starting_bit to ending_bit (inclusive) in increments of step,
+  // repeating the whole sweep `rounds` times. For each function the total
+  // time, the average time per call and the calls per second are written to
+  // `log`, followed by the ratio of the two averages.
+  static void run_perf_in_range(Func my_func, Func other_func,
+                                StorageType starting_bit,
+                                StorageType ending_bit, StorageType step,
+                                size_t rounds, std::ofstream &log) {
+    const size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
+    Timer timer;
+
+    // Times `rounds` sweeps of `func`, logs its statistics under `title` and
+    // returns the average time per call in nanoseconds.
+    auto measure = [&](Func func, const char *title) -> double {
+      timer.start();
+      {
+        // The volatile store makes every result observable, so the calls
+        // cannot be discarded as dead code.
+        volatile T result;
+        for (size_t i = 0; i < rounds; i++) {
+          for (StorageType bits = starting_bit; bits <= ending_bit;
+               bits += step) {
+            T x = FPBits(bits).get_val();
+            result = func(x);
+          }
+        }
+      }
+      timer.stop();
+
+      const auto total_ns = timer.nanoseconds();
+      const double average =
+          static_cast<double>(total_ns) / number_of_runs / rounds;
+      log << title;
+      log << " Total time : " << total_ns << " ns \n";
+      log << " Average runtime : " << average << " ns/op \n";
+      log << " Ops per second : "
+          << static_cast<uint64_t>(1'000'000'000.0 / average) << " op/s \n";
+      return average;
+    };
+
+    const double my_average = measure(my_func, "-- My function --\n");
+    const double other_average = measure(other_func, "-- Other function --\n");
+
+    log << "-- Average runtime ratio --\n";
+    log << " Mine / Other's : " << my_average / other_average << " \n";
+  }
+
+  // Benchmarks my_func against other_func over five input ranges (named in
+  // the log headings below) and writes the results to `log_file`, which is
+  // opened with the default std::ofstream mode and therefore truncated.
+  static void run_perf(Func my_func, Func other_func, size_t rounds,
+                       const char *log_file) {
+    std::ofstream log(log_file);
+
+    // Bit pattern with only bit SIG_LEN set. Used as the stride of the three
+    // "integral" sweeps; presumably one step of the biased exponent - confirm
+    // against FPBits.
+    const StorageType exponent_lsb = StorageType(1 << FPBits::SIG_LEN);
+    // The three "integral" sweeps use this larger round count; the two
+    // fractional sweeps use `rounds * 2` and `rounds`.
+    const size_t integral_rounds =
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2;
+
+    log << "Performance tests with inputs in normal integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN),
+        /*ending_bit=*/
+        StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
+                    << FPBits::SIG_LEN),
+        /*step=*/exponent_lsb, integral_rounds, log);
+
+    log << "\n Performance tests with inputs in low integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/exponent_lsb,
+        /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
+        /*step=*/exponent_lsb, integral_rounds, log);
+
+    log << "\n Performance tests with inputs in high integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/
+        StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN)
+                    << FPBits::SIG_LEN),
+        /*ending_bit=*/
+        StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
+        /*step=*/exponent_lsb, integral_rounds, log);
+
+    log << "\n Performance tests with inputs in normal fractional range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/
+        StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
+        /*ending_bit=*/
+        StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
+        /*step=*/StorageType(1), rounds * 2, log);
+
+    log << "\n Performance tests with inputs in subnormal fractional range:\n";
+    run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
+                      /*ending_bit=*/StorageType(FPBits::SIG_MASK),
+                      /*step=*/StorageType(1), rounds, log);
+  }
+};
+
+} // namespace LIBC_NAMESPACE::testing
+
+#define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename) \
+ { /* Expands to one braced statement; call sites add no trailing ';'. */ \
+ LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \
+ &my_func, &other_func, rounds, filename); /* first pass */ \
+ LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \
+ &my_func, &other_func, rounds, filename); /* NOTE(review): second pass reopens and overwrites the same log; the first pass is presumably a warm-up - confirm. */ \
+ }
+
+// Values passed as the `rounds` argument of the float16 and float benchmarks.
+static constexpr size_t FLOAT16_ROUNDS = 20'000;
+static constexpr size_t FLOAT_ROUNDS = 40;
+
+// LLVM libc might be the only libc implementation with support for float16 math
+// functions currently. We can't compare our float16 functions against the
+// system libc, so we compare them against this placeholder function.
+// Internal linkage: the placeholder is only referenced from this file.
+static float16 placeholderf16(float16 x) { return x; }
+
+// The system libc might not provide the roundeven* C23 math functions either.
+// Internal linkage: the placeholder is only referenced from this file.
+static float placeholderf(float x) { return x; }
+
+int main() { // Runs each benchmark below; every one writes its own *_perf.log.
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "ceilf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "floorf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "roundevenf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "roundf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "truncf16_perf.log")
+
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS,
+ "ceilf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS,
+ "floorf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf,
+ FLOAT_ROUNDS, "roundevenf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS,
+ "roundf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
+ "truncf_perf.log")
+
+ return 0;
+}
|
No description provided.