boostorg · mborland · Aug 5, 2022 · Aug 6, 2022 · Aug 6, 2022 · Aug 14, 2022
diff --git a/include/boost/math/special_functions/fast_float_distance.hpp b/include/boost/math/special_functions/fast_float_distance.hpp
@@ -0,0 +1,132 @@
+// (C) Copyright Matt Borland 2022.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_SF_FAST_FLOAT_DISTANCE
+#define BOOST_MATH_SF_FAST_FLOAT_DISTANCE
+
+#include <boost/math/special_functions/next.hpp>
+#include <boost/math/tools/throw_exception.hpp>
+#include <stdexcept>
+#include <limits>
+
+#if defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_STANDALONE)
+#include <boost/multiprecision/float128.hpp>
+#include <boost/multiprecision/detail/standalone_config.hpp>
+#define BOOST_MATH_USE_FAST_FLOAT128
+#elif defined(BOOST_MATH_USE_FLOAT128) && defined(BOOST_MATH_STANDALONE)
+# if __has_include(<quadmath.h>)
+# include <quadmath.h>
+# define BOOST_MATH_USE_FAST_STANDALONE_FLOAT128
+# endif
+#endif
+
+namespace boost { namespace math { 
+
+// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
+// https://blog.regehr.org/archives/959
+inline std::int32_t fast_float_distance(float a, float b)
+{
+ return boost::math::float_distance(a, b);
+}
+
+inline std::int64_t fast_float_distance(double a, double b)
+{
+ return boost::math::float_distance(a, b);
+}
+
+#ifdef BOOST_MATH_USE_FAST_FLOAT128
+boost::multiprecision::int128_type fast_float_distance(boost::multiprecision::float128_type a, boost::multiprecision::float128_type b)
+{
+ using std::abs;
+ using std::isfinite;
+
+ constexpr boost::multiprecision::float128_type tol = 2 * BOOST_MP_QUAD_MIN;
+
+ // 0, very small, and large magnitude distances all need special handling
+ if (abs(a) == 0 || abs(b) == 0)
+ {
+ return 0;
+ }
+ else if (abs(a) < tol || abs(b) < tol)
+ {
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
+ }
+
+ if (!(isfinite)(a))
+ { 
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distance must be finite"));
+ }
+ else if (!(isfinite)(b))
+ {
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
+ }
+
+ static_assert(sizeof(boost::multiprecision::int128_type) == sizeof(boost::multiprecision::float128_type), "float128 is the wrong size");
+
+ boost::multiprecision::int128_type ai;
+ boost::multiprecision::int128_type bi;
+ std::memcpy(&ai, &a, sizeof(boost::multiprecision::float128_type));
+ std::memcpy(&bi, &b, sizeof(boost::multiprecision::float128_type));
+
+ boost::multiprecision::int128_type result = bi - ai;
+
+ if (ai < 0 || bi < 0)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+
+#elif defined(BOOST_MATH_USE_FAST_STANDALONE_FLOAT128)
+__int128 fast_float_distance(__float128 a, __float128 b)
+{
+ constexpr __float128 tol = 2 * static_cast<__float128>(1) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+ static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+ static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+ static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+ static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * 
+ static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) / 1073741824;
+
+ // 0, very small, and large magnitude distances all need special handling
+ if (::fabsq(a) == 0 || ::fabsq(b) == 0)
+ {
+ return 0;
+ }
+ else if (::fabsq(a) < tol || ::fabsq(b) < tol)
+ {
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
+ }
+
+ if (!(::isinfq)(a) && !(::isnanq)(a))
+ { 
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
+ }
+ else if (!(::isinfq)(b) && !(::isnanq)(b))
+ {
+ BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
+ }
+
+ static_assert(sizeof(__int128) == sizeof(__float128));
+
+ __int128 ai;
+ __int128 bi;
+ std::memcpy(&ai, &a, sizeof(__float128));
+ std::memcpy(&bi, &b, sizeof(__float128));
+
+ __int128 result = bi - ai;
+
+ if (ai < 0 || bi < 0)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+#endif
+
+}} // Namespaces
+
+#endif
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
@@ -16,9 +16,11 @@
 #include <boost/math/special_functions/sign.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/tools/traits.hpp>
+#include <boost/math/tools/config.hpp>
 #include <type_traits>
 #include <cfloat>
-
+#include <cstdint>
+#include <cstring>
 
 #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
@@ -717,6 +719,103 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
  return boost::math::float_distance(a, b, policies::policy<>());
 }
 
+// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
+// https://blog.regehr.org/archives/959
+inline std::int32_t float_distance(float a, float b)
+{
+ using std::abs;
+ using std::isfinite;
+ constexpr auto tol = 2 * (std::numeric_limits<float>::min)();
+
+ // 0, very small, and large magnitude distances all need special handling
+ if (abs(a) == 0 || abs(b) == 0)
+ {
+ return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
+ }
+ else if (abs(a) < tol || abs(b) < tol)
+ {
+ return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
+ }
+
+ static const char* function = "float_distance<%1%>(%1%, %1%)";
+ if(!(boost::math::isfinite)(a))
+ {
+ return policies::raise_domain_error<float>(
+ function,
+ "Argument a must be finite, but got %1%", a, policies::policy<>());
+ }
+ if(!(boost::math::isfinite)(b))
+ {
+ return policies::raise_domain_error<float>(
+ function,
+ "Argument b must be finite, but got %1%", b, policies::policy<>());
+ }
+
+ static_assert(sizeof(float) == sizeof(std::int32_t), "float is incorrect size.");
+
+ std::int32_t ai;
+ std::int32_t bi;
+ std::memcpy(&ai, &a, sizeof(float));
+ std::memcpy(&bi, &b, sizeof(float));
+
+ auto result = bi - ai;
+
+ if (ai < 0 || bi < 0)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+
+inline std::int64_t float_distance(double a, double b)
+{
+ using std::abs;
+ using std::isfinite;
+ constexpr auto tol = 2 * (std::numeric_limits<double>::min)();
+
+ // 0, very small, and large magnitude distances all need special handling
+ if (abs(a) == 0 || abs(b) == 0)
+ {
+ return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
+ }
+ else if (abs(a) < tol || abs(b) < tol)
+ {
+ return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
+ }
+
+ static const char* function = "float_distance<%1%>(%1%, %1%)";
+ if(!(boost::math::isfinite)(a))
+ {
+ return policies::raise_domain_error<double>(
+ function,
+ "Argument a must be finite, but got %1%", a, policies::policy<>());
+ }
+ if(!(boost::math::isfinite)(b))
+ {
+ return policies::raise_domain_error<double>(
+ function,
+ "Argument b must be finite, but got %1%", b, policies::policy<>());
+ }
+
+
+ static_assert(sizeof(double) == sizeof(std::int64_t), "double is incorrect size.");
+
+ std::int64_t ai;
+ std::int64_t bi;
+ std::memcpy(&ai, &a, sizeof(double));
+ std::memcpy(&bi, &b, sizeof(double));
+
+ auto result = bi - ai;
+
+ if (ai < 0 || bi < 0)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+
 namespace detail{
 
 template <class T, class Policy>

diff --git a/reporting/performance/new_next_performance.cpp b/reporting/performance/new_next_performance.cpp
@@ -0,0 +1,126 @@
+// (C) Copyright Matt Borland 2022.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/next.hpp>
+#include <benchmark/benchmark.h>
+
+template <typename T>
+void float_distance(benchmark::State& state)
+{
+ const auto difference = static_cast<int>(state.range(0));
+ T left = 2;
+ T right = boost::math::float_advance(left, difference);
+
+ for (auto _ : state)
+ {
+ benchmark::DoNotOptimize(boost::math::float_distance(left, right));
+ }
+ state.SetComplexityN(state.range(0));
+}
+
+BENCHMARK_TEMPLATE(float_distance, float)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
+BENCHMARK_TEMPLATE(float_distance, double)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
+
+BENCHMARK_MAIN();
+
+/*
+Run on Apple M1 Pro Arch using Apple Clang 14.0.0 (15OCT22)
+
+Original performance (Boost 1.80.0):
+
+Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
+This does not affect benchmark measurements, only the metadata output.
+2022-10-15T15:24:07-07:00
+Running ./new_next_performance
+Run on (10 X 24.0916 MHz CPU s)
+CPU Caches:
+ L1 Data 64 KiB
+ L1 Instruction 128 KiB
+ L2 Unified 4096 KiB (x10)
+Load Average: 1.86, 2.53, 5.83
+---------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+---------------------------------------------------------------------------------
+float_distance<float>/2/real_time 61.4 ns 61.4 ns 9074469
+float_distance<float>/4/real_time 61.7 ns 61.7 ns 11384150
+float_distance<float>/8/real_time 61.4 ns 61.4 ns 10814604
+float_distance<float>/16/real_time 61.7 ns 61.7 ns 11348376
+float_distance<float>/32/real_time 61.4 ns 61.4 ns 11387167
+float_distance<float>/64/real_time 61.6 ns 61.6 ns 11131932
+float_distance<float>/128/real_time 61.4 ns 61.4 ns 11382029
+float_distance<float>/256/real_time 61.4 ns 61.4 ns 11307649
+float_distance<float>/512/real_time 61.4 ns 61.4 ns 11376048
+float_distance<float>/1024/real_time 61.4 ns 61.4 ns 11355748
+float_distance<float>/2048/real_time 61.8 ns 61.8 ns 11373776
+float_distance<float>/4096/real_time 61.4 ns 61.4 ns 11382368
+float_distance<float>/8192/real_time 61.4 ns 61.4 ns 11353453
+float_distance<float>/16384/real_time 61.4 ns 61.4 ns 11378298
+float_distance<float>/real_time_BigO 61.48 (1) 61.47 (1)
+float_distance<float>/real_time_RMS 0 % 0 %
+float_distance<double>/2/real_time 55.6 ns 55.6 ns 12580218
+float_distance<double>/4/real_time 55.6 ns 55.6 ns 12577835
+float_distance<double>/8/real_time 55.6 ns 55.6 ns 12564909
+float_distance<double>/16/real_time 56.2 ns 56.2 ns 12554909
+float_distance<double>/32/real_time 56.0 ns 56.0 ns 12544381
+float_distance<double>/64/real_time 55.6 ns 55.6 ns 12566488
+float_distance<double>/128/real_time 55.6 ns 55.6 ns 12499581
+float_distance<double>/256/real_time 55.6 ns 55.6 ns 12565661
+float_distance<double>/512/real_time 56.1 ns 56.1 ns 12550023
+float_distance<double>/1024/real_time 55.8 ns 55.8 ns 12568603
+float_distance<double>/2048/real_time 55.6 ns 55.6 ns 12546049
+float_distance<double>/4096/real_time 55.6 ns 55.6 ns 12528525
+float_distance<double>/8192/real_time 55.9 ns 55.9 ns 12563030
+float_distance<double>/16384/real_time 56.0 ns 56.0 ns 12447644
+float_distance<double>/real_time_BigO 55.78 (1) 55.78 (1)
+float_distance<double>/real_time_RMS 0 % 0 %
+
+New performance:
+
+Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
+This does not affect benchmark measurements, only the metadata output.
+2022-10-15T15:31:37-07:00
+Running ./new_next_performance
+Run on (10 X 24.122 MHz CPU s)
+CPU Caches:
+ L1 Data 64 KiB
+ L1 Instruction 128 KiB
+ L2 Unified 4096 KiB (x10)
+Load Average: 2.12, 2.17, 4.26
+---------------------------------------------------------------------------------
+Benchmark Time CPU Iterations
+---------------------------------------------------------------------------------
+float_distance<float>/2/real_time 15.8 ns 15.8 ns 42162717
+float_distance<float>/4/real_time 15.9 ns 15.9 ns 44213877
+float_distance<float>/8/real_time 15.8 ns 15.8 ns 43972542
+float_distance<float>/16/real_time 15.8 ns 15.8 ns 44209456
+float_distance<float>/32/real_time 15.8 ns 15.8 ns 44200244
+float_distance<float>/64/real_time 15.8 ns 15.8 ns 44239293
+float_distance<float>/128/real_time 15.8 ns 15.8 ns 44171202
+float_distance<float>/256/real_time 15.8 ns 15.8 ns 44241507
+float_distance<float>/512/real_time 15.9 ns 15.8 ns 44230034
+float_distance<float>/1024/real_time 15.8 ns 15.8 ns 44241554
+float_distance<float>/2048/real_time 15.8 ns 15.8 ns 44220802
+float_distance<float>/4096/real_time 15.8 ns 15.8 ns 44220441
+float_distance<float>/8192/real_time 15.9 ns 15.9 ns 44213994
+float_distance<float>/16384/real_time 15.8 ns 15.8 ns 44215413
+float_distance<float>/real_time_BigO 15.83 (1) 15.83 (1)
+float_distance<float>/real_time_RMS 0 % 0 %
+float_distance<double>/2/real_time 15.5 ns 15.5 ns 45098165
+float_distance<double>/4/real_time 15.6 ns 15.6 ns 45065465
+float_distance<double>/8/real_time 15.5 ns 15.5 ns 45058733
+float_distance<double>/16/real_time 15.8 ns 15.7 ns 45078404
+float_distance<double>/32/real_time 15.5 ns 15.5 ns 44832734
+float_distance<double>/64/real_time 15.5 ns 15.5 ns 45077303
+float_distance<double>/128/real_time 15.5 ns 15.5 ns 45067255
+float_distance<double>/256/real_time 15.5 ns 15.5 ns 45073844
+float_distance<double>/512/real_time 15.6 ns 15.6 ns 45109342
+float_distance<double>/1024/real_time 15.5 ns 15.5 ns 44845180
+float_distance<double>/2048/real_time 15.5 ns 15.5 ns 45051846
+float_distance<double>/4096/real_time 15.5 ns 15.5 ns 45064317
+float_distance<double>/8192/real_time 15.5 ns 15.5 ns 45115653
+float_distance<double>/16384/real_time 15.5 ns 15.5 ns 45067642
+float_distance<double>/real_time_BigO 15.54 (1) 15.54 (1)
+float_distance<double>/real_time_RMS 0 % 0 %
+*/
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
@@ -527,6 +527,7 @@ test-suite special_fun :
  [ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework ]
  # Needs to run in release mode, as it's rather slow:
  [ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
+ [ run test_fast_float_distance.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <linkflags>"-Bstatic -lquadmath -Bdynamic" : <build>no ] ]
  [ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
  [ run test_owens_t.cpp ../../test/build//boost_unit_test_framework ]
  [ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]