Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster floating point comparisons #814

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
132 changes: 132 additions & 0 deletions include/boost/math/special_functions/fast_float_distance.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// (C) Copyright Matt Borland 2022.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_MATH_SF_FAST_FLOAT_DISTANCE
#define BOOST_MATH_SF_FAST_FLOAT_DISTANCE

#include <boost/math/special_functions/next.hpp>
#include <boost/math/tools/throw_exception.hpp>
#include <stdexcept>
#include <limits>

#if defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_STANDALONE)
#include <boost/multiprecision/float128.hpp>
#include <boost/multiprecision/detail/standalone_config.hpp>
#define BOOST_MATH_USE_FAST_FLOAT128
#elif defined(BOOST_MATH_USE_FLOAT128) && defined(BOOST_MATH_STANDALONE)
# if __has_include(<quadmath.h>)
# include <quadmath.h>
# define BOOST_MATH_USE_FAST_STANDALONE_FLOAT128
# endif
#endif

namespace boost { namespace math {

// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
// https://blog.regehr.org/archives/959
inline std::int32_t fast_float_distance(float a, float b)
NAThompson marked this conversation as resolved.
Show resolved Hide resolved
{
return boost::math::float_distance(a, b);
}

inline std::int64_t fast_float_distance(double a, double b)
{
return boost::math::float_distance(a, b);
}

#ifdef BOOST_MATH_USE_FAST_FLOAT128
boost::multiprecision::int128_type fast_float_distance(boost::multiprecision::float128_type a, boost::multiprecision::float128_type b)
{
using std::abs;
using std::isfinite;

constexpr boost::multiprecision::float128_type tol = 2 * BOOST_MP_QUAD_MIN;

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return 0;
}
else if (abs(a) < tol || abs(b) < tol)
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
}

if (!(isfinite)(a))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distance must be finite"));
}
else if (!(isfinite)(b))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}

static_assert(sizeof(boost::multiprecision::int128_type) == sizeof(boost::multiprecision::float128_type), "float128 is the wrong size");

boost::multiprecision::int128_type ai;
boost::multiprecision::int128_type bi;
std::memcpy(&ai, &a, sizeof(boost::multiprecision::float128_type));
std::memcpy(&bi, &b, sizeof(boost::multiprecision::float128_type));

boost::multiprecision::int128_type result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

#elif defined(BOOST_MATH_USE_FAST_STANDALONE_FLOAT128)
__int128 fast_float_distance(__float128 a, __float128 b)
{
constexpr __float128 tol = 2 * static_cast<__float128>(1) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) / 1073741824;

// 0, very small, and large magnitude distances all need special handling
if (::fabsq(a) == 0 || ::fabsq(b) == 0)
{
return 0;
}
else if (::fabsq(a) < tol || ::fabsq(b) < tol)
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
}

if (!(::isinfq)(a) && !(::isnanq)(a))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}
else if (!(::isinfq)(b) && !(::isnanq)(b))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}

static_assert(sizeof(__int128) == sizeof(__float128));

__int128 ai;
__int128 bi;
std::memcpy(&ai, &a, sizeof(__float128));
std::memcpy(&bi, &b, sizeof(__float128));

__int128 result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}
#endif

}} // Namespaces

#endif
101 changes: 100 additions & 1 deletion include/boost/math/special_functions/next.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/trunc.hpp>
#include <boost/math/tools/traits.hpp>
#include <boost/math/tools/config.hpp>
#include <type_traits>
#include <cfloat>

#include <cstdint>
#include <cstring>

#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
Expand Down Expand Up @@ -717,6 +719,103 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
return boost::math::float_distance(a, b, policies::policy<>());
}

// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
// https://blog.regehr.org/archives/959
inline std::int32_t float_distance(float a, float b)
{
using std::abs;
using std::isfinite;
constexpr auto tol = 2 * (std::numeric_limits<float>::min)();

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
}
else if (abs(a) < tol || abs(b) < tol)
{
return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
}

static const char* function = "float_distance<%1%>(%1%, %1%)";
if(!(boost::math::isfinite)(a))
{
return policies::raise_domain_error<float>(
function,
"Argument a must be finite, but got %1%", a, policies::policy<>());
}
if(!(boost::math::isfinite)(b))
{
return policies::raise_domain_error<float>(
function,
"Argument b must be finite, but got %1%", b, policies::policy<>());
}

static_assert(sizeof(float) == sizeof(std::int32_t), "float is incorrect size.");

std::int32_t ai;
std::int32_t bi;
std::memcpy(&ai, &a, sizeof(float));
std::memcpy(&bi, &b, sizeof(float));

auto result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

inline std::int64_t float_distance(double a, double b)
{
using std::abs;
using std::isfinite;
constexpr auto tol = 2 * (std::numeric_limits<double>::min)();

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
}
else if (abs(a) < tol || abs(b) < tol)
{
return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
}

static const char* function = "float_distance<%1%>(%1%, %1%)";
if(!(boost::math::isfinite)(a))
{
return policies::raise_domain_error<double>(
function,
"Argument a must be finite, but got %1%", a, policies::policy<>());
}
if(!(boost::math::isfinite)(b))
{
return policies::raise_domain_error<double>(
function,
"Argument b must be finite, but got %1%", b, policies::policy<>());
}


static_assert(sizeof(double) == sizeof(std::int64_t), "double is incorrect size.");

std::int64_t ai;
std::int64_t bi;
std::memcpy(&ai, &a, sizeof(double));
std::memcpy(&bi, &b, sizeof(double));

auto result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

namespace detail{

template <class T, class Policy>
Expand Down
126 changes: 126 additions & 0 deletions reporting/performance/new_next_performance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// (C) Copyright Matt Borland 2022.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/math/special_functions/next.hpp>
#include <benchmark/benchmark.h>

template <typename T>
void float_distance(benchmark::State& state)
{
const auto difference = static_cast<int>(state.range(0));
T left = 2;
T right = boost::math::float_advance(left, difference);

for (auto _ : state)
{
benchmark::DoNotOptimize(boost::math::float_distance(left, right));
}
state.SetComplexityN(state.range(0));
}

BENCHMARK_TEMPLATE(float_distance, float)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
BENCHMARK_TEMPLATE(float_distance, double)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();

BENCHMARK_MAIN();

/*
Run on Apple M1 Pro Arch using Apple Clang 14.0.0 (15OCT22)

Original performance (Boost 1.80.0):

Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
This does not affect benchmark measurements, only the metadata output.
2022-10-15T15:24:07-07:00
Running ./new_next_performance
Run on (10 X 24.0916 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 1.86, 2.53, 5.83
---------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------
float_distance<float>/2/real_time 61.4 ns 61.4 ns 9074469
float_distance<float>/4/real_time 61.7 ns 61.7 ns 11384150
float_distance<float>/8/real_time 61.4 ns 61.4 ns 10814604
float_distance<float>/16/real_time 61.7 ns 61.7 ns 11348376
float_distance<float>/32/real_time 61.4 ns 61.4 ns 11387167
float_distance<float>/64/real_time 61.6 ns 61.6 ns 11131932
float_distance<float>/128/real_time 61.4 ns 61.4 ns 11382029
float_distance<float>/256/real_time 61.4 ns 61.4 ns 11307649
float_distance<float>/512/real_time 61.4 ns 61.4 ns 11376048
float_distance<float>/1024/real_time 61.4 ns 61.4 ns 11355748
float_distance<float>/2048/real_time 61.8 ns 61.8 ns 11373776
float_distance<float>/4096/real_time 61.4 ns 61.4 ns 11382368
float_distance<float>/8192/real_time 61.4 ns 61.4 ns 11353453
float_distance<float>/16384/real_time 61.4 ns 61.4 ns 11378298
float_distance<float>/real_time_BigO 61.48 (1) 61.47 (1)
float_distance<float>/real_time_RMS 0 % 0 %
float_distance<double>/2/real_time 55.6 ns 55.6 ns 12580218
float_distance<double>/4/real_time 55.6 ns 55.6 ns 12577835
float_distance<double>/8/real_time 55.6 ns 55.6 ns 12564909
float_distance<double>/16/real_time 56.2 ns 56.2 ns 12554909
float_distance<double>/32/real_time 56.0 ns 56.0 ns 12544381
float_distance<double>/64/real_time 55.6 ns 55.6 ns 12566488
float_distance<double>/128/real_time 55.6 ns 55.6 ns 12499581
float_distance<double>/256/real_time 55.6 ns 55.6 ns 12565661
float_distance<double>/512/real_time 56.1 ns 56.1 ns 12550023
float_distance<double>/1024/real_time 55.8 ns 55.8 ns 12568603
float_distance<double>/2048/real_time 55.6 ns 55.6 ns 12546049
float_distance<double>/4096/real_time 55.6 ns 55.6 ns 12528525
float_distance<double>/8192/real_time 55.9 ns 55.9 ns 12563030
float_distance<double>/16384/real_time 56.0 ns 56.0 ns 12447644
float_distance<double>/real_time_BigO 55.78 (1) 55.78 (1)
float_distance<double>/real_time_RMS 0 % 0 %

New performance:

Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
This does not affect benchmark measurements, only the metadata output.
2022-10-15T15:31:37-07:00
Running ./new_next_performance
Run on (10 X 24.122 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 2.12, 2.17, 4.26
---------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------
float_distance<float>/2/real_time 15.8 ns 15.8 ns 42162717
float_distance<float>/4/real_time 15.9 ns 15.9 ns 44213877
float_distance<float>/8/real_time 15.8 ns 15.8 ns 43972542
float_distance<float>/16/real_time 15.8 ns 15.8 ns 44209456
float_distance<float>/32/real_time 15.8 ns 15.8 ns 44200244
float_distance<float>/64/real_time 15.8 ns 15.8 ns 44239293
float_distance<float>/128/real_time 15.8 ns 15.8 ns 44171202
float_distance<float>/256/real_time 15.8 ns 15.8 ns 44241507
float_distance<float>/512/real_time 15.9 ns 15.8 ns 44230034
float_distance<float>/1024/real_time 15.8 ns 15.8 ns 44241554
float_distance<float>/2048/real_time 15.8 ns 15.8 ns 44220802
float_distance<float>/4096/real_time 15.8 ns 15.8 ns 44220441
float_distance<float>/8192/real_time 15.9 ns 15.9 ns 44213994
float_distance<float>/16384/real_time 15.8 ns 15.8 ns 44215413
float_distance<float>/real_time_BigO 15.83 (1) 15.83 (1)
float_distance<float>/real_time_RMS 0 % 0 %
float_distance<double>/2/real_time 15.5 ns 15.5 ns 45098165
float_distance<double>/4/real_time 15.6 ns 15.6 ns 45065465
float_distance<double>/8/real_time 15.5 ns 15.5 ns 45058733
float_distance<double>/16/real_time 15.8 ns 15.7 ns 45078404
float_distance<double>/32/real_time 15.5 ns 15.5 ns 44832734
float_distance<double>/64/real_time 15.5 ns 15.5 ns 45077303
float_distance<double>/128/real_time 15.5 ns 15.5 ns 45067255
float_distance<double>/256/real_time 15.5 ns 15.5 ns 45073844
float_distance<double>/512/real_time 15.6 ns 15.6 ns 45109342
float_distance<double>/1024/real_time 15.5 ns 15.5 ns 44845180
float_distance<double>/2048/real_time 15.5 ns 15.5 ns 45051846
float_distance<double>/4096/real_time 15.5 ns 15.5 ns 45064317
float_distance<double>/8192/real_time 15.5 ns 15.5 ns 45115653
float_distance<double>/16384/real_time 15.5 ns 15.5 ns 45067642
float_distance<double>/real_time_BigO 15.54 (1) 15.54 (1)
float_distance<double>/real_time_RMS 0 % 0 %
*/
1 change: 1 addition & 0 deletions test/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ test-suite special_fun :
[ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework ]
# Needs to run in release mode, as it's rather slow:
[ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
[ run test_fast_float_distance.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <linkflags>"-Bstatic -lquadmath -Bdynamic" : <build>no ] ]
[ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
[ run test_owens_t.cpp ../../test/build//boost_unit_test_framework ]
[ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
Expand Down
Loading