Skip to content

Commit

Permalink
Improve use of intrinsics for calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
mborland committed May 25, 2022
1 parent 720573a commit 49881be
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/sf/ccmath.qbk
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ All of the following functions require C++17 or greater.

template <typename Real>
inline constexpr Real fma(Real x, Real y, Real z) noexcept
Requires compiling with the fma flag (e.g. -mfma on GCC/Clang)

template <typename Arithmetic1, typename Arithmetic2, typename Arithmetic3>
inline constexpr Promoted fma(Arithmetic1 x, Arithmetic2 y, Arithmetic3 z) noexcept
Expand Down
25 changes: 20 additions & 5 deletions include/boost/math/ccmath/fma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@
#include <boost/math/ccmath/isinf.hpp>
#include <boost/math/ccmath/isnan.hpp>

// Pull in the x86 SIMD intrinsics header only when BOTH conditions hold:
//   1. the target is x86-64 (GCC/Clang predefine the lowercase __x86_64__
//      and __amd64__ macros), and
//   2. the header is actually available to this translation unit.
// The architecture test is parenthesized because && binds tighter than ||;
// without the parentheses the __has_include guard would not apply to the
// __amd64__ case and the header would be included unconditionally there.
#if (defined(__x86_64__) || defined(__amd64__)) && __has_include(<immintrin.h>)
# include <immintrin.h>
# define BOOST_MATH_HAS_IMMINTRIN_H
#endif

namespace boost::math::ccmath {

namespace detail {

template <typename T>
inline constexpr T fma_imp(const T x, const T y, const T z) noexcept
{
#if __GNUC__ < 10
return (x * y) + z;
#else
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER)
if constexpr (std::is_same_v<T, float>)
{
return __builtin_fmaf(x, y, z);
Expand All @@ -35,11 +38,23 @@ inline constexpr T fma_imp(const T x, const T y, const T z) noexcept
{
return __builtin_fmal(x, y, z);
}
else // e.g. Boost.Multiprecision types where no built-in exists
#elif defined(BOOST_MATH_HAS_IMMINTRIN_H)
if constexpr (std::is_same_v<T, float>)
{
return static_cast<float>(_mm_fmadd_ps(x, y, z));
}
else if constexpr (std::is_same_v<T, double>)
{
return static_cast<double>(_mm_fmadd_pd(x, y, z));
}
else if constexpr (std::is_same_v<T, long double>)
{
return (x * y) + z;
return static_cast<long double>(_mm256_fmadd_pd(x, y, z));
}
#endif

// If we can't use compiler intrinsics, hope that the -fma flag optimizes this call to an fma instruction
return (x * y) + z;
}

} // Namespace detail
Expand Down
2 changes: 1 addition & 1 deletion test/ccmath_fma_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <boost/multiprecision/float128.hpp>
#endif

#if !defined(BOOST_MATH_NO_CONSTEXPR_DETECTION) && !defined(BOOST_MATH_USING_BUILTIN_CONSTANT_P)
template <typename T>
constexpr void test()
{
Expand Down Expand Up @@ -49,7 +50,6 @@ constexpr void test()
}
}

#if !defined(BOOST_MATH_NO_CONSTEXPR_DETECTION) && !defined(BOOST_MATH_USING_BUILTIN_CONSTANT_P)
int main()
{
test<float>();
Expand Down

0 comments on commit 49881be

Please sign in to comment.