From 8c7418ece1bd2ad5143bb8309559a3a6a1a4a12e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 19:19:24 +0000
Subject: [PATCH] Mark generic functions `#[inline]`

Benchmarks for [1] seemed to indicate that repository organization for
some reason had an effect on performance, even though the exact same
rustc commands were running (though some with a different order). After
investigating more, it appears that dependencies may have an effect on
inlining thresholds for generic functions.

It is surprising that this happens; we more or less expect that public
functions will be standalone but everything they call will be inlined.
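To help ensure this, mark all generic functions `#[inline]` if they
should be merged into the public function. On the generic `fmax` and
`fmod`, the `#[inline]` attribute takes the place of the previous
`no_panic` test attribute.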

Zulip discussion at [2].

[1]: https://github.com/rust-lang/libm/pull/533
[2]: https://rust-lang.zulipchat.com/#narrow/channel/182449-t-compiler.2Fhelp/topic/Dependencies.20affecting.20codegen/with/513079387
---
 src/math/fma.rs                  | 1 +
 src/math/fma_wide.rs             | 1 +
 src/math/generic/ceil.rs         | 2 ++
 src/math/generic/copysign.rs     | 1 +
 src/math/generic/fabs.rs         | 1 +
 src/math/generic/fdim.rs         | 1 +
 src/math/generic/floor.rs        | 2 ++
 src/math/generic/fmax.rs         | 2 +-
 src/math/generic/fmaximum.rs     | 1 +
 src/math/generic/fmaximum_num.rs | 1 +
 src/math/generic/fmin.rs         | 1 +
 src/math/generic/fminimum.rs     | 1 +
 src/math/generic/fminimum_num.rs | 1 +
 src/math/generic/fmod.rs         | 2 +-
 src/math/generic/mod.rs          | 3 +++
 src/math/generic/rint.rs         | 1 +
 src/math/generic/round.rs        | 1 +
 src/math/generic/scalbn.rs       | 1 +
 src/math/generic/sqrt.rs         | 2 ++
 src/math/generic/trunc.rs        | 2 ++
 src/math/roundeven.rs            | 1 +
 21 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/src/math/fma.rs b/src/math/fma.rs
index 789b0836a..e0b3347ac 100644
--- a/src/math/fma.rs
+++ b/src/math/fma.rs
@@ -29,6 +29,7 @@ pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
 
 /// Fused multiply-add that works when there is not a larger float size available. Computes
 /// `(x * y) + z`.
+#[inline]
 pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
 where
     F: Float,
diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs
index 8e908a14f..08b78b022 100644
--- a/src/math/fma_wide.rs
+++ b/src/math/fma_wide.rs
@@ -28,6 +28,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+#[inline]
 pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
 where
     F: Float + HFloat<D = B>,
diff --git a/src/math/generic/ceil.rs b/src/math/generic/ceil.rs
index bf7e1d8e2..5c5bb4763 100644
--- a/src/math/generic/ceil.rs
+++ b/src/math/generic/ceil.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn ceil<F: Float>(x: F) -> F {
     ceil_status(x).val
 }
 
+#[inline]
 pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/src/math/generic/copysign.rs b/src/math/generic/copysign.rs
index 04864a359..a61af22f0 100644
--- a/src/math/generic/copysign.rs
+++ b/src/math/generic/copysign.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Copy the sign of `y` to `x`.
+#[inline]
 pub fn copysign<F: Float>(x: F, y: F) -> F {
     let mut ux = x.to_bits();
     let uy = y.to_bits();
diff --git a/src/math/generic/fabs.rs b/src/math/generic/fabs.rs
index 75b473107..0fa0edf9b 100644
--- a/src/math/generic/fabs.rs
+++ b/src/math/generic/fabs.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Absolute value.
+#[inline]
 pub fn fabs<F: Float>(x: F) -> F {
     let abs_mask = !F::SIGN_MASK;
     F::from_bits(x.to_bits() & abs_mask)
diff --git a/src/math/generic/fdim.rs b/src/math/generic/fdim.rs
index bf971cd7d..a63007b19 100644
--- a/src/math/generic/fdim.rs
+++ b/src/math/generic/fdim.rs
@@ -1,5 +1,6 @@
 use super::super::Float;
 
+#[inline]
 pub fn fdim<F: Float>(x: F, y: F) -> F {
     if x <= y { F::ZERO } else { x - y }
 }
diff --git a/src/math/generic/floor.rs b/src/math/generic/floor.rs
index 779955164..243804625 100644
--- a/src/math/generic/floor.rs
+++ b/src/math/generic/floor.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn floor<F: Float>(x: F) -> F {
     floor_status(x).val
 }
 
+#[inline]
 pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/src/math/generic/fmax.rs b/src/math/generic/fmax.rs
index 29a031100..bf3f847e8 100644
--- a/src/math/generic/fmax.rs
+++ b/src/math/generic/fmax.rs
@@ -16,7 +16,7 @@
 
 use super::super::Float;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() || x < y { y } else { x };
     // Canonicalize
diff --git a/src/math/generic/fmaximum.rs b/src/math/generic/fmaximum.rs
index 9e8d1739f..387055af2 100644
--- a/src/math/generic/fmaximum.rs
+++ b/src/math/generic/fmaximum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/src/math/generic/fmaximum_num.rs b/src/math/generic/fmaximum_num.rs
index 756ef5d9f..f7efdde80 100644
--- a/src/math/generic/fmaximum_num.rs
+++ b/src/math/generic/fmaximum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/src/math/generic/fmin.rs b/src/math/generic/fmin.rs
index 69fbf85a1..cd3caeee4 100644
--- a/src/math/generic/fmin.rs
+++ b/src/math/generic/fmin.rs
@@ -16,6 +16,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
     let res = if y.is_nan() || x < y { x } else { y };
     // Canonicalize
diff --git a/src/math/generic/fminimum.rs b/src/math/generic/fminimum.rs
index ee5493880..4ddb36455 100644
--- a/src/math/generic/fminimum.rs
+++ b/src/math/generic/fminimum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/src/math/generic/fminimum_num.rs b/src/math/generic/fminimum_num.rs
index 966618328..441c204a9 100644
--- a/src/math/generic/fminimum_num.rs
+++ b/src/math/generic/fminimum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/src/math/generic/fmod.rs b/src/math/generic/fmod.rs
index cd23350ea..6414bbd25 100644
--- a/src/math/generic/fmod.rs
+++ b/src/math/generic/fmod.rs
@@ -3,7 +3,7 @@
 
 use super::super::{CastFrom, Float, Int, MinInt};
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmod<F: Float>(x: F, y: F) -> F {
     let zero = F::Int::ZERO;
     let one = F::Int::ONE;
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
index 9be185f80..35846351a 100644
--- a/src/math/generic/mod.rs
+++ b/src/math/generic/mod.rs
@@ -1,3 +1,6 @@
+// Note: generic functions are marked `#[inline]` because, even though generic functions are
+// typically inlined, this does not seem to always be the case.
+
 mod ceil;
 mod copysign;
 mod fabs;
diff --git a/src/math/generic/rint.rs b/src/math/generic/rint.rs
index 45d2f3138..9cdeb1185 100644
--- a/src/math/generic/rint.rs
+++ b/src/math/generic/rint.rs
@@ -6,6 +6,7 @@ use super::super::support::{FpResult, Round};
 
 /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
 /// applicable.
+#[inline]
 pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
     let e = x.ex();
diff --git a/src/math/generic/round.rs b/src/math/generic/round.rs
index 8b5138188..01314ac70 100644
--- a/src/math/generic/round.rs
+++ b/src/math/generic/round.rs
@@ -1,6 +1,7 @@
 use super::super::{Float, MinInt};
 use super::{copysign, trunc};
 
+#[inline]
 pub fn round<F: Float>(x: F) -> F {
     let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
     let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs
index b2696e5cc..a45db1b4a 100644
--- a/src/math/generic/scalbn.rs
+++ b/src/math/generic/scalbn.rs
@@ -16,6 +16,7 @@ use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
 /// >
 /// > If the calculation does not overflow or underflow, the returned value is exact and
 /// > independent of the current rounding direction mode.
+#[inline]
 pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
 where
     u32: CastInto<F::Int>,
diff --git a/src/math/generic/sqrt.rs b/src/math/generic/sqrt.rs
index 5918025bc..ec9ff22df 100644
--- a/src/math/generic/sqrt.rs
+++ b/src/math/generic/sqrt.rs
@@ -44,6 +44,7 @@
 use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
 use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
 
+#[inline]
 pub fn sqrt<F>(x: F) -> F
 where
     F: Float + SqrtHelper,
@@ -57,6 +58,7 @@ where
     sqrt_round(x, Round::Nearest).val
 }
 
+#[inline]
 pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
 where
     F: Float + SqrtHelper,
diff --git a/src/math/generic/trunc.rs b/src/math/generic/trunc.rs
index 0fb3fa5ad..25414ecf4 100644
--- a/src/math/generic/trunc.rs
+++ b/src/math/generic/trunc.rs
@@ -4,10 +4,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn trunc<F: Float>(x: F) -> F {
     trunc_status(x).val
 }
 
+#[inline]
 pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     let mut xi: F::Int = x.to_bits();
     let e: i32 = x.exp_unbiased();
diff --git a/src/math/roundeven.rs b/src/math/roundeven.rs
index ec1738285..6e621d762 100644
--- a/src/math/roundeven.rs
+++ b/src/math/roundeven.rs
@@ -30,6 +30,7 @@ pub fn roundevenf128(x: f128) -> f128 {
     roundeven_impl(x)
 }
 
+#[inline]
 pub fn roundeven_impl<F: Float>(x: F) -> F {
     super::generic::rint_round(x, Round::Nearest).val
 }