From deea19512915a3d20942878ce509c8f5a955610f Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 22 Apr 2025 07:45:38 +0900 Subject: [PATCH] mul_add feature + macos CI --- .github/workflows/rust.yml | 10 ++++++---- Cargo.toml | 5 +++++ src/gamma.rs | 15 ++++++++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 98ec583..0b5c9dc 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,7 +19,7 @@ jobs: os: [ ubuntu-latest, windows-latest, - # macos-latest # disabled due to incompatibility. See issue #1 + macos-latest, ] rust: [stable] steps: @@ -38,6 +38,8 @@ jobs: - name: Build run: cargo build --verbose - name: Run tests - run: cargo test --verbose - - name: Run tests on Release - run: cargo test --release --verbose + if: matrix.os != 'macos-latest' + run: cargo test --verbose && cargo test --release --verbose + - name: Run tests with FMA (macOS) + if: matrix.os == 'macos-latest' + run: cargo test --verbose --features mul_add && cargo test --release --verbose --features mul_add diff --git a/Cargo.toml b/Cargo.toml index 6347416..e95f84d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,11 @@ name = "pymath" version = "0.1.0" edition = "2024" +[features] +# Turning on this feature on aarch64-apple-darwin helps bit representation compatibility +# See also: https://github.com/python/cpython/issues/132763 +mul_add = [] + [dev-dependencies] proptest = "1.6.0" pyo3 = { version = "0.24", features = ["abi3"] } diff --git a/src/gamma.rs b/src/gamma.rs index 7dd6f98..db96333 100644 --- a/src/gamma.rs +++ b/src/gamma.rs @@ -37,6 +37,14 @@ const LANCZOS_DEN_COEFFS: [f64; LANCZOS_N] = [ 1.0, ]; +fn mul_add(a: f64, b: f64, c: f64) -> f64 { + if cfg!(feature = "mul_add") { + a.mul_add(b, c) + } else { + a * b + c + } +} + fn lanczos_sum(x: f64) -> f64 { let mut num = 0.0; let mut den = 0.0; @@ -50,8 +58,8 @@ fn lanczos_sum(x: f64) -> f64 { // this resulted in lower accuracy. if x < 5.0 { for i in (0..LANCZOS_N).rev() { - num = num * x + LANCZOS_NUM_COEFFS[i]; - den = den * x + LANCZOS_DEN_COEFFS[i]; + num = mul_add(num, x, LANCZOS_NUM_COEFFS[i]); + den = mul_add(den, x, LANCZOS_DEN_COEFFS[i]); } } else { for i in 0..LANCZOS_N { @@ -237,7 +245,8 @@ pub fn lgamma(x: f64) -> Result { // absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g // subtraction below; it's probably not worth it. let mut r = lanczos_sum(absx).ln() - LANCZOS_G; - r += (absx - 0.5) * ((absx + LANCZOS_G - 0.5).ln() - 1.0); + let t = absx - 0.5; + r = mul_add(t, (absx + LANCZOS_G - 0.5).ln() - 1.0, r); if x < 0.0 { // Use reflection formula to get value for negative x