Skip to content

Commit deea195

Browse files
committed
mul_add feature + macos CI
1 parent d991c6e commit deea195

File tree

3 files changed

+23
-7
lines changed

3 files changed

+23
-7
lines changed

.github/workflows/rust.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
os: [
2020
ubuntu-latest,
2121
windows-latest,
22-
# macos-latest # disabled due to incompatibility. See issue #1
22+
macos-latest,
2323
]
2424
rust: [stable]
2525
steps:
@@ -38,6 +38,8 @@ jobs:
3838
- name: Build
3939
run: cargo build --verbose
4040
- name: Run tests
41-
run: cargo test --verbose
42-
- name: Run tests on Release
43-
run: cargo test --release --verbose
41+
if: matrix.os != 'macos-latest'
42+
run: cargo test --verbose && cargo test --release --verbose
43+
- name: Run tests with FMA (macOS)
44+
if: matrix.os == 'macos-latest'
45+
run: cargo test --verbose --features mul_add && cargo test --release --verbose --features mul_add

Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ name = "pymath"
33
version = "0.1.0"
44
edition = "2024"
55

6+
[features]
7+
# Enable this feature on aarch64-apple-darwin to use fused multiply-add (FMA); it helps results stay bit-for-bit compatible with CPython.
8+
# See also: https://github.com/python/cpython/issues/132763
9+
mul_add = []
10+
611
[dev-dependencies]
712
proptest = "1.6.0"
813
pyo3 = { version = "0.24", features = ["abi3"] }

src/gamma.rs

+12-3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ const LANCZOS_DEN_COEFFS: [f64; LANCZOS_N] = [
3737
1.0,
3838
];
3939

40+
/// Computes `a * b + c`, optionally as a fused multiply-add.
///
/// When the crate is built with the `mul_add` Cargo feature, this delegates to
/// [`f64::mul_add`]; otherwise it evaluates the plain expression `a * b + c`
/// as a separate multiply followed by an add. The two forms can differ in the
/// last bit of the result, which is why the choice is feature-gated.
fn mul_add(a: f64, b: f64, c: f64) -> f64 {
41+
// `cfg!` expands to a compile-time boolean, so both branches are
// type-checked but only one is ever taken for a given build.
if cfg!(feature = "mul_add") {
42+
a.mul_add(b, c)
43+
} else {
44+
a * b + c
45+
}
46+
}
47+
4048
fn lanczos_sum(x: f64) -> f64 {
4149
let mut num = 0.0;
4250
let mut den = 0.0;
@@ -50,8 +58,8 @@ fn lanczos_sum(x: f64) -> f64 {
5058
// this resulted in lower accuracy.
5159
if x < 5.0 {
5260
for i in (0..LANCZOS_N).rev() {
53-
num = num * x + LANCZOS_NUM_COEFFS[i];
54-
den = den * x + LANCZOS_DEN_COEFFS[i];
61+
num = mul_add(num, x, LANCZOS_NUM_COEFFS[i]);
62+
den = mul_add(den, x, LANCZOS_DEN_COEFFS[i]);
5563
}
5664
} else {
5765
for i in 0..LANCZOS_N {
@@ -237,7 +245,8 @@ pub fn lgamma(x: f64) -> Result<f64, Error> {
237245
// absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
238246
// subtraction below; it's probably not worth it.
239247
let mut r = lanczos_sum(absx).ln() - LANCZOS_G;
240-
r += (absx - 0.5) * ((absx + LANCZOS_G - 0.5).ln() - 1.0);
248+
let t = absx - 0.5;
249+
r = mul_add(t, (absx + LANCZOS_G - 0.5).ln() - 1.0, r);
241250

242251
if x < 0.0 {
243252
// Use reflection formula to get value for negative x

0 commit comments

Comments
 (0)