Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 0649148

Browse files
committedApr 20, 2025
working
1 parent 41163f3 commit 0649148

File tree

2 files changed

+224
-93
lines changed

2 files changed

+224
-93
lines changed
 

‎check_constants.c

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#include <stdio.h>
2+
#include <stdint.h>
3+
#include <string.h> // For memcpy
4+
#include <math.h> // For M_PI if needed, though PI is defined below
5+
6+
// Function to print the bit representation of a double
7+
void print_double_bits(const char *name, double val) {
8+
uint64_t bits;
9+
// Use memcpy to safely copy the bits, avoiding potential strict-aliasing issues
10+
memcpy(&bits, &val, sizeof(double));
11+
printf("%s = %.17g (0x%016lx)\n", name, val, bits);
12+
}
13+
14+
int main() {
15+
// Constants from gamma.rs
16+
const double PI = 3.141592653589793238462643383279502884197;
17+
const double LOG_PI = 1.144729885849400174143427351353058711647;
18+
const double LANCZOS_G = 6.024680040776729583740234375;
19+
const double LANCZOS_G_MINUS_HALF = 5.524680040776729583740234375;
20+
21+
const int LANCZOS_N = 13;
22+
const double LANCZOS_NUM_COEFFS[LANCZOS_N] = {
23+
23531376880.410759688572007674451636754734846804940,
24+
42919803642.649098768957899047001988850926355848959,
25+
35711959237.355668049440185451547166705960488635843,
26+
17921034426.037209699919755754458931112671403265390,
27+
6039542586.3520280050642916443072979210699388420708,
28+
1439720407.3117216736632230727949123939715485786772,
29+
248874557.86205415651146038641322942321632125127801,
30+
31426415.585400194380614231628318205362874684987640,
31+
2876370.6289353724412254090516208496135991145378768,
32+
186056.26539522349504029498971604569928220784236328,
33+
8071.6720023658162106380029022722506138218516325024,
34+
210.82427775157934587250973392071336271166969580291,
35+
2.5066282746310002701649081771338373386264310793408,
36+
};
37+
const double LANCZOS_DEN_COEFFS[LANCZOS_N] = {
38+
0.0,
39+
39916800.0,
40+
120543840.0,
41+
150917976.0,
42+
105258076.0,
43+
45995730.0,
44+
13339535.0,
45+
2637558.0,
46+
357423.0,
47+
32670.0,
48+
1925.0,
49+
66.0,
50+
1.0,
51+
};
52+
53+
const int NGAMMA_INTEGRAL = 23;
54+
const double GAMMA_INTEGRAL[NGAMMA_INTEGRAL] = {
55+
1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0,
56+
362880.0, 3628800.0, 39916800.0, 479001600.0, 6227020800.0,
57+
87178291200.0, 1307674368000.0, 20922789888000.0,
58+
355687428096000.0, 6402373705728000.0, 121645100408832000.0,
59+
2432902008176640000.0, 51090942171709440000.0,
60+
1124000727777607680000.0,
61+
};
62+
63+
printf("--- Single Constants ---\n");
64+
print_double_bits("PI", PI); // Added PI for completeness
65+
print_double_bits("LOG_PI", LOG_PI);
66+
print_double_bits("LANCZOS_G", LANCZOS_G);
67+
print_double_bits("LANCZOS_G_MINUS_HALF", LANCZOS_G_MINUS_HALF);
68+
69+
printf("\n--- LANCZOS_NUM_COEFFS ---\n");
70+
for (int i = 0; i < LANCZOS_N; ++i) {
71+
char name[32];
72+
snprintf(name, sizeof(name), "LANCZOS_NUM_COEFFS[%d]", i);
73+
print_double_bits(name, LANCZOS_NUM_COEFFS[i]);
74+
}
75+
76+
printf("\n--- LANCZOS_DEN_COEFFS ---\n");
77+
for (int i = 0; i < LANCZOS_N; ++i) {
78+
char name[32];
79+
snprintf(name, sizeof(name), "LANCZOS_DEN_COEFFS[%d]", i);
80+
print_double_bits(name, LANCZOS_DEN_COEFFS[i]);
81+
}
82+
83+
printf("\n--- GAMMA_INTEGRAL ---\n");
84+
for (int i = 0; i < NGAMMA_INTEGRAL; ++i) {
85+
char name[32];
86+
snprintf(name, sizeof(name), "GAMMA_INTEGRAL[%d]", i);
87+
print_double_bits(name, GAMMA_INTEGRAL[i]);
88+
}
89+
90+
return 0;
91+
}

‎src/gamma.rs

Lines changed: 133 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,82 @@
11
use crate::Error;
2-
use std::f64::consts::PI;
2+
// use std::f64::consts::PI;
33

4-
const LOG_PI: f64 = 1.144729885849400174143427351353058711647;
4+
5+
// Import C library functions properly
6+
unsafe extern "C" {
7+
fn exp(x: f64) -> f64;
8+
fn log(x: f64) -> f64;
9+
fn sin(x: f64) -> f64;
10+
fn cos(x: f64) -> f64;
11+
fn pow(x: f64, y: f64) -> f64;
12+
fn floor(x: f64) -> f64;
13+
fn fabs(x: f64) -> f64;
14+
fn round(x: f64) -> f64;
15+
fn fmod(x: f64, y: f64) -> f64;
16+
}
17+
18+
const PI: f64 = f64::from_bits(0x400921fb54442d18); // = 3.141592653589793238462643383279502884197;
19+
const LOG_PI: f64 = f64::from_bits(0x3ff250d048e7a1bd); // = 1.144729885849400174143427351353058711647;
520

621
const LANCZOS_N: usize = 13;
7-
const LANCZOS_G: f64 = 6.024680040776729583740234375;
8-
const LANCZOS_G_MINUS_HALF: f64 = 5.524680040776729583740234375;
22+
const LANCZOS_G: f64 = f64::from_bits(0x40181945b9800000); // = 6.024680040776729583740234375;
23+
const LANCZOS_G_MINUS_HALF: f64 = f64::from_bits(0x40161945b9800000); // = 5.524680040776729583740234375;
924
const LANCZOS_NUM_COEFFS: [f64; LANCZOS_N] = [
10-
23531376880.410759688572007674451636754734846804940,
11-
42919803642.649098768957899047001988850926355848959,
12-
35711959237.355668049440185451547166705960488635843,
13-
17921034426.037209699919755754458931112671403265390,
14-
6039542586.3520280050642916443072979210699388420708,
15-
1439720407.3117216736632230727949123939715485786772,
16-
248874557.86205415651146038641322942321632125127801,
17-
31426415.585400194380614231628318205362874684987640,
18-
2876370.6289353724412254090516208496135991145378768,
19-
186056.26539522349504029498971604569928220784236328,
20-
8071.6720023658162106380029022722506138218516325024,
21-
210.82427775157934587250973392071336271166969580291,
22-
2.5066282746310002701649081771338373386264310793408,
25+
f64::from_bits(0x4215ea5143c1a49e), // 23531376880.410759
26+
f64::from_bits(0x4223fc7075f54c57), // 42919803642.649101
27+
f64::from_bits(0x4220a132818ab61a), // 35711959237.355667
28+
f64::from_bits(0x4210b0b522e8261a), // 17921034426.037209
29+
f64::from_bits(0x41f67fc1b3a5a1e8), // 6039542586.3520279
30+
f64::from_bits(0x41d57418f5d3f33f), // 1439720407.3117216
31+
f64::from_bits(0x41adab0c7bb95f2a), // 248874557.86205417
32+
f64::from_bits(0x417df876f95dcc98), // 31426415.585400194
33+
f64::from_bits(0x4145f1e95080f44c), // 2876370.6289353725
34+
f64::from_bits(0x4106b6421f8787eb), // 186056.26539522348
35+
f64::from_bits(0x40bf87ac0858d804), // 8071.6720023658163
36+
f64::from_bits(0x406a5a607bbc3b52), // 210.82427775157936
37+
f64::from_bits(0x40040d931ff62705), // 2.5066282746310002
2338
];
2439
const LANCZOS_DEN_COEFFS: [f64; LANCZOS_N] = [
25-
0.0,
26-
39916800.0,
27-
120543840.0,
28-
150917976.0,
29-
105258076.0,
30-
45995730.0,
31-
13339535.0,
32-
2637558.0,
33-
357423.0,
34-
32670.0,
35-
1925.0,
36-
66.0,
37-
1.0,
40+
f64::from_bits(0x0000000000000000), // 0.0
41+
f64::from_bits(0x418308a800000000), // 39916800.0
42+
f64::from_bits(0x419cbd6980000000), // 120543840.0
43+
f64::from_bits(0x41a1fda6b0000000), // 150917976.0
44+
f64::from_bits(0x4199187170000000), // 105258076.0
45+
f64::from_bits(0x4185eeb690000000), // 45995730.0
46+
f64::from_bits(0x41697171e0000000), // 13339535.0
47+
f64::from_bits(0x41441f7b00000000), // 2637558.0
48+
f64::from_bits(0x4115d0bc00000000), // 357423.0
49+
f64::from_bits(0x40dfe78000000000), // 32670.0
50+
f64::from_bits(0x409e140000000000), // 1925.0
51+
f64::from_bits(0x4050800000000000), // 66.0
52+
f64::from_bits(0x3ff0000000000000), // 1.0
53+
];
54+
55+
const NGAMMA_INTEGRAL: usize = 23;
56+
const GAMMA_INTEGRAL: [f64; NGAMMA_INTEGRAL] = [
57+
f64::from_bits(0x3ff0000000000000), // 1.0
58+
f64::from_bits(0x3ff0000000000000), // 1.0
59+
f64::from_bits(0x4000000000000000), // 2.0
60+
f64::from_bits(0x4018000000000000), // 6.0
61+
f64::from_bits(0x4038000000000000), // 24.0
62+
f64::from_bits(0x405e000000000000), // 120.0
63+
f64::from_bits(0x4086800000000000), // 720.0
64+
f64::from_bits(0x40b3b00000000000), // 5040.0
65+
f64::from_bits(0x40e3b00000000000), // 40320.0
66+
f64::from_bits(0x4116260000000000), // 362880.0
67+
f64::from_bits(0x414baf8000000000), // 3628800.0
68+
f64::from_bits(0x418308a800000000), // 39916800.0
69+
f64::from_bits(0x41bc8cfc00000000), // 479001600.0
70+
f64::from_bits(0x41f7328cc0000000), // 6227020800.0
71+
f64::from_bits(0x42344c3b28000000), // 87178291200.0
72+
f64::from_bits(0x4273077775800000), // 1307674368000.0
73+
f64::from_bits(0x42b3077775800000), // 20922789888000.0
74+
f64::from_bits(0x42f437eeecd80000), // 355687428096000.0
75+
f64::from_bits(0x4336beecca730000), // 6402373705728000.0
76+
f64::from_bits(0x437b02b930689000), // 1.21645100408832e+17
77+
f64::from_bits(0x43c0e1b3be415a00), // 2.43290200817664e+18
78+
f64::from_bits(0x4406283be9b5c620), // 5.109094217170944e+19
79+
f64::from_bits(0x444e77526159f06c), // 1.1240007277776077e+21
3880
];
3981

4082
fn lanczos_sum(x: f64) -> f64 {
@@ -65,50 +107,23 @@ fn lanczos_sum(x: f64) -> f64 {
65107
fn m_sinpi(x: f64) -> f64 {
66108
// this function should only ever be called for finite arguments
67109
debug_assert!(x.is_finite());
68-
let y = x.abs() % 2.0;
69-
let n = (2.0 * y).round() as i32;
110+
let y = unsafe { fmod(fabs(x), 2.0) };
111+
let n = unsafe { round(2.0 * y) } as i32;
70112
let r = match n {
71-
0 => (PI * y).sin(),
72-
1 => (PI * (y - 0.5)).cos(),
113+
0 => unsafe { sin(PI * y) },
114+
1 => unsafe { cos(PI * (y - 0.5)) },
73115
2 => {
74116
// N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
75117
// -0.0 instead of 0.0 when y == 1.0.
76-
(PI * (1.0 - y)).sin()
118+
unsafe { sin(PI * (1.0 - y)) }
77119
}
78-
3 => -(PI * (y - 1.5)).cos(),
79-
4 => (PI * (y - 2.0)).sin(),
120+
3 => unsafe { -cos(PI * (y - 1.5)) },
121+
4 => unsafe { sin(PI * (y - 2.0)) },
80122
_ => unreachable!(),
81123
};
82124
(1.0f64).copysign(x) * r
83125
}
84126

85-
const NGAMMA_INTEGRAL: usize = 23;
86-
const GAMMA_INTEGRAL: [f64; NGAMMA_INTEGRAL] = [
87-
1.0,
88-
1.0,
89-
2.0,
90-
6.0,
91-
24.0,
92-
120.0,
93-
720.0,
94-
5040.0,
95-
40320.0,
96-
362880.0,
97-
3628800.0,
98-
39916800.0,
99-
479001600.0,
100-
6227020800.0,
101-
87178291200.0,
102-
1307674368000.0,
103-
20922789888000.0,
104-
355687428096000.0,
105-
6402373705728000.0,
106-
121645100408832000.0,
107-
2432902008176640000.0,
108-
51090942171709440000.0,
109-
1124000727777607680000.0,
110-
];
111-
112127
pub fn tgamma(x: f64) -> Result<f64, Error> {
113128
// special cases
114129
if !x.is_finite() {
@@ -130,7 +145,7 @@ pub fn tgamma(x: f64) -> Result<f64, Error> {
130145
return Err((v, Error::EDOM).1);
131146
}
132147
// integer arguments
133-
if x == x.floor() {
148+
if x == unsafe { floor(x) } {
134149
if x < 0.0 {
135150
// tgamma(n) = nan, invalid for
136151
return Err((f64::NAN, Error::EDOM).1);
@@ -139,7 +154,7 @@ pub fn tgamma(x: f64) -> Result<f64, Error> {
139154
return Ok(GAMMA_INTEGRAL[x as usize - 1]);
140155
}
141156
}
142-
let absx = x.abs();
157+
let absx = unsafe { fabs(x) };
143158
// tiny arguments: tgamma(x) ~ 1/x for x near 0
144159
if absx < 1e-20 {
145160
let r = 1.0 / x;
@@ -173,28 +188,38 @@ pub fn tgamma(x: f64) -> Result<f64, Error> {
173188
q - absx
174189
};
175190
let z = z * LANCZOS_G / y;
176-
let mut r = if x < 0.0 {
177-
let mut r = -PI / m_sinpi(absx) / absx * y.exp() / lanczos_sum(absx);
191+
let r = if x < 0.0 {
192+
// Using C's math functions through libc to match CPython
193+
let term1 = -PI / m_sinpi(absx);
194+
let term2 = term1 / absx;
195+
let exp_y = unsafe { exp(y) };
196+
let term3 = term2 * exp_y;
197+
let lanczos = lanczos_sum(absx);
198+
let mut r = term3 / lanczos;
178199
r -= z * r;
200+
179201
if absx < 140.0 {
180-
r /= y.powf(absx - 0.5);
202+
unsafe { r / pow(y, absx - 0.5) }
181203
} else {
182-
let sqrtpow = y.powf(absx / 2.0 - 0.25);
204+
let sqrtpow = unsafe { pow(y, absx / 2.0 - 0.25) };
183205
r /= sqrtpow;
184206
r /= sqrtpow;
207+
r
185208
}
186-
r
187209
} else {
188-
let mut r = lanczos_sum(absx) / y.exp();
210+
let lanczos = lanczos_sum(absx);
211+
let exp_y = unsafe { exp(y) };
212+
let mut r = lanczos / exp_y;
189213
r += z * r;
214+
190215
if absx < 140.0 {
191-
r *= y.powf(absx - 0.5);
216+
unsafe { r * pow(y, absx - 0.5) }
192217
} else {
193-
let sqrtpow = y.powf(absx / 2.0 - 0.25);
218+
let sqrtpow = unsafe { pow(y, absx / 2.0 - 0.25) };
194219
r *= sqrtpow;
195220
r *= sqrtpow;
221+
r
196222
}
197-
r
198223
};
199224

200225
if r.is_infinite() {
@@ -217,7 +242,7 @@ pub fn lgamma(x: f64) -> Result<f64, Error> {
217242
}
218243

219244
// integer arguments
220-
if x == x.floor() && x <= 2.0 {
245+
if x == unsafe { floor(x) } && x <= 2.0 {
221246
if x <= 0.0 {
222247
// lgamma(n) = inf, divide-by-zero for integers n <= 0
223248
return Err(Error::EDOM);
@@ -227,30 +252,42 @@ pub fn lgamma(x: f64) -> Result<f64, Error> {
227252
}
228253
}
229254

230-
let absx = x.abs();
255+
let absx = unsafe { fabs(x) };
231256
// tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x
232257
if absx < 1e-20 {
233-
return Ok(-absx.ln());
258+
return Ok(-unsafe { log(absx) });
234259
}
235260

236-
// Lanczos' formula. We could save a fraction of a ulp in accuracy by
237-
// having a second set of numerator coefficients for lanczos_sum that
238-
// absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
239-
// subtraction below; it's probably not worth it.
240-
let mut r = lanczos_sum(absx).ln() - LANCZOS_G;
241-
r += (absx - 0.5) * ((absx + LANCZOS_G - 0.5).ln() - 1.0);
261+
// Using C's math functions through libc to match CPython
262+
let lanczos_sum_val = lanczos_sum(absx);
263+
let log_lanczos = unsafe { log(lanczos_sum_val) };
264+
265+
// Subtract lanczos_g as a separate step
266+
let mut r = log_lanczos - LANCZOS_G;
267+
268+
// Calculate (absx - 0.5) term
269+
let factor = absx - 0.5;
270+
271+
// Calculate log term
272+
let log_term = unsafe { log(absx + LANCZOS_G - 0.5) };
273+
274+
// Calculate the multiplication and subtraction
275+
let step2 = factor * (log_term - 1.0);
276+
277+
// Combine the results
278+
r += step2;
242279

243280
if x < 0.0 {
244-
// Use reflection formula to get value for negative x
245-
246-
// Calculate the sin(pi * x) value using m_sinpi
281+
// Calculate each component separately as in CPython
247282
let sinpi_val = m_sinpi(absx);
248-
249-
// In CPython, the expression is:
250-
// r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
251-
// We'll match this order exactly
252-
r = LOG_PI - sinpi_val.abs().ln() - absx.ln() - r;
283+
let abs_sinpi = unsafe { fabs(sinpi_val) };
284+
let log_abs_sinpi = unsafe { log(abs_sinpi) };
285+
let log_absx = unsafe { log(absx) };
286+
287+
// Combine in exactly the same order as CPython
288+
r = LOG_PI - log_abs_sinpi - log_absx - r;
253289
}
290+
254291
if r.is_infinite() {
255292
return Err(Error::ERANGE);
256293
}
@@ -342,6 +379,8 @@ mod tests {
342379
let py_lgamma_repr = unsafe { std::mem::transmute::<f64, u64>(py_lgamma) };
343380
let rs_lgamma_repr = unsafe { std::mem::transmute::<f64, u64>(rs_lgamma) };
344381
println!("Bit difference: {}", py_lgamma_repr ^ rs_lgamma_repr);
382+
383+
assert_eq!(py_lgamma_repr, rs_lgamma_repr);
345384
});
346385
}
347386

@@ -384,8 +423,9 @@ mod tests {
384423
};
385424
let py_lgamma_repr = unsafe { std::mem::transmute::<f64, i64>(py_lgamma) };
386425
let rs_lgamma_repr = unsafe { std::mem::transmute::<f64, i64>(rs_lgamma) };
426+
assert_eq!(py_lgamma_repr, rs_lgamma_repr, "x = {x}, py_lgamma = {py_lgamma}, rs_lgamma = {rs_lgamma}");
387427
// allow 6 bit error for now
388-
assert!((py_lgamma_repr - rs_lgamma_repr).abs() <= 6, "x = {x} diff: {}, py_lgamma = {py_lgamma} ({py_lgamma_repr:x}), rs_lgamma = {rs_lgamma} ({rs_lgamma_repr:x})", py_lgamma_repr ^ rs_lgamma_repr);
428+
// assert!((py_lgamma_repr - rs_lgamma_repr).abs() <= 6, "x = {x} diff: {}, py_lgamma = {py_lgamma} ({py_lgamma_repr:x}), rs_lgamma = {rs_lgamma} ({rs_lgamma_repr:x})", py_lgamma_repr ^ rs_lgamma_repr);
389429
});
390430
}
391431
}

0 commit comments

Comments
 (0)
Please sign in to comment.