Skip to content

Commit 0a22aa0

Browse files
authored
Simplify and speed-up math.hypot() and math.dist() (GH-102734)
1 parent 00d1ef7 commit 0a22aa0

File tree

1 file changed

+139
-154
lines changed

1 file changed

+139
-154
lines changed

Modules/mathmodule.c

+139-154
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,113 @@ get_math_module_state(PyObject *module)
9292
return (math_module_state *)state;
9393
}
9494

95+
/*
96+
Double and triple length extended precision algorithms from:
97+
98+
Accurate Sum and Dot Product
99+
by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
100+
https://doi.org/10.1137/030601818
101+
https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
102+
103+
*/
104+
105+
/* Pair of doubles returned by the dl_*() helpers: hi carries the rounded
   result of an operation and lo the correction term computed alongside it. */
typedef struct{ double hi; double lo; } DoubleLength;
106+
107+
/*
   Add two doubles and keep the rounding error of the addition.

   Returns {hi, lo} where hi is the rounded sum a + b and lo is the
   correction term (a - hi) + b.

   Precondition: |a| >= |b|.  The caller must pass the operand of larger
   magnitude first; this is checked by the assert when assertions are
   enabled.  Use dl_sum() when the ordering of the operands is not known.
*/
static DoubleLength
108+
dl_fast_sum(double a, double b)
109+
{
110+
/* Algorithm 1.1. Compensated summation of two floating point numbers. */
111+
assert(fabs(a) >= fabs(b));
112+
double x = a + b;
113+
/* (a - x) recovers the part of b that made it into x (negated);
   adding b back leaves what the rounded sum dropped. */
double y = (a - x) + b;
114+
return (DoubleLength) {x, y};
115+
}
116+
117+
/*
   Add two doubles and keep the rounding error of the addition, with no
   requirement on the relative magnitudes of a and b (there is no ordering
   assert here, unlike dl_fast_sum()).

   Returns {hi, lo} where hi is the rounded sum a + b and lo is the
   correction term built from what each operand lost in that rounding.
*/
static DoubleLength
118+
dl_sum(double a, double b)
119+
{
120+
/* Algorithm 3.1. Error-free transformation of the sum */
121+
double x = a + b;
122+
/* z is the portion of x attributed to b; (x - z) the portion attributed to a. */
double z = x - a;
123+
/* Sum the amounts by which a and b differ from their portions of x. */
double y = (a - (x - z)) + (b - z);
124+
return (DoubleLength) {x, y};
125+
}
126+
127+
#ifndef UNRELIABLE_FMA
128+
129+
/*
   Multiply two doubles and keep the rounding error of the product.

   This variant is compiled when UNRELIABLE_FMA is not defined and relies
   on the C library fma().

   Returns {hi, lo} where hi is the rounded product x * y and lo is
   fma(x, y, -hi), i.e. x * y - hi evaluated with a single rounding.
*/
static DoubleLength
130+
dl_mul(double x, double y)
131+
{
132+
/* Algorithm 3.5. Error-free transformation of a product */
133+
double z = x * y;
134+
/* fma() forms x*y without an intermediate rounding, so subtracting the
   already-rounded z leaves the part of the product that z lost. */
double zz = fma(x, y, -z);
135+
return (DoubleLength) {z, zz};
136+
}
137+
138+
#else
139+
140+
/*
141+
The default implementation of dl_mul() depends on the C math library
142+
having an accurate fma() function as required by § 7.12.13.1 of the
143+
C99 standard.
144+
145+
The UNRELIABLE_FMA option is provided as a slower but accurate
146+
alternative for builds where the fma() function is found wanting.
147+
The speed penalty may be modest (17% slower on an Apple M1 Max),
148+
so don't hesitate to enable this build option.
149+
150+
The algorithms are from the T. J. Dekker paper:
151+
A Floating-Point Technique for Extending the Available Precision
152+
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
153+
*/
154+
155+
/*
   Split a double into two parts for use by the Dekker-style dl_mul().

   Returns {hi, lo} where hi is obtained from x via the Veltkamp constant
   and lo is defined as x - hi.

   NOTE(review): per the cited Dekker paper the two parts are presumably
   narrow enough that their pairwise products are exact -- confirm against
   the paper before relying on that here.
*/
static DoubleLength
156+
dl_split(double x) {
157+
// Dekker (5.5) and (5.6).
158+
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
159+
double hi = t - (t - x);
160+
double lo = x - hi;
161+
return (DoubleLength) {hi, lo};
162+
}
163+
164+
/*
   Multiply two doubles and keep a correction term for the product,
   without calling fma().

   This variant is compiled when UNRELIABLE_FMA is defined.  Both operands
   are split with dl_split() and the product is assembled from the partial
   products of the halves.

   Returns {hi, lo} where hi is p + q (leading partial product plus cross
   terms) and lo collects what that addition dropped plus the lo * lo term.
*/
static DoubleLength
165+
dl_mul(double x, double y)
166+
{
167+
// Dekker (5.12) and mul12()
168+
DoubleLength xx = dl_split(x);
169+
DoubleLength yy = dl_split(y);
170+
// p: product of the high halves.
double p = xx.hi * yy.hi;
171+
// q: sum of the two cross products.
double q = xx.hi * yy.lo + xx.lo * yy.hi;
172+
double z = p + q;
173+
// (p - z + q) is the remainder of the p + q addition; then add the
// product of the low halves.
double zz = p - z + q + xx.lo * yy.lo;
174+
return (DoubleLength) {z, zz};
175+
}
176+
177+
#endif
178+
179+
/* Three-component accumulator used by tl_fma() and tl_to_d(): hi and lo
   are maintained with dl_sum(), and tiny collects the remaining correction
   terms with ordinary double addition. */
typedef struct { double hi; double lo; double tiny; } TripleLength;
180+
181+
/* TripleLength with every component set to 0.0.
   NOTE(review): presumably the initial value of a tl_fma() accumulation;
   its users are not visible in this part of the file. */
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
182+
183+
/*
   Add the product x * y to a TripleLength running total and return the
   updated total (the argument is passed by value and is not modified).

   The product is first expanded into a {hi, lo} pair by dl_mul().  Its
   high part is added to total.hi and its low part to total.lo, each with
   dl_sum() so that the correction term of the addition is retained.  The
   correction from the hi addition is then folded into the lo component,
   and the two corrections that remain are added to total.tiny using plain
   double addition.
*/
static TripleLength
184+
tl_fma(double x, double y, TripleLength total)
185+
{
186+
/* Algorithm 5.10 with SumKVert for K=3 */
187+
DoubleLength pr = dl_mul(x, y);
188+
/* sm.hi becomes the new hi component; sm.lo is passed down a level. */
DoubleLength sm = dl_sum(total.hi, pr.hi);
189+
DoubleLength r1 = dl_sum(total.lo, pr.lo);
190+
/* r2.hi becomes the new lo component. */
DoubleLength r2 = dl_sum(r1.hi, sm.lo);
191+
return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
192+
}
193+
194+
/*
   Collapse a TripleLength total into a single double.

   The lo and hi components are combined with dl_sum(), and the result is
   formed as (total.tiny + last.lo) + last.hi, so the two correction terms
   are added together before being added to the leading part.
*/
static double
195+
tl_to_d(TripleLength total)
196+
{
197+
DoubleLength last = dl_sum(total.lo, total.hi);
198+
return total.tiny + last.lo + last.hi;
199+
}
200+
201+
95202
/*
96203
sin(pi*x), giving accurate results for all finite x (especially x
97204
integral or close to an integer). This is here for use in the
@@ -2301,6 +2408,7 @@ that are almost always correctly rounded, four techniques are used:
23012408
23022409
* lossless scaling using a power-of-two scaling factor
23032410
* accurate squaring using Veltkamp-Dekker splitting [1]
2411+
or an equivalent with an fma() call
23042412
* compensated summation using a variant of the Neumaier algorithm [2]
23052413
* differential correction of the square root [3]
23062414
@@ -2359,14 +2467,21 @@ algorithm, effectively doubling the number of accurate bits.
23592467
This technique is used in Dekker's SQRT2 algorithm and again in
23602468
Borges' ALGORITHM 4 and 5.
23612469
2362-
Without proof for all cases, hypot() cannot claim to be always
2363-
correctly rounded. However for n <= 1000, prior to the final addition
2364-
that rounds the overall result, the internal accuracy of "h" together
2365-
with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
2366-
Also, hypot() was tested against a Decimal implementation with
2367-
prec=300. After 100 million trials, no incorrectly rounded examples
2368-
were found. In addition, perfect commutativity (all permutations are
2369-
exactly equal) was verified for 1 billion random inputs with n=5. [7]
2470+
The hypot() function is faithfully rounded (less than 1 ulp error)
2471+
and usually correctly rounded (within 1/2 ulp). The squaring
2472+
step is exact. The Neumaier summation computes as if in doubled
2473+
precision (106 bits) and has the advantage that its input squares
2474+
are non-negative so that the condition number of the sum is one.
2475+
The square root with a differential correction is likewise computed
2476+
as if in doubled precision.
2477+
2478+
For n <= 1000, prior to the final addition that rounds the overall
2479+
result, the internal accuracy of "h" together with its correction of
2480+
"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2481+
against a Decimal implementation with prec=300. After 100 million
2482+
trials, no incorrectly rounded examples were found. In addition,
2483+
perfect commutativity (all permutations are exactly equal) was
2484+
verified for 1 billion random inputs with n=5. [7]
23702485
23712486
References:
23722487
@@ -2383,9 +2498,8 @@ exactly equal) was verified for 1 billion random inputs with n=5. [7]
23832498
static inline double
23842499
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
23852500
{
2386-
const double T27 = 134217729.0; /* ldexp(1.0, 27) + 1.0) */
2387-
double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
2388-
double t, hi, lo, h;
2501+
double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2502+
DoubleLength pr, sm;
23892503
int max_e;
23902504
Py_ssize_t i;
23912505

@@ -2410,54 +2524,21 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
24102524
x *= scale;
24112525
assert(fabs(x) < 1.0);
24122526

2413-
t = x * T27;
2414-
hi = t - (t - x);
2415-
lo = x - hi;
2416-
assert(hi + lo == x);
2417-
2418-
x = hi * hi;
2419-
assert(x <= 1.0);
2420-
assert(fabs(csum) >= fabs(x));
2421-
oldcsum = csum;
2422-
csum += x;
2423-
frac1 += (oldcsum - csum) + x;
2424-
2425-
x = 2.0 * hi * lo;
2426-
assert(fabs(csum) >= fabs(x));
2427-
oldcsum = csum;
2428-
csum += x;
2429-
frac2 += (oldcsum - csum) + x;
2430-
2431-
assert(csum + lo * lo == csum);
2432-
frac3 += lo * lo;
2433-
}
2434-
h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
2435-
2436-
x = h;
2437-
t = x * T27;
2438-
hi = t - (t - x);
2439-
lo = x - hi;
2440-
assert (hi + lo == x);
2527+
pr = dl_mul(x, x);
2528+
assert(pr.hi <= 1.0);
24412529

2442-
x = -hi * hi;
2443-
assert(fabs(csum) >= fabs(x));
2444-
oldcsum = csum;
2445-
csum += x;
2446-
frac1 += (oldcsum - csum) + x;
2447-
2448-
x = -2.0 * hi * lo;
2449-
assert(fabs(csum) >= fabs(x));
2450-
oldcsum = csum;
2451-
csum += x;
2452-
frac2 += (oldcsum - csum) + x;
2453-
2454-
x = -lo * lo;
2455-
assert(fabs(csum) >= fabs(x));
2456-
oldcsum = csum;
2457-
csum += x;
2458-
frac3 += (oldcsum - csum) + x;
2459-
2460-
x = csum - 1.0 + (frac1 + frac2 + frac3);
2530+
sm = dl_fast_sum(csum, pr.hi);
2531+
csum = sm.hi;
2532+
frac1 += pr.lo;
2533+
frac2 += sm.lo;
2534+
}
2535+
h = sqrt(csum - 1.0 + (frac1 + frac2));
2536+
pr = dl_mul(-h, h);
2537+
sm = dl_fast_sum(csum, pr.hi);
2538+
csum = sm.hi;
2539+
frac1 += pr.lo;
2540+
frac2 += sm.lo;
2541+
x = csum - 1.0 + (frac1 + frac2);
24612542
return (h + x / (2.0 * h)) / scale;
24622543
}
24632544
/* When max_e < -1023, ldexp(1.0, -max_e) overflows.
@@ -2646,102 +2727,6 @@ long_add_would_overflow(long a, long b)
26462727
return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
26472728
}
26482729

2649-
/*
2650-
Double and triple length extended precision algorithms from:
2651-
2652-
Accurate Sum and Dot Product
2653-
by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
2654-
https://doi.org/10.1137/030601818
2655-
https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
2656-
2657-
*/
2658-
2659-
typedef struct{ double hi; double lo; } DoubleLength;
2660-
2661-
static DoubleLength
2662-
dl_sum(double a, double b)
2663-
{
2664-
/* Algorithm 3.1 Error-free transformation of the sum */
2665-
double x = a + b;
2666-
double z = x - a;
2667-
double y = (a - (x - z)) + (b - z);
2668-
return (DoubleLength) {x, y};
2669-
}
2670-
2671-
#ifndef UNRELIABLE_FMA
2672-
2673-
static DoubleLength
2674-
dl_mul(double x, double y)
2675-
{
2676-
/* Algorithm 3.5. Error-free transformation of a product */
2677-
double z = x * y;
2678-
double zz = fma(x, y, -z);
2679-
return (DoubleLength) {z, zz};
2680-
}
2681-
2682-
#else
2683-
2684-
/*
2685-
The default implementation of dl_mul() depends on the C math library
2686-
having an accurate fma() function as required by § 7.12.13.1 of the
2687-
C99 standard.
2688-
2689-
The UNRELIABLE_FMA option is provided as a slower but accurate
2690-
alternative for builds where the fma() function is found wanting.
2691-
The speed penalty may be modest (17% slower on an Apple M1 Max),
2692-
so don't hesitate to enable this build option.
2693-
2694-
The algorithms are from the T. J. Dekker paper:
2695-
A Floating-Point Technique for Extending the Available Precision
2696-
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2697-
*/
2698-
2699-
static DoubleLength
2700-
dl_split(double x) {
2701-
// Dekker (5.5) and (5.6).
2702-
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
2703-
double hi = t - (t - x);
2704-
double lo = x - hi;
2705-
return (DoubleLength) {hi, lo};
2706-
}
2707-
2708-
static DoubleLength
2709-
dl_mul(double x, double y)
2710-
{
2711-
// Dekker (5.12) and mul12()
2712-
DoubleLength xx = dl_split(x);
2713-
DoubleLength yy = dl_split(y);
2714-
double p = xx.hi * yy.hi;
2715-
double q = xx.hi * yy.lo + xx.lo * yy.hi;
2716-
double z = p + q;
2717-
double zz = p - z + q + xx.lo * yy.lo;
2718-
return (DoubleLength) {z, zz};
2719-
}
2720-
2721-
#endif
2722-
2723-
typedef struct { double hi; double lo; double tiny; } TripleLength;
2724-
2725-
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
2726-
2727-
static TripleLength
2728-
tl_fma(double x, double y, TripleLength total)
2729-
{
2730-
/* Algorithm 5.10 with SumKVert for K=3 */
2731-
DoubleLength pr = dl_mul(x, y);
2732-
DoubleLength sm = dl_sum(total.hi, pr.hi);
2733-
DoubleLength r1 = dl_sum(total.lo, pr.lo);
2734-
DoubleLength r2 = dl_sum(r1.hi, sm.lo);
2735-
return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
2736-
}
2737-
2738-
static double
2739-
tl_to_d(TripleLength total)
2740-
{
2741-
DoubleLength last = dl_sum(total.lo, total.hi);
2742-
return total.tiny + last.lo + last.hi;
2743-
}
2744-
27452730
/*[clinic input]
27462731
math.sumprod
27472732

0 commit comments

Comments
 (0)