Skip to content

Commit b250211

Browse files
committed
Use macros for more division/array checks
This commit moves over more array accesses to the `i!` macro to avoid bounds checks when debug assertions are disabled. This was surfaced by rust-lang/compiler-builtins#360, where recent changes in codegen units have caused some bounds checks not to be elided in release mode. This also adds a `div!` macro to work around rust-lang/rust#72751.
1 parent 3d729b7 commit b250211

11 files changed

+45
-34
lines changed

src/lib.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
//! libm in pure Rust
22
#![deny(warnings)]
33
#![no_std]
4-
#![cfg_attr(
5-
all(target_arch = "wasm32", feature = "unstable"),
6-
feature(core_intrinsics)
7-
)]
4+
#![cfg_attr(all(feature = "unstable"), feature(core_intrinsics))]
85
#![allow(clippy::unreadable_literal)]
96
#![allow(clippy::many_single_char_names)]
107
#![allow(clippy::needless_return)]

src/math/atanf.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 {
5656
if x.is_nan() {
5757
return x;
5858
}
59-
z = ATAN_HI[3] + x1p_120;
59+
z = i!(ATAN_HI, 3) + x1p_120;
6060
return if sign { -z } else { z };
6161
}
6262
let id = if ix < 0x3ee00000 {
@@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 {
9797
z = x * x;
9898
let w = z * z;
9999
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
100-
let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4]));
101-
let s2 = w * (A_T[1] + w * A_T[3]);
100+
let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4)));
101+
let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3));
102102
if id < 0 {
103103
return x - x * (s1 + s2);
104104
}
105105
let id = id as usize;
106-
let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x);
106+
let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x);
107107
if sign {
108108
-z
109109
} else {

src/math/exp.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 {
124124
/* if |x| > 0.5 ln2 */
125125
if hx >= 0x3ff0a2b2 {
126126
/* if |x| >= 1.5 ln2 */
127-
k = (INVLN2 * x + HALF[sign as usize]) as i32;
127+
k = (INVLN2 * x + i!(HALF, sign as usize)) as i32;
128128
} else {
129129
k = 1 - sign - sign;
130130
}

src/math/exp2.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 {
374374
let mut i0 = ui as u32;
375375
i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
376376
let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32;
377-
let ki = ku as i32 / TBLSIZE as i32;
377+
let ki = div!(ku as i32, TBLSIZE as i32);
378378
i0 %= TBLSIZE as u32;
379379
let uf = f64::from_bits(ui) - redux;
380380
let mut z = x - uf;
381381

382382
/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
383-
let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */
384-
z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0] */
383+
let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */
384+
z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */
385385
let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5))));
386386

387387
scalbn(r, ki)

src/math/exp2f.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 {
126126
uf -= redux;
127127
let z: f64 = (x - uf) as f64;
128128
/* Compute r = exp2(y) = exp2ft[i0] * p(z). */
129-
let r: f64 = f64::from_bits(EXP2FT[i0 as usize]);
129+
let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
130130
let t: f64 = r as f64 * z;
131131
let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);
132132

src/math/expf.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 {
7070
/* if |x| > 0.5 ln2 */
7171
if hx > 0x3f851592 {
7272
/* if |x| > 1.5 ln2 */
73-
k = (INV_LN2 * x + HALF[sign as usize]) as i32;
73+
k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32;
7474
} else {
7575
k = 1 - sign - sign;
7676
}

src/math/mod.rs

+14
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,20 @@ macro_rules! i {
5858
};
5959
}
6060

61+
#[cfg(any(debug_assertions, not(feature = "unstable")))]
62+
macro_rules! div {
63+
($a:expr, $b:expr) => {
64+
$a / $b
65+
};
66+
}
67+
68+
#[cfg(all(not(debug_assertions), feature = "unstable"))]
69+
macro_rules! div {
70+
($a:expr, $b:expr) => {
71+
unsafe { core::intrinsics::unchecked_div($a, $b) }
72+
};
73+
}
74+
6175
macro_rules! llvm_intrinsically_optimized {
6276
(#[cfg($($clause:tt)*)] $e:expr) => {
6377
#[cfg(all(feature = "unstable", $($clause)*))]

src/math/pow.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
299299
ax = with_set_high_word(ax, ix as u32);
300300

301301
/* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
302-
let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
303-
let v: f64 = 1.0 / (ax + BP[k as usize]);
302+
let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
303+
let v: f64 = 1.0 / (ax + i!(BP, k as usize));
304304
let ss: f64 = u * v;
305305
let s_h = with_set_low_word(ss, 0);
306306

@@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
309309
0.0,
310310
((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
311311
);
312-
let t_l: f64 = ax - (t_h - BP[k as usize]);
312+
let t_l: f64 = ax - (t_h - i!(BP, k as usize));
313313
let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);
314314

315315
/* compute log(ax) */
@@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 {
328328
let p_h: f64 = with_set_low_word(u + v, 0);
329329
let p_l = v - (p_h - u);
330330
let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
331-
let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize];
331+
let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
332332

333333
/* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
334334
let t: f64 = n as f64;
335-
t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0);
336-
t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
335+
t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
336+
t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
337337
}
338338

339339
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */

src/math/powf.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -238,16 +238,16 @@ pub fn powf(x: f32, y: f32) -> f32 {
238238
ax = f32::from_bits(ix as u32);
239239

240240
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
241-
u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
242-
v = 1.0 / (ax + BP[k as usize]);
241+
u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
242+
v = 1.0 / (ax + i!(BP, k as usize));
243243
s = u * v;
244244
s_h = s;
245245
is = s_h.to_bits() as i32;
246246
s_h = f32::from_bits(is as u32 & 0xfffff000);
247247
/* t_h=ax+bp[k] High */
248248
is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32;
249249
t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21));
250-
t_l = ax - (t_h - BP[k as usize]);
250+
t_l = ax - (t_h - i!(BP, k as usize));
251251
s_l = v * ((u - s_h * t_h) - s_h * t_l);
252252
/* compute log(ax) */
253253
s2 = s * s;
@@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
267267
p_h = f32::from_bits(is as u32 & 0xfffff000);
268268
p_l = v - (p_h - u);
269269
z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
270-
z_l = CP_L * p_h + p_l * CP + DP_L[k as usize];
270+
z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
271271
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
272272
t = n as f32;
273-
t1 = ((z_h + z_l) + DP_H[k as usize]) + t;
273+
t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t;
274274
is = t1.to_bits() as i32;
275275
t1 = f32::from_bits(is as u32 & 0xfffff000);
276-
t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
276+
t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
277277
};
278278

279279
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */

src/math/rem_pio2.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
167167
let mut z = f64::from_bits(ui);
168168
let mut tx = [0.0; 3];
169169
for i in 0..2 {
170-
tx[i] = z as i32 as f64;
171-
z = (z - tx[i]) * x1p24;
170+
i!(tx,i, =, z as i32 as f64);
171+
z = (z - i!(tx, i)) * x1p24;
172172
}
173-
tx[2] = z;
173+
i!(tx,2, =, z);
174174
/* skip zero terms, first term is non-zero */
175175
let mut i = 2;
176-
while i != 0 && tx[i] == 0.0 {
176+
while i != 0 && i!(tx, i) == 0.0 {
177177
i -= 1;
178178
}
179179
let mut ty = [0.0; 3];
180180
let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1);
181181
if sign != 0 {
182-
return (-n, -ty[0], -ty[1]);
182+
return (-n, -i!(ty, 0), -i!(ty, 1));
183183
}
184-
(n, ty[0], ty[1])
184+
(n, i!(ty, 0), i!(ty, 1))
185185
}
186186

187187
#[cfg(test)]

src/math/rem_pio2_large.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
242242
let mut iq: [i32; 20] = [0; 20];
243243

244244
/* initialize jk*/
245-
let jk = INIT_JK[prec];
245+
let jk = i!(INIT_JK, prec);
246246
let jp = jk;
247247

248248
/* determine jx,jv,q0, note that 3>q0 */
249249
let jx = nx - 1;
250-
let mut jv = (e0 - 3) / 24;
250+
let mut jv = div!(e0 - 3, 24);
251251
if jv < 0 {
252252
jv = 0;
253253
}

0 commit comments

Comments
 (0)