Skip to content

Commit

Permalink
librustc: use LLVM intrinsics for several floating point operations.
Browse files Browse the repository at this point in the history
Achieves at least 5x speed up for some functions!

Also, reorganise the delegation code so that the delegated function wrappers
have the #[inline(always)] annotation, and reduce the repetition of
delegate!(..).
  • Loading branch information
huonw committed Apr 20, 2013
1 parent 93c0888 commit d9c54f8
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 153 deletions.
152 changes: 80 additions & 72 deletions src/libcore/num/f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@

//! Operations and constants for `f32`
use cmath;
use libc::{c_float, c_int};
use num::strconv;
use num;
use option::Option;
use unstable::intrinsics::floorf32;
use from_str;
use to_str;

Expand All @@ -24,79 +21,93 @@ use to_str;

pub use cmath::c_float_targ_consts::*;

// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;

macro_rules! delegate(
(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
$(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
),*
) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
mod delegated {
use cmath::c_float_utils;
use libc::{c_float, c_int};
use unstable::intrinsics;

$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
}
)
)

delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::atan2)
delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::copysign)
delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::abs_sub)
delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
cmath::c_float_utils::mul_add)
delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmax)
delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmin)
delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::nextafter)
delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
cmath::c_float_utils::frexp)
delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::hypot)
delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
cmath::c_float_utils::ldexp)
delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
cmath::c_float_utils::lgamma)
delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
delegate!(fn log_radix(n: c_float) -> c_float =
cmath::c_float_utils::log_radix)
delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
delegate!(fn ilog_radix(n: c_float) -> c_int =
cmath::c_float_utils::ilog_radix)
delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
cmath::c_float_utils::modf)
delegate!(fn pow(n: c_float, e: c_float) -> c_float =
cmath::c_float_utils::pow)
delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
cmath::c_float_utils::ldexp_radix)
delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
delegate!(
// intrinsics
fn abs(n: f32) -> f32 = intrinsics::fabsf32,
fn cos(n: f32) -> f32 = intrinsics::cosf32,
fn exp(n: f32) -> f32 = intrinsics::expf32,
fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
fn floor(x: f32) -> f32 = intrinsics::floorf32,
fn ln(n: f32) -> f32 = intrinsics::logf32,
fn log10(n: f32) -> f32 = intrinsics::log10f32,
fn log2(n: f32) -> f32 = intrinsics::log2f32,
fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
fn sin(n: f32) -> f32 = intrinsics::sinf32,
fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,

// LLVM 3.3 required to use intrinsics for these four
fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
/*
fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
fn trunc(n: f32) -> f32 = intrinsics::truncf32,
fn rint(n: f32) -> f32 = intrinsics::rintf32,
fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
*/

// cmath
fn acos(n: c_float) -> c_float = c_float_utils::acos,
fn asin(n: c_float) -> c_float = c_float_utils::asin,
fn atan(n: c_float) -> c_float = c_float_utils::atan,
fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
fn erf(n: c_float) -> c_float = c_float_utils::erf,
fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
fn round(n: c_float) -> c_float = c_float_utils::round,
fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
fn tan(n: c_float) -> c_float = c_float_utils::tan,
fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)


// These are not defined inside consts:: for consistency with
// the integer types
Expand Down Expand Up @@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
#[inline(always)]
pub fn gt(x: f32, y: f32) -> bool { return x > y; }

/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }

// FIXME (#1999): replace the predicates below with llvm intrinsics or
// calls to the libmath macros in the rust runtime for performance.
Expand Down
165 changes: 85 additions & 80 deletions src/libcore/num/f64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@

//! Operations and constants for `f64`
use cmath;
use libc::{c_double, c_int};
use num::strconv;
use num;
use option::Option;
use unstable::intrinsics::floorf64;
use to_str;
use from_str;

Expand All @@ -25,87 +22,98 @@ use from_str;
pub use cmath::c_double_targ_consts::*;
pub use cmp::{min, max};

// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;

macro_rules! delegate(
(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
$(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
),*
) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
mod delegated {
use cmath::c_double_utils;
use libc::{c_double, c_int};
use unstable::intrinsics;

$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
}
)
)

delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::atan2)
delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::copysign)
delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::abs_sub)
delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
cmath::c_double_utils::mul_add)
delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::fmax)
delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
cmath::c_double_utils::fmin)
delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::nextafter)
delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
cmath::c_double_utils::frexp)
delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::hypot)
delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
cmath::c_double_utils::ldexp)
delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
cmath::c_double_utils::lgamma)
delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
delegate!(fn log_radix(n: c_double) -> c_double =
cmath::c_double_utils::log_radix)
delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
delegate!(fn ilog_radix(n: c_double) -> c_int =
cmath::c_double_utils::ilog_radix)
delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
cmath::c_double_utils::modf)
delegate!(fn pow(n: c_double, e: c_double) -> c_double =
cmath::c_double_utils::pow)
delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
cmath::c_double_utils::ldexp_radix)
delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
delegate!(fn jn(i: c_int, n: c_double) -> c_double =
cmath::c_double_utils::jn)
delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
delegate!(fn yn(i: c_int, n: c_double) -> c_double =
cmath::c_double_utils::yn)
delegate!(
// intrinsics
fn abs(n: f64) -> f64 = intrinsics::fabsf64,
fn cos(n: f64) -> f64 = intrinsics::cosf64,
fn exp(n: f64) -> f64 = intrinsics::expf64,
fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
fn floor(x: f64) -> f64 = intrinsics::floorf64,
fn ln(n: f64) -> f64 = intrinsics::logf64,
fn log10(n: f64) -> f64 = intrinsics::log10f64,
fn log2(n: f64) -> f64 = intrinsics::log2f64,
fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
fn sin(n: f64) -> f64 = intrinsics::sinf64,
fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,

// LLVM 3.3 required to use intrinsics for these four
fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
/*
fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
fn trunc(n: f64) -> f64 = intrinsics::truncf64,
fn rint(n: c_double) -> c_double = intrinsics::rintf64,
fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
*/

// cmath
fn acos(n: c_double) -> c_double = c_double_utils::acos,
fn asin(n: c_double) -> c_double = c_double_utils::asin,
fn atan(n: c_double) -> c_double = c_double_utils::atan,
fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
fn erf(n: c_double) -> c_double = c_double_utils::erf,
fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
fn round(n: c_double) -> c_double = c_double_utils::round,
fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
fn tan(n: c_double) -> c_double = c_double_utils::tan,
fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
fn j0(n: c_double) -> c_double = c_double_utils::j0,
fn j1(n: c_double) -> c_double = c_double_utils::j1,
fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
fn y0(n: c_double) -> c_double = c_double_utils::y0,
fn y1(n: c_double) -> c_double = c_double_utils::y1,
fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)

// FIXME (#1433): obtain these in a different way

Expand Down Expand Up @@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
return !(is_NaN(x) || is_infinite(x));
}

/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }

// FIXME (#1999): add is_normal, is_subnormal, and fpclassify

Expand Down
2 changes: 1 addition & 1 deletion src/libcore/num/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
pub use f64::signbit;
pub use f64::{j0, j1, jn, y0, y1, yn};

Expand Down

0 comments on commit d9c54f8

Please sign in to comment.