Skip to content

Commit

Permalink
Kill DoubleBigDigit; BigDigit := usize
Browse files Browse the repository at this point in the history
Reimplement the low-level arithmetic functions without DoubleBigDigit, and then
change BigDigit to usize. This ends up being a pretty significant performance
win, even with my relatively simple and naive code - an assembly version could be
much faster (x86, at least, can implement all of these operations with one or
just a few instructions).

Before:
test divide_0      ... bench:       3,906 ns/iter (+/- 125)
test divide_1      ... bench:     293,600 ns/iter (+/- 6,545)
test divide_2      ... bench: 657,792,410 ns/iter (+/- 28,285,336)
test factorial_100 ... bench:       6,538 ns/iter (+/- 971)
test fib_100       ... bench:       1,431 ns/iter (+/- 39)
test multiply_0    ... bench:         419 ns/iter (+/- 6)
test multiply_1    ... bench:      34,637 ns/iter (+/- 550)
test multiply_2    ... bench:   4,523,752 ns/iter (+/- 1,039,485)
test shr           ... bench:         535 ns/iter (+/- 13)
test to_string     ... bench:         621 ns/iter (+/- 58)

After:
test divide_0      ... bench:       3,138 ns/iter (+/- 136)
test divide_1      ... bench:     123,971 ns/iter (+/- 2,817)
test divide_2      ... bench: 158,154,850 ns/iter (+/- 3,874,826)
test factorial_100 ... bench:       6,805 ns/iter (+/- 94)
test fib_100       ... bench:       1,429 ns/iter (+/- 35)
test multiply_0    ... bench:         108 ns/iter (+/- 15)
test multiply_1    ... bench:      16,503 ns/iter (+/- 256)
test multiply_2    ... bench:   1,851,549 ns/iter (+/- 39,648)
test shr           ... bench:         639 ns/iter (+/- 21)
test to_string     ... bench:         929 ns/iter (+/- 42)
  • Loading branch information
koverstreet committed Nov 29, 2015
1 parent 9799ad7 commit ff06399
Show file tree
Hide file tree
Showing 5 changed files with 370 additions and 116 deletions.
91 changes: 91 additions & 0 deletions src/arithmetic/doubledigit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
use std::mem;

/// One digit of a multi-digit integer; this backend uses 32-bit digits.
pub type BigDigit = u32;
// Largest value representable in a `BigDigit` (re-exported from `std::u32`).
pub use ::std::u32::MAX;

/// Width of a single `BigDigit`, measured in bits.
// Upper-case name kept for callers; hence the lint exemption.
#[allow(non_snake_case)]
#[inline]
pub fn BITS() -> usize {
    let bytes_per_digit = mem::size_of::<BigDigit>();
    bytes_per_digit * 8
}

/// Double-width integer used for intermediate results; it is twice as wide
/// as `BigDigit`, so a sum or product of digits cannot overflow it.
type DoubleBigDigit = u64;

/// Mask that keeps only the low `BigDigit` half of a `DoubleBigDigit`.
const LO_MASK: DoubleBigDigit = MAX as DoubleBigDigit;
/// The digit radix: one more than the largest `BigDigit`, i.e. `2^32`.
const BASE: DoubleBigDigit = LO_MASK + 1;

/// Extract the upper `BigDigit` half of a `DoubleBigDigit`.
#[inline]
fn get_hi(n: DoubleBigDigit) -> BigDigit {
    let upper = n >> BITS();
    upper as BigDigit
}
/// Extract the lower `BigDigit` half of a `DoubleBigDigit`.
#[inline]
fn get_lo(n: DoubleBigDigit) -> BigDigit {
    let lower = n & LO_MASK;
    lower as BigDigit
}

/// Break a `DoubleBigDigit` apart into its `(high, low)` `BigDigit` halves.
#[inline]
fn from_doublebigdigit(n: DoubleBigDigit) -> (BigDigit, BigDigit) {
    let high = get_hi(n);
    let low = get_lo(n);
    (high, low)
}

/// Combine a high and a low `BigDigit` into a single `DoubleBigDigit`.
#[inline]
fn to_doublebigdigit(hi: BigDigit, lo: BigDigit) -> DoubleBigDigit {
    let low_part = lo as DoubleBigDigit;
    let high_part = (hi as DoubleBigDigit) << BITS();
    low_part | high_part
}

/*
* Generic functions for add/subtract/multiply with carry/borrow:
*/

/// Add with carry: returns the low digit of `a + b + *carry` and stores the
/// high digit of that sum back into `*carry`.
#[inline]
pub fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    // Widen first so the three-way sum cannot overflow.
    let wide_sum = (a as DoubleBigDigit) + (b as DoubleBigDigit) + (*carry as DoubleBigDigit);
    let (carry_out, digit) = from_doublebigdigit(wide_sum);

    *carry = carry_out;
    digit
}

/// Subtract with borrow: returns the low digit of `a - b - *borrow` and sets
/// `*borrow` to 1 if the subtraction went below zero, otherwise to 0.
#[inline]
pub fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
    // Bias by BASE so the intermediate stays non-negative. The operand order
    // matters: BASE + a is formed before anything is subtracted.
    let biased = BASE + (a as DoubleBigDigit) - (b as DoubleBigDigit) - (*borrow as DoubleBigDigit);
    let (hi, lo) = from_doublebigdigit(biased);

    // hi * BASE + lo == BASE + a - b - borrow, so the high digit is zero
    // exactly when a - b - borrow is negative.
    *borrow = (hi == 0) as BigDigit;
    lo
}

/// Multiply with carry: returns the low digit of `a * b + *carry` and stores
/// the high digit back into `*carry`.
#[inline]
pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    let wide = (a as DoubleBigDigit) * (b as DoubleBigDigit) + (*carry as DoubleBigDigit);
    let (carry_out, digit) = from_doublebigdigit(wide);

    *carry = carry_out;
    digit
}

/// Multiply-accumulate with carry: returns the low digit of
/// `a + b * c + *carry` and stores the high digit back into `*carry`.
#[inline]
pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
    let product = (b as DoubleBigDigit) * (c as DoubleBigDigit);
    let wide = (a as DoubleBigDigit) + product + (*carry as DoubleBigDigit);
    let (carry_out, digit) = from_doublebigdigit(wide);

    *carry = carry_out;
    digit
}

/// Divide the two-digit value `hi:lo` by `divisor`, returning
/// `(quotient, remainder)`, each truncated to a single `BigDigit`.
#[inline]
pub fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
    let dividend = to_doublebigdigit(hi, lo);
    let wide_divisor = divisor as DoubleBigDigit;

    let quotient = dividend / wide_divisor;
    let remainder = dividend % wide_divisor;
    (quotient as BigDigit, remainder as BigDigit)
}
126 changes: 126 additions & 0 deletions src/arithmetic/usize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::mem;
use std::cmp::min;

/// One digit of a multi-digit integer; this backend uses the native word size.
pub type BigDigit = usize;
// Largest value representable in a `BigDigit` (re-exported from `std::usize`).
pub use ::std::usize::MAX;

/// Width of a single `BigDigit`, measured in bits.
// Upper-case name kept for callers; hence the lint exemption.
#[allow(non_snake_case)]
#[inline]
pub fn BITS() -> usize {
    let bytes_per_digit = mem::size_of::<BigDigit>();
    bytes_per_digit * 8
}

/// Add `a + b`, returning the wrapped sum. Unlike `adc`, the incoming
/// `*carry` is not added to the sum and is not reset: it is simply
/// incremented by one when the addition overflows.
#[inline]
fn adc_no_flush(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    let (sum, overflowed) = a.overflowing_add(b);

    if overflowed {
        *carry += 1;
    }

    sum
}

/// Add with carry: returns the wrapped value of `a + b + *carry` and leaves
/// the number of overflows that occurred in `*carry`.
///
/// The carry-in must be 0 or 1 unless `b` is zero (checked in debug builds).
#[inline]
pub fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    debug_assert!(*carry <= 1 || b == 0);

    // Fold the carry-in into `a` first and restart the carry from that step.
    let (with_carry, overflowed) = a.overflowing_add(*carry);
    *carry = overflowed as BigDigit;

    adc_no_flush(with_carry, b, carry)
}

/// Subtract with borrow: returns the wrapped value of `a - b - *borrow` and
/// leaves the number of underflows that occurred in `*borrow`.
///
/// The borrow-in must be 0 or 1 (checked in debug builds).
#[inline]
pub fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
    debug_assert!(*borrow <= 1);

    // First take the incoming borrow off `a`, then subtract `b`; each step
    // that wraps below zero contributes one to the outgoing borrow.
    let (after_borrow, wrapped_on_borrow) = a.overflowing_sub(*borrow);
    let (difference, wrapped_on_b) = after_borrow.overflowing_sub(b);

    *borrow = wrapped_on_borrow as BigDigit + wrapped_on_b as BigDigit;
    difference
}

/// Multiply with carry: returns the low digit of `a * b + *carry` and stores
/// the high digit back into `*carry`.
///
/// Both operands are split into half-digits so every partial product fits in
/// a single `BigDigit`.
#[inline]
pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    let half = BITS() / 2;
    let half_mask: BigDigit = (1 << half) - 1;

    let (a_hi, a_lo) = (a >> half, a & half_mask);
    let (b_hi, b_lo) = (b >> half, b & half_mask);

    // The two cross terms each straddle the low and the high digit.
    let cross_lo_hi = a_lo * b_hi;
    let cross_hi_lo = a_hi * b_lo;

    // Low digit: a_lo * b_lo plus the incoming carry (which `adc` resets),
    // then the low halves of both cross terms. Overflows are counted in
    // `*carry`.
    let mut low = adc(a_lo * b_lo, 0, carry);
    low = adc_no_flush(low, cross_lo_hi << half, carry);
    low = adc_no_flush(low, cross_hi_lo << half, carry);

    // High digit: a_hi * b_hi plus the high halves of both cross terms, on
    // top of the overflows counted above.
    let high = a_hi * b_hi + (cross_lo_hi >> half) + (cross_hi_lo >> half);
    *carry += high;

    low
}

/// Multiply-accumulate with carry: returns the low digit of
/// `a + b * c + *carry` and stores the high digit back into `*carry`.
#[inline]
pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
    // The multiplication leaves its high digit in `*carry`; adding `a` may
    // bump that by one more.
    let product_low = mul_with_carry(b, c, carry);
    adc_no_flush(a, product_low, carry)
}

/// Divide the two-digit value `hi:lo` by `divisor` using only single-digit
/// operations, returning `(quotient, remainder)`.
///
/// The bits of `lo` are shifted into `hi` a chunk at a time; after each chunk
/// `hi` is divided natively, the partial quotient is added at the matching bit
/// position, and `hi` is replaced by the remainder.
///
/// Panics if `divisor` is zero (native `/` and `%`).
///
/// NOTE(review): the result only fits in one digit when `hi < divisor` on
/// entry; that looks like the intended precondition - confirm against callers.
#[inline]
pub fn div_wide(mut hi: BigDigit, mut lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
// Number of bits of `lo` that have not yet been shifted into `hi`.
let mut bits_remaining = BITS();
let mut quotient = 0;
// Set to 1 when a set top bit of `hi` is about to be shifted out.
let mut borrow = 0;

while bits_remaining != 0 {
// Shift as far as possible without losing a set bit of `hi`, never past
// the bits still left in `lo`, and never by the full digit width.
let mut shift = min(hi.leading_zeros() as usize,
min(bits_remaining, BITS() - 1));

// The top bit of `hi` is already set: a one-bit shift is still needed to
// make progress, so remember that the bit shifted out was a one.
if shift == 0 {
shift = 1;
borrow = 1;
}

// Shift the pair `hi:lo` left by `shift`, moving the top bits of `lo`
// into the bottom of `hi`.
hi <<= shift;
hi |= lo >> (BITS() - shift);
lo <<= shift;
bits_remaining -= shift;

// Quotient bits produced by this chunk, placed at their final position;
// `hi` keeps the running remainder.
quotient += (hi / divisor) << bits_remaining;
hi %= divisor;

// Compensate for the bit that was shifted out above: count one more
// divisor at this position and subtract it from the remainder. The
// subtraction wraps, which restores the contribution of the lost bit.
// NOTE(review): this relies on the remainder being below `divisor` before
// the shift - see the precondition note on the function.
if borrow != 0 {
quotient += 1 << bits_remaining;
hi = hi.wrapping_sub(divisor);
borrow = 0;
}
}

(quotient, hi)
}

/// Checks `div_wide` two ways on seeded pseudo-random inputs:
///
/// * single-digit dividends (`hi == 0`) against native `/` and `%`;
/// * genuine two-digit dividends (`hi != 0`) by multiplying the quotient back
///   and adding the remainder, which must reproduce the original `hi:lo`.
#[test]
fn test_div_wide() {
    use rand::{SeedableRng, StdRng, Rng};

    let seed: &[_] = &[1, 2, 3, 4];
    let mut rng: StdRng = SeedableRng::from_seed(seed);

    for _ in 0..1000 {
        let x: BigDigit = rng.gen();
        let y: BigDigit = rng.gen();

        // A zero divisor would panic in the native division below and says
        // nothing about `div_wide` itself.
        if y == 0 {
            continue;
        }

        assert_eq!(div_wide(0, x, y), ((x / y), (x % y)));

        // Keep `hi` below the divisor so the quotient fits in one digit.
        let hi: BigDigit = rng.gen();
        let hi = hi % y;

        let (quotient, remainder) = div_wide(hi, x, y);
        assert!(remainder < y);

        // quotient * y + remainder must be exactly hi:x. Seeding the carry
        // with the remainder folds the addition into the multiplication.
        let mut carry = remainder;
        let lo = mul_with_carry(quotient, y, &mut carry);
        assert_eq!((carry, lo), (hi, x));
    }
}
116 changes: 116 additions & 0 deletions src/arithmetic/x86_86.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use std::mem;

/// One digit of a multi-digit integer; this backend uses the native word size.
pub type BigDigit = usize;
// Largest value representable in a `BigDigit` (re-exported from `std::usize`).
pub use ::std::usize::MAX;

/// Width of a single `BigDigit`, measured in bits.
// Upper-case name kept for callers; hence the lint exemption.
#[allow(non_snake_case)]
#[inline]
pub fn BITS() -> usize {
    let bytes_per_digit = mem::size_of::<BigDigit>();
    bytes_per_digit * 8
}

/// Add with carry, using the processor's `adc` instruction: returns
/// `a + b + carry-in` (wrapped) and stores the resulting carry flag, as read
/// by `setc`, back into `*carry`.
///
/// Any non-zero `*carry` counts as a carry-in of exactly one: the value only
/// selects between `stc` and `clc` ahead of the `adc`.
#[inline]
pub fn adc(mut a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
unsafe {
// Receives the carry flag after the addition.
let carry_out: u8;

// Carry-in present: set the carry flag so `adc` adds one extra.
if *carry != 0 {
asm!("stc
adc $2, $0
setc $1"
: "+rm" (a), "=r" (carry_out)
: "r" (b)
: "cc");
} else {
// No carry-in: clear the carry flag before the same sequence.
asm!("clc
adc $2, $0
setc $1"
: "+rm" (a), "=r" (carry_out)
: "r" (b)
: "cc");
}

*carry = carry_out as BigDigit;
a
}
}

/// Subtract with borrow, using the processor's `sbb` instruction: returns
/// `a - b - borrow-in` (wrapped) and stores the resulting carry flag, as read
/// by `setc`, back into `*borrow`.
///
/// Any non-zero `*borrow` counts as a borrow-in of exactly one: the value only
/// selects between `stc` and `clc` ahead of the `sbb`.
#[inline]
pub fn sbb(mut a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
unsafe {
// Receives the carry (borrow) flag after the subtraction.
let borrow_out: u8;

// Borrow-in present: set the carry flag so `sbb` subtracts one extra.
if *borrow != 0 {
asm!("stc
sbb $2, $0
setc $1"
: "+rm" (a), "=r" (borrow_out)
: "r" (b)
: "cc");
} else {
// No borrow-in: clear the carry flag before the same sequence.
asm!("clc
sbb $2, $0
setc $1"
: "+rm" (a), "=r" (borrow_out)
: "r" (b)
: "cc");
}

*borrow = borrow_out as BigDigit;
a
}
}

/// Multiply with carry, using the processor's widening `mul`: returns the low
/// digit of `a * b + *carry` and stores the high digit back into `*carry`.
#[inline]
pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
    // SAFETY: the assembly touches only the listed register operands and the
    // flags (declared via "cc"); it reads and writes no memory.
    unsafe {
        let hi: BigDigit;
        let lo: BigDigit;

        // `mul` writes the high half of the product to edx before `add $4, $0`
        // reads the carry operand. `hi` is therefore marked early-clobber
        // (`=&`) so no "r" input can be allocated to edx and be overwritten
        // before it is used.
        asm!("mul $3\n\
              add $4, $0\n\
              adc $$0, $1"
             : "={eax}" (lo), "=&{edx}" (hi)
             : "{eax}" (a), "r" (b), "r" (*carry)
             : "cc");

        *carry = hi;
        lo
    }
}

/// Multiply-accumulate with carry, using the processor's widening `mul`:
/// returns the low digit of `a + b * c + *carry` and stores the high digit
/// back into `*carry`.
#[inline]
pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
    // SAFETY: the assembly touches only the listed operands and the flags
    // (declared via "cc"); `a` may be read from memory but nothing is written
    // to memory.
    unsafe {
        let hi: BigDigit;
        let lo: BigDigit;

        // `mul` writes the high half of the product to edx before `a` ($4) and
        // the carry ($5) are read by the following `add`s. `hi` is therefore
        // marked early-clobber (`=&`) so neither input can be allocated to edx
        // and be overwritten before it is used.
        asm!("mul $3\n\
              add $4, $0\n\
              adc $$0, $1\n\
              add $5, $0\n\
              adc $$0, $1"
             : "={eax}" (lo), "=&{edx}" (hi)
             : "{eax}" (b), "r" (c), "rm" (a), "r" (*carry)
             : "cc");

        *carry = hi;
        lo
    }
}

/// Divide the two-digit value `hi:lo` by `divisor` with the processor's `div`
/// instruction, returning `(quotient, remainder)`.
///
/// NOTE(review): `div` is expected to fault when `divisor` is zero or when the
/// quotient does not fit in one digit (i.e. `hi >= divisor`); nothing here
/// guards against either - confirm callers guarantee `hi < divisor`.
#[inline]
pub fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
unsafe {
let quotient: BigDigit;
let remainder: BigDigit;

// Dividend goes in as edx:eax (`hi`:`lo`); the quotient comes back in eax
// and the remainder in edx. Both registers are already claimed as inputs,
// so `divisor` cannot be allocated to either of them.
asm!("div $4"
: "={eax}" (quotient), "={edx}" (remainder)
: "{edx}" (hi), "{eax}" (lo), "r" (divisor)
: "cc");

(quotient, remainder)
}
}
Loading

0 comments on commit ff06399

Please sign in to comment.