Kill DoubleBigDigit; BigDigit := usize

Reimplement the low-level arithmetic functions without DoubleBigDigit, and then change BigDigit to usize.

This ends up being a pretty significant performance win, even with my relatively simple and dumb code. An assembly version could be much faster (x86, at least, can implement all of these operations with one or just a few instructions).

Before:

    test divide_0      ... bench:           3,906 ns/iter (+/- 125)
    test divide_1      ... bench:         293,600 ns/iter (+/- 6,545)
    test divide_2      ... bench:     657,792,410 ns/iter (+/- 28,285,336)
    test factorial_100 ... bench:           6,538 ns/iter (+/- 971)
    test fib_100       ... bench:           1,431 ns/iter (+/- 39)
    test multiply_0    ... bench:             419 ns/iter (+/- 6)
    test multiply_1    ... bench:          34,637 ns/iter (+/- 550)
    test multiply_2    ... bench:       4,523,752 ns/iter (+/- 1,039,485)
    test shr           ... bench:             535 ns/iter (+/- 13)
    test to_string     ... bench:             621 ns/iter (+/- 58)

After:

    test divide_0      ... bench:           3,138 ns/iter (+/- 136)
    test divide_1      ... bench:         123,971 ns/iter (+/- 2,817)
    test divide_2      ... bench:     158,154,850 ns/iter (+/- 3,874,826)
    test factorial_100 ... bench:           6,805 ns/iter (+/- 94)
    test fib_100       ... bench:           1,429 ns/iter (+/- 35)
    test multiply_0    ... bench:             108 ns/iter (+/- 15)
    test multiply_1    ... bench:          16,503 ns/iter (+/- 256)
    test multiply_2    ... bench:       1,851,549 ns/iter (+/- 39,648)
    test shr           ... bench:             639 ns/iter (+/- 21)
    test to_string     ... bench:             929 ns/iter (+/- 42)
koverstreet · Nov 29, 2015 · ff06399 · ff06399
1 parent 9799ad7
commit ff06399
Show file tree

Hide file tree

Showing 5 changed files with 370 additions and 116 deletions.
diff --git a/src/arithmetic/doubledigit.rs b/src/arithmetic/doubledigit.rs
@@ -0,0 +1,91 @@
+use std::mem;
+
+pub type BigDigit = u32;
+pub use ::std::u32::MAX;
+
+/// Width of one `BigDigit`, measured in bits.
+#[allow(non_snake_case)]
+#[inline]
+pub fn BITS() -> usize {
+    // `size_of` reports bytes, so scale by eight bits per byte.
+    8 * mem::size_of::<BigDigit>()
+}
+
+/// `DoubleBigDigit` is the internal type used to carry out the computations.
+/// It is twice as wide as `BigDigit`.
+type DoubleBigDigit = u64;
+
+// One past the largest `BigDigit` value (2^32), held in a `DoubleBigDigit`.
+const BASE: DoubleBigDigit = 1 << 32;
+// Mask that selects the low `BigDigit` half of a `DoubleBigDigit`.
+const LO_MASK: DoubleBigDigit = MAX as DoubleBigDigit;
+
+/// Returns the upper `BigDigit` half of `n`.
+#[inline]
+fn get_hi(n: DoubleBigDigit) -> BigDigit {
+    let shifted = n >> BITS();
+    shifted as BigDigit
+}
+/// Returns the lower `BigDigit` half of `n`.
+#[inline]
+fn get_lo(n: DoubleBigDigit) -> BigDigit {
+    let masked = n & LO_MASK;
+    masked as BigDigit
+}
+
+/// Breaks a `DoubleBigDigit` apart into its `(high, low)` pair of `BigDigit`s.
+#[inline]
+fn from_doublebigdigit(n: DoubleBigDigit) -> (BigDigit, BigDigit) {
+    let hi = get_hi(n);
+    let lo = get_lo(n);
+    (hi, lo)
+}
+
+/// Combines two `BigDigit`s into one `DoubleBigDigit`, placing `hi` in the
+/// upper half and `lo` in the lower half.
+#[inline]
+fn to_doublebigdigit(hi: BigDigit, lo: BigDigit) -> DoubleBigDigit {
+    let upper = (hi as DoubleBigDigit) << BITS();
+    let lower = lo as DoubleBigDigit;
+    lower | upper
+}
+
+/*
+ * Generic functions for add/subtract/multiply with carry/borrow:
+ */
+
+/// Add with carry: returns the low digit of `a + b + *carry` and stores the
+/// high digit of that sum back into `*carry`.
+#[inline]
+pub fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    // Widen every operand first so the three-way sum cannot overflow.
+    let sum = (a as DoubleBigDigit) + (b as DoubleBigDigit) + (*carry as DoubleBigDigit);
+    let (hi, lo) = from_doublebigdigit(sum);
+
+    *carry = hi;
+    lo
+}
+
+/// Subtract with borrow: returns the low digit of `a - b - *borrow` and sets
+/// `*borrow` to 1 when the subtraction went below zero, 0 otherwise.
+#[inline]
+pub fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
+    // Bias the computation by BASE before subtracting:
+    //     hi * BASE + lo == BASE + a - b - borrow
+    // so `a - b - borrow < 0` holds exactly when `hi == 0`.
+    let biased = BASE + (a as DoubleBigDigit) - (b as DoubleBigDigit) - (*borrow as DoubleBigDigit);
+    let (hi, lo) = from_doublebigdigit(biased);
+
+    *borrow = (hi == 0) as BigDigit;
+    lo
+}
+
+/// Multiply with carry: returns the low digit of `a * b + *carry` and stores
+/// the high digit back into `*carry`.
+#[inline]
+pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    let product = (a as DoubleBigDigit) * (b as DoubleBigDigit);
+    let total = product + (*carry as DoubleBigDigit);
+
+    let (hi, lo) = from_doublebigdigit(total);
+    *carry = hi;
+    lo
+}
+
+/// Multiply-accumulate with carry: returns the low digit of
+/// `a + b * c + *carry` and stores the high digit back into `*carry`.
+#[inline]
+pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    let product = (b as DoubleBigDigit) * (c as DoubleBigDigit);
+    let total = (a as DoubleBigDigit) + product + (*carry as DoubleBigDigit);
+
+    let (hi, lo) = from_doublebigdigit(total);
+    *carry = hi;
+    lo
+}
+
+/// Divides the two-digit value `hi:lo` by `divisor` and returns
+/// `(quotient, remainder)`, each narrowed to a single `BigDigit`.
+#[inline]
+pub fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
+    let dividend = to_doublebigdigit(hi, lo);
+    let wide_divisor = divisor as DoubleBigDigit;
+
+    let quotient = dividend / wide_divisor;
+    let remainder = dividend % wide_divisor;
+    (quotient as BigDigit, remainder as BigDigit)
+}
diff --git a/src/arithmetic/usize.rs b/src/arithmetic/usize.rs
@@ -0,0 +1,126 @@
+use std::mem;
+use std::cmp::min;
+
+pub type BigDigit = usize;
+pub use ::std::usize::MAX;
+
+/// Number of bits in one `BigDigit`.
+#[allow(non_snake_case)]
+#[inline]
+pub fn BITS() -> usize {
+    let bytes = mem::size_of::<BigDigit>();
+    bytes * 8
+}
+
+/// Adds `b` to `a` with wrap-around and bumps `*carry` by one if the addition
+/// wrapped.  The incoming value of `*carry` is not added into the result; it
+/// is only accumulated into.
+#[inline]
+fn adc_no_flush(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    let sum = a.wrapping_add(b);
+
+    // A wrapped sum is strictly smaller than the operand it started from.
+    let wrapped = sum < a;
+    if wrapped {
+        *carry += 1;
+    }
+
+    sum
+}
+
+/// Add with carry: returns `a + b + *carry` with wrap-around and leaves the
+/// number of wraps that occurred in `*carry`.
+///
+/// In debug builds this asserts that `*carry` is at most 1 unless `b` is 0.
+#[inline]
+pub fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    debug_assert!(*carry <= 1 || b == 0);
+
+    // Fold in the incoming carry first and restart the carry count from it...
+    let with_carry = a.wrapping_add(*carry);
+    *carry = (with_carry < a) as BigDigit;
+
+    // ...then add `b`, counting a second wrap if there is one.
+    adc_no_flush(with_carry, b, carry)
+}
+
+/// Subtract with borrow: returns `a - b - *borrow` with wrap-around and
+/// leaves the number of wraps that occurred in `*borrow`.
+///
+/// In debug builds this asserts that the incoming `*borrow` is 0 or 1.
+#[inline]
+pub fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
+    debug_assert!(*borrow <= 1);
+
+    // Take away the incoming borrow; a wrapped result is larger than `a`.
+    let minus_borrow = a.wrapping_sub(*borrow);
+    let first_wrap = minus_borrow > a;
+
+    // Then take away `b`, detecting a wrap the same way.
+    let diff = minus_borrow.wrapping_sub(b);
+    let second_wrap = diff > minus_borrow;
+
+    *borrow = (first_wrap as BigDigit) + (second_wrap as BigDigit);
+    diff
+}
+
+/// Multiply with carry, built from half-width partial products: returns the
+/// low digit of `a * b + *carry` and leaves the high digit in `*carry`.
+#[inline]
+pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    let half = BITS() / 2;
+    let low_mask: BigDigit = (1 << half) - 1;
+
+    // Split both operands into half-width pieces so that every partial
+    // product below fits in a single `BigDigit`.
+    let (a_hi, a_lo) = (a >> half, a & low_mask);
+    let (b_hi, b_lo) = (b >> half, b & low_mask);
+
+    let cross_lo_hi = a_lo * b_hi;
+    let cross_hi_lo = a_hi * b_lo;
+
+    // Low digit: low product plus the incoming carry, then the low halves of
+    // both cross products; each wrap is counted in `*carry`.
+    let mut lo = adc(a_lo * b_lo, 0, carry);
+    lo = adc_no_flush(lo, cross_lo_hi << half, carry);
+    lo = adc_no_flush(lo, cross_hi_lo << half, carry);
+
+    // High digit: the counted wraps plus the high product and the high halves
+    // of both cross products.
+    *carry += a_hi * b_hi + (cross_lo_hi >> half) + (cross_hi_lo >> half);
+    lo
+}
+
+/// Multiply-accumulate with carry: returns the low digit of
+/// `a + b * c + *carry` and leaves the high digit in `*carry`.
+#[inline]
+pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    // `b * c + *carry` first; its high digit replaces `*carry`.
+    let product_lo = mul_with_carry(b, c, carry);
+
+    // Adding `a` can wrap once more, which bumps the high digit.
+    adc_no_flush(a, product_lo, carry)
+}
+
+/// Divides the two-digit value `hi:lo` by `divisor` using only single-digit
+/// operations, returning `(quotient, remainder)`.
+///
+/// The bits of `lo` are shifted into `hi` a group at a time; after each shift
+/// `hi` is divided by `divisor`, the partial quotient is added in at the
+/// current bit position and `hi` is reduced to the remainder.
+///
+/// NOTE(review): this appears to assume `hi < divisor` on entry (so the
+/// quotient fits in one digit) and a non-zero `divisor` — confirm against callers.
+#[inline]
+pub fn div_wide(mut hi: BigDigit, mut lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
+    // Number of bits of `lo` that have not yet been shifted into `hi`.
+    let mut bits_remaining = BITS();
+    let mut quotient = 0;
+    // Set to 1 when a set top bit of `hi` is about to be shifted out.
+    let mut borrow = 0;
+
+    while bits_remaining != 0 {
+        // Shift by as many bits as `hi` has leading zeros, but no more than
+        // are left in `lo`, and never by a full digit width.
+        let mut shift = min(hi.leading_zeros() as usize,
+                        min(bits_remaining, BITS() - 1));
+
+        // `hi` has no headroom: shift by one bit anyway and remember that the
+        // top bit is lost by the shift.
+        if shift == 0 {
+            shift = 1;
+            borrow = 1;
+        }
+
+        // Move the top `shift` bits of `lo` into the bottom of `hi`.
+        hi <<= shift;
+        hi  |= lo >> (BITS() - shift);
+        lo <<= shift;
+        bits_remaining -= shift;
+
+        // Partial quotient goes in at the current position; `hi` keeps the
+        // remainder.
+        quotient += (hi / divisor) << bits_remaining;
+        hi %= divisor;
+
+        // Account for the bit that was shifted out: take one more `divisor`
+        // out (wrapping) and record it as one more quotient unit.
+        if borrow != 0 {
+            quotient += 1 << bits_remaining;
+            hi = hi.wrapping_sub(divisor);
+            borrow = 0;
+        }
+    }
+
+    (quotient, hi)
+}
+
+/// Checks `div_wide` against the native operators for single-digit dividends
+/// and against a multiply round trip for two-digit dividends.
+#[test]
+fn test_div_wide() {
+    use rand::{SeedableRng, StdRng, Rng};
+
+    let seed: &[_] = &[1, 2, 3, 4];
+    let mut rng: StdRng = SeedableRng::from_seed(seed);
+
+    for _ in 0..1000 {
+        let x: BigDigit = rng.gen();
+        let y: BigDigit = rng.gen();
+
+        // A zero divisor would panic in the reference `/` and `%` below.
+        if y == 0 {
+            continue;
+        }
+
+        // Single-digit dividend: must agree with the native operators.
+        assert_eq!(div_wide(0, x, y), ((x / y), (x % y)));
+
+        // Two-digit dividend: build hi:lo = q * y + r with r < y (which also
+        // guarantees hi < y), then check that dividing by y returns (q, r).
+        let q: BigDigit = rng.gen();
+        let r: BigDigit = x % y;
+        let mut hi = r;
+        let lo = mul_with_carry(q, y, &mut hi);
+
+        assert_eq!(div_wide(hi, lo, y), (q, r));
+    }
+}
diff --git a/src/arithmetic/x86_86.rs b/src/arithmetic/x86_86.rs
@@ -0,0 +1,116 @@
+use std::mem;
+
+pub type BigDigit = usize;
+pub use ::std::usize::MAX;
+
+/// Bit width of a single `BigDigit`.
+#[allow(non_snake_case)]
+#[inline]
+pub fn BITS() -> usize {
+    let digit_bytes = mem::size_of::<BigDigit>();
+    8 * digit_bytes
+}
+
+/// Add with carry using the x86 `adc` instruction: returns `a + b`, plus one
+/// if `*carry` is non-zero, and writes the resulting carry flag (0 or 1) back
+/// into `*carry`.
+///
+/// NOTE(review): any non-zero `*carry` is treated as a carry of exactly 1 —
+/// confirm that no caller passes a larger value.
+#[inline]
+pub fn adc(mut a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    unsafe {
+        let carry_out: u8;
+
+        // The two branches differ only in the first instruction: `stc` sets
+        // the carry flag before the `adc`, `clc` clears it.  `setc` then
+        // captures the carry flag produced by the addition.
+        if *carry != 0 {
+            asm!("stc
+                  adc $2, $0
+                  setc $1"
+                 : "+rm" (a), "=r" (carry_out)
+                 : "r" (b)
+                 : "cc");
+        } else {
+            asm!("clc
+                  adc $2, $0
+                  setc $1"
+                 : "+rm" (a), "=r" (carry_out)
+                 : "r" (b)
+                 : "cc");
+        }
+
+        *carry = carry_out as BigDigit;
+        a
+    }
+}
+
+/// Subtract with borrow using the x86 `sbb` instruction: returns `a - b`,
+/// minus one if `*borrow` is non-zero, and writes the resulting carry flag
+/// (0 or 1) back into `*borrow`.
+///
+/// NOTE(review): any non-zero `*borrow` is treated as a borrow of exactly 1 —
+/// confirm that no caller passes a larger value.
+#[inline]
+pub fn sbb(mut a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
+    unsafe {
+        let borrow_out: u8;
+
+        // The two branches differ only in the first instruction: `stc` sets
+        // the carry flag before the `sbb`, `clc` clears it.  `setc` then
+        // captures the carry flag produced by the subtraction.
+        if *borrow != 0 {
+            asm!("stc
+                  sbb $2, $0
+                  setc $1"
+                 : "+rm" (a), "=r" (borrow_out)
+                 : "r" (b)
+                 : "cc");
+        } else {
+            asm!("clc
+                  sbb $2, $0
+                  setc $1"
+                 : "+rm" (a), "=r" (borrow_out)
+                 : "r" (b)
+                 : "cc");
+        }
+
+        *borrow = borrow_out as BigDigit;
+        a
+    }
+}
+
+/// Multiply with carry using the x86 `mul` instruction: returns the low digit
+/// of `a * b + *carry` and stores the high digit back into `*carry`.
+///
+/// NOTE(review): the constraints name `eax`/`edx` although `BigDigit` is
+/// `usize`; presumably the backend widens them to the full-width registers —
+/// confirm.
+#[inline]
+pub fn mul_with_carry(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    unsafe {
+        let hi: BigDigit;
+        let lo: BigDigit;
+
+        // `mul` writes edx before `$4` (the incoming carry) is read by the
+        // following `add`, so the edx output must be early-clobber (`&`).
+        // Without it the register allocator is free to place the carry in
+        // edx, where `mul` would destroy it.
+        asm!("mul $3
+              add $4, $0
+              adc $$0, $1"
+             : "={eax}" (lo), "=&{edx}" (hi)
+             : "{eax}" (a), "r" (b), "r" (*carry)
+             : "cc");
+
+        *carry = hi;
+        lo
+    }
+}
+
+/// Multiply-accumulate with carry using the x86 `mul` instruction: returns
+/// the low digit of `a + b * c + *carry` and stores the high digit back into
+/// `*carry`.
+///
+/// NOTE(review): the constraints name `eax`/`edx` although `BigDigit` is
+/// `usize`; presumably the backend widens them to the full-width registers —
+/// confirm.
+#[inline]
+pub fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
+    unsafe {
+        let hi: BigDigit;
+        let lo: BigDigit;
+
+        // `mul` writes edx before `$4` (`a`) and `$5` (the incoming carry)
+        // are read by the `add`s, so the edx output must be early-clobber
+        // (`&`).  Without it the register allocator is free to place either
+        // input in edx, where `mul` would destroy it.
+        asm!("mul $3
+              add $4, $0
+              adc $$0, $1
+              add $5, $0
+              adc $$0, $1"
+             : "={eax}" (lo), "=&{edx}" (hi)
+             : "{eax}" (b), "r" (c), "rm" (a), "r" (*carry)
+             : "cc");
+
+        *carry = hi;
+        lo
+    }
+}
+
+/// Divides the two-digit value `hi:lo` by `divisor` with a single x86 `div`
+/// instruction, returning `(quotient, remainder)`.
+///
+/// NOTE(review): x86 `div` raises a hardware divide error, not a Rust panic,
+/// when the divisor is zero or the quotient does not fit in one register —
+/// callers presumably guarantee `hi < divisor`; confirm.
+#[inline]
+pub fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
+    unsafe {
+        let quotient: BigDigit;
+        let remainder: BigDigit;
+
+        // `div` takes the dividend in edx:eax and leaves the quotient in eax
+        // and the remainder in edx, hence the fixed-register constraints.
+        asm!("div $4"
+             : "={eax}" (quotient), "={edx}" (remainder)
+             : "{edx}" (hi), "{eax}" (lo), "r" (divisor)
+             : "cc");
+
+        (quotient, remainder)
+    }
+}