Merge pull request 27 from Kogia-sima/perf/optimize-udivmod

dtolnay · Aug 22, 2021 · 2886a30 · 2886a30
2 parents ac8183c + 3a4047d
commit 2886a30
Showing 1 changed file with 36 additions and 52 deletions.
diff --git a/src/udiv128.rs b/src/udiv128.rs
@@ -1,61 +1,45 @@
-// The code in this file is based on Rust's compiler-builtins crate. The Rust
-// compiler automatically links programs against this crate for target-specific
-// runtime support. We have copied the implementation of `__udivmodti4()` which
-// is an intrinsic implementing division with remainder for architectures
-// without 128-bit integers. This implementation works around some poor codegen
-// by LLVM (https://github.com/rust-lang/rust/issues/44545) and allows for
-// inlining which does not happen with the intrinsic.
-//
-// The compiler-builtins crate carries the following license, which is available
-// in full at:
-// https://github.com/rust-lang-nursery/compiler-builtins/blob/master/LICENSE.TXT
-//
-// ---
-//
-// Copyright 2009-2016 compiler-builtins Developers
-//
-// The compiler-builtins crate is dual licensed under both the University of
-// Illinois "BSD-Like" license and the MIT license. As a user of this code you
-// may choose to use it under either license. As a contributor, you agree to
-// allow your code to be used under both.
-
+/// Multiply unsigned 128 bit integers, return upper 128 bits of the result
 #[inline]
-pub fn udivmod_1e19(n: u128) -> (u128, u64) {
-    let d = 10_000_000_000_000_000_000_u64; // 10^19
+fn u128_mulhi(x: u128, y: u128) -> u128 {
+    let x_lo = x as u64;
+    let x_hi = (x >> 64) as u64;
+    let y_lo = y as u64;
+    let y_hi = (y >> 64) as u64;
 
-    let high = (n >> 64) as u64;
-    if high == 0 {
-        let low = n as u64;
-        return ((low / d) as u128, low % d);
-    }
+    // handle possibility of overflow
+    let carry = (x_lo as u128 * y_lo as u128) >> 64;
+    let m = x_lo as u128 * y_hi as u128 + carry;
+    let high1 = m >> 64;
 
-    let sr = 65 - high.leading_zeros();
+    let m_lo = m as u64;
+    let high2 = x_hi as u128 * y_lo as u128 + m_lo as u128 >> 64;
 
-    // 2 <= sr <= 65
-    let mut q: u128 = n << (128 - sr);
-    let mut r: u128 = n >> sr;
-    let mut carry: u64 = 0;
+    x_hi as u128 * y_hi as u128 + high1 + high2
+}
 
-    // Don't use a range because they may generate references to memcpy in unoptimized code
-    //
-    // Loop invariants:  r < d; carry is 0 or 1
-    let mut i = 0;
-    while i < sr {
-        i += 1;
+/// Divide `n` by 1e19 and return quotient and remainder
+///
+/// Integer division algorithm is based on the following paper:
+///
+///   T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
+///   in Proc. of the SIGPLAN94 Conference on Programming Language Design and
+///   Implementation, 1994, pp. 61–72
+///
+#[inline]
+pub fn udivmod_1e19(n: u128) -> (u128, u64) {
+    let d = 10_000_000_000_000_000_000_u64; // 10^19
 
-        // r:q = ((r:q) << 1) | carry
-        r = (r << 1) | (q >> 127);
-        q = (q << 1) | carry as u128;
+    let quot = if n < 1 << 83 {
+        ((n >> 19) as u64 / (d >> 19)) as u128
+    } else {
+        let factor =
+            (8507059173023461586_u64 as u128) << 64 | 10779635027931437427 as u128;
+        u128_mulhi(n, factor) >> 62
+    };
 
-        // carry = 0
-        // if r >= d {
-        //     r -= d;
-        //     carry = 1;
-        // }
-        let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
-        carry = (s & 1) as u64;
-        r -= (d as u128) & s as u128;
-    }
+    let rem = (n - quot * d as u128) as u64;
+    debug_assert_eq!(quot, n / d as u128);
+    debug_assert_eq!(rem as u128, n % d as u128);
 
-    ((q << 1) | carry as u128, r as u64)
+    (quot, rem)
 }