diff --git a/Changelog.md b/Changelog.md
index 7bbbe3df..bce89160 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,14 +11,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added `inv_mod`, `gcd`, `gcd_extended`, `lcm`.
+- Added `inv_mod`, `mul_redc`, `gcd`, `gcd_extended`, `lcm`.
 - Added `sqlx` support.
 
+### Changed
+
+- Renamed `ring_inverse` to `inv_ring`.
+
 ## [1.2.0] — 2022-06-03
 
 ### Added
 
-- Added `reduce_mod`, `add_mod`, `mul_mod`, `pow_mod`, `inv_mod`.
+- Added `reduce_mod`, `add_mod`, `mul_mod`, `pow_mod`.
 - Added `num-bigint` and `ark-ff` support.
 - Made `algorithms` public, but with unstable API for now.
 
diff --git a/src/algorithms/gcd/matrix.rs b/src/algorithms/gcd/matrix.rs
index 539d49d7..2febae6b 100644
--- a/src/algorithms/gcd/matrix.rs
+++ b/src/algorithms/gcd/matrix.rs
@@ -118,6 +118,7 @@ impl Matrix {
     ///
     /// Panics if `r1 < r0`.
     // OPT: Would this be faster using extended binary gcd?
+    // See <https://en.wikipedia.org/wiki/Binary_GCD_algorithm#Extended_Binary_GCD>
    #[must_use]
    pub fn from_u64(mut r0: u64, mut r1: u64) -> Self {
        debug_assert!(r0 >= r1);
diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs
index 08b69d3f..42232dba 100644
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@@ -6,11 +6,13 @@
 mod div;
 mod gcd;
 mod mul;
+mod mul_redc;
 
 pub use self::{
     div::div_rem,
     gcd::{gcd, gcd_extended, inv_mod, LehmerMatrix},
     mul::{mul, mul_inline},
+    mul_redc::mul_redc,
 };
 
 #[cfg(feature = "bench")]
diff --git a/src/algorithms/mul_redc.rs b/src/algorithms/mul_redc.rs
new file mode 100644
index 00000000..0ba794eb
--- /dev/null
+++ b/src/algorithms/mul_redc.rs
@@ -0,0 +1,68 @@
+use super::mul;
+use core::iter::zip;
+
+/// See Handbook of Applied Cryptography, Algorithm 14.32, p. 601.
+pub fn mul_redc(a: &[u64], b: &[u64], result: &mut [u64], m: &[u64], inv: u64) {
+    debug_assert!(!m.is_empty());
+    debug_assert_eq!(a.len(), m.len());
+    debug_assert_eq!(b.len(), m.len());
+    debug_assert_eq!(result.len(), m.len());
+    debug_assert_eq!(inv.wrapping_mul(m[0]), u64::MAX);
+
+    // Compute temp full product.
+    // OPT: Do combined multiplication and reduction.
+    let mut temp = vec![0; 2 * m.len() + 1];
+    mul(a, b, &mut temp);
+
+    // Reduce temp.
+    for i in 0..m.len() {
+        let u = temp[i].wrapping_mul(inv);
+
+        // REFACTOR: Create add_mul1 routine.
+        let mut carry = 0;
+        #[allow(clippy::cast_possible_truncation)] // Intentional
+        for j in 0..m.len() {
+            carry += u128::from(temp[i + j]) + u128::from(m[j]) * u128::from(u);
+            temp[i + j] = carry as u64;
+            carry >>= 64;
+        }
+        #[allow(clippy::cast_possible_truncation)] // Intentional
+        for j in m.len()..(temp.len() - i) {
+            carry += u128::from(temp[i + j]);
+            temp[i + j] = carry as u64;
+            carry >>= 64;
+        }
+        debug_assert!(carry == 0);
+    }
+    debug_assert!(temp[temp.len() - 1] <= 1); // Basically a carry flag.
+
+    // Copy result.
+    result.copy_from_slice(&temp[m.len()..2 * m.len()]);
+
+    // Subtract one more m if result >= m
+    let mut reduce = true;
+    // REFACTOR: Create cmp routine
+    if temp[temp.len() - 1] == 0 {
+        for (r, m) in zip(result.iter().rev(), m.iter().rev()) {
+            if r < m {
+                reduce = false;
+                break;
+            }
+            if r > m {
+                break;
+            }
+        }
+    }
+    if reduce {
+        // REFACTOR: Create sub routine
+        let mut carry = 0;
+        #[allow(clippy::cast_possible_truncation)] // Intentional
+        #[allow(clippy::cast_sign_loss)] // Intentional
+        for (r, m) in zip(result.iter_mut(), m.iter().copied()) {
+            carry += i128::from(*r) - i128::from(m);
+            *r = carry as u64;
+            carry >>= 64; // Sign extending shift
+        }
+        debug_assert!(carry == 0 || temp[temp.len() - 1] == 1);
+    }
+}
diff --git a/src/algorithms/primaility.rs b/src/algorithms/primaility.rs
new file mode 100644
index 00000000..0fe03ae1
--- /dev/null
+++ b/src/algorithms/primaility.rs
@@ -0,0 +1,25 @@
+// Product of primes up to and including 47.
+const SMALL_PRIMES: u64 = 614889782588491410;
+
+/// Miller-Rabin primality test
+///
+/// See <https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test>
+pub fn miller_rabin(n: u64, base: u64) -> bool {
+    todo!{}
+}
+
+/// Exact 64 bit primality test
+pub fn is_prime(n: u64) -> bool {
+    // Sufficient set of bases for `u64`
+    // See <https://miller-rabin.appspot.com/>
+    // See <https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test#Testing_against_small_sets_of_bases>
+    // OPT: This method ?
+    // OPT: Combined basis srp
+    miller_rabin(n, 2) &&
+    miller_rabin(n, 325) &&
+    miller_rabin(n, 9375) &&
+    miller_rabin(n, 28178) &&
+    miller_rabin(n, 450775) &&
+    miller_rabin(n, 9780504) &&
+    miller_rabin(n, 1795265022)
+}
diff --git a/src/modular.rs b/src/modular.rs
index 6d82b707..4c8e64e5 100644
--- a/src/modular.rs
+++ b/src/modular.rs
@@ -45,6 +45,9 @@ impl<const BITS: usize, const LIMBS: usize> Uint<BITS, LIMBS> {
     /// Compute $\mod{\mathtt{self} ⋅ \mathtt{rhs}}_{\mathtt{modulus}}$.
     ///
     /// Returns zero if the modulus is zero.
+    ///
+    /// See [`mul_redc`](Self::mul_redc) for a faster variant at the cost of
+    /// some pre-computation.
     #[must_use]
     pub fn mul_mod(self, rhs: Self, mut modulus: Self) -> Self {
         if modulus == Self::ZERO {
@@ -100,14 +103,71 @@
     pub fn inv_mod(self, modulus: Self) -> Option<Self> {
         algorithms::inv_mod(self, modulus)
     }
+
+    #[allow(clippy::doc_markdown)] // False positive
+    /// Montgomery multiplication.
+    ///
+    /// Computes
+    ///
+    /// $$
+    /// \mod{\frac{\mathtt{self} ⋅ \mathtt{other}}{ 2^{64 ·
+    /// \mathtt{LIMBS}}}}_{\mathtt{modulus}} $$
+    ///
+    /// This is useful because it can be computed notably faster than
+    /// [`mul_mod`](Self::mul_mod). Many computations can be done by
+    /// pre-multiplying values with $R = 2^{64 · \mathtt{LIMBS}}$
+    /// and then using [`mul_redc`](Self::mul_redc) instead of
+    /// [`mul_mod`](Self::mul_mod).
+    ///
+    /// For this algorithm to work, it needs an extra parameter `inv` which must
+    /// be set to
+    ///
+    /// $$
+    /// \mathtt{inv} = \mod{\frac{-1}{\mathtt{modulus}} }_{2^{64}}
+    /// $$
+    ///
+    /// The `inv` value only exists for odd values of `modulus`. It can be
+    /// computed using [`inv_ring`](Self::inv_ring) from `U64`.
+    ///
+    /// ```
+    /// # use ruint::{uint, Uint, aliases::*};
+    /// # uint!{
+    /// # let modulus = 21888242871839275222246405745257275088548364400416034343698204186575808495617_U256;
+    /// let inv = (-U64::from(modulus.as_limbs()[0]).inv_ring().unwrap()).as_limbs()[0];
+    /// # assert_eq!(inv.wrapping_mul(modulus.as_limbs()[0]), u64::MAX);
+    /// # assert_eq!(inv, 0xc2e1f593efffffff);
+    /// # }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `inv` is not correct.
+    // TODO: Improve the conversion dev-ex.
+    #[must_use]
+    pub fn mul_redc(self, other: Self, modulus: Self, inv: u64) -> Self {
+        if BITS == 0 {
+            return Self::ZERO;
+        }
+        assert_eq!(inv.wrapping_mul(modulus.limbs[0]), u64::MAX);
+        let mut result = Self::ZERO;
+        algorithms::mul_redc(
+            &self.limbs,
+            &other.limbs,
+            &mut result.limbs,
+            &modulus.limbs,
+            inv,
+        );
+        debug_assert!(result < modulus);
+        result
+    }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::{const_for, nlimbs};
+    use crate::{aliases::U64, const_for, nlimbs};
     use core::cmp::min;
-    use proptest::{proptest, test_runner::Config};
+    use proptest::{prop_assume, proptest, test_runner::Config};
 
     #[test]
     fn test_commutative() {
@@ -205,6 +265,29 @@
             });
         });
     }
+
+    #[test]
+    fn test_mul_redc() {
+        const_for!(BITS in NON_ZERO if (BITS >= 16) {
+            const LIMBS: usize = nlimbs(BITS);
+            type U = Uint<BITS, LIMBS>;
+            proptest!(|(a: U, b: U, m: U)| {
+                prop_assume!(m >= U::from(2));
+                if let Some(inv) = U64::from(m.as_limbs()[0]).inv_ring() {
+                    let inv = (-inv).as_limbs()[0];
+
+                    let r = U::from(2).pow_mod(U::from(64 * LIMBS), m);
+                    let ar = a.mul_mod(r, m);
+                    let br = b.mul_mod(r, m);
+                    // TODO: Test for larger (>= m) values of a, b.
+
+                    let expected = a.mul_mod(b, m).mul_mod(r, m);
+
+                    assert_eq!(ar.mul_redc(br, m, inv), expected);
+                }
+            });
+        });
+    }
 }
 
 #[cfg(feature = "bench")]
diff --git a/src/mul.rs b/src/mul.rs
index 4ccda6b6..cd1608bc 100644
--- a/src/mul.rs
+++ b/src/mul.rs
@@ -68,7 +68,7 @@
     /// Computes the inverse modulo $2^{\mathtt{BITS}}$ of `self`, returning
     /// [`None`] if the inverse does not exist.
     #[must_use]
-    pub fn ring_inverse(self) -> Option<Self> {
+    pub fn inv_ring(self) -> Option<Self> {
         if BITS == 0 || self.limbs[0] & 1 == 0 {
             return None;
         }
@@ -227,8 +227,8 @@ mod tests {
             type U = Uint<BITS, LIMBS>;
             proptest!(|(mut a: U)| {
                 a |= U::from(1); // Make sure a is invertible
-                assert_eq!(a * a.ring_inverse().unwrap(), U::from(1));
-                assert_eq!(a.ring_inverse().unwrap().ring_inverse().unwrap(), a);
+                assert_eq!(a * a.inv_ring().unwrap(), U::from(1));
+                assert_eq!(a.inv_ring().unwrap().inv_ring().unwrap(), a);
             });
         });
     }