rust-lang · Amanieu · Jul 24, 2024 · Jun 21, 2024 · Jul 16, 2024 · tgross35
diff --git a/README.md b/README.md
@@ -157,6 +157,9 @@ rely on CI.
 - [x] bswapdi2.c
 - [x] bswapsi2.c
 - [x] bswapti2.c
+- [x] clzdi2.c
+- [x] clzsi2.c
+- [x] clzti2.c
 - [x] comparedf2.c
 - [x] comparesf2.c
 - [x] divdf3.c
@@ -325,9 +328,6 @@ These builtins are never called by LLVM.
 - ~~arm/switch32.S~~
 - ~~arm/switch8.S~~
 - ~~arm/switchu8.S~~
-- ~~clzdi2.c~~
-- ~~clzsi2.c~~
-- ~~clzti2.c~~
 - ~~cmpdi2.c~~
 - ~~cmpti2.c~~
 - ~~ctzdi2.c~~

diff --git a/build.rs b/build.rs
@@ -164,7 +164,6 @@ fn configure_check_cfg() {
         "__bswapsi2",
         "__bswapdi2",
         "__bswapti2",
-        "__clzsi2",
         "__divdi3",
         "__divsi3",
         "__divmoddi4",
@@ -345,8 +344,6 @@ mod c {
             ("__absvsi2", "absvsi2.c"),
             ("__addvdi3", "addvdi3.c"),
             ("__addvsi3", "addvsi3.c"),
-            ("__clzdi2", "clzdi2.c"),
-            ("__clzsi2", "clzsi2.c"),
             ("__cmpdi2", "cmpdi2.c"),
             ("__ctzdi2", "ctzdi2.c"),
             ("__ctzsi2", "ctzsi2.c"),
@@ -382,7 +379,6 @@ mod c {
             sources.extend(&[
                 ("__absvti2", "absvti2.c"),
                 ("__addvti3", "addvti3.c"),
-                ("__clzti2", "clzti2.c"),
 sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) 
 sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) 
                 ("__cmpti2", "cmpti2.c"),
                 ("__ctzti2", "ctzti2.c"),
                 ("__ffsti2", "ffsti2.c"),
@@ -435,8 +431,6 @@ mod c {
                 ("__aeabi_frsub", "arm/aeabi_frsub.c"),
                 ("__bswapdi2", "arm/bswapdi2.S"),
                 ("__bswapsi2", "arm/bswapsi2.S"),
-                ("__clzdi2", "arm/clzdi2.S"),
-                ("__clzsi2", "arm/clzsi2.S"),
                 ("__divmodsi4", "arm/divmodsi4.S"),
                 ("__divsi3", "arm/divsi3.S"),
                 ("__modsi3", "arm/modsi3.S"),
@@ -572,9 +566,6 @@ mod c {
                 }
             }
             sources.remove(&to_remove);
-
-            // But use some generic implementations where possible
-            sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")])
         }
 
         if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" {

diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs
@@ -3,10 +3,12 @@
 // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
 // Compilers will insert the check for zero in cases where it is needed.
 
+use crate::int::{CastInto, Int};
+
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
 #[allow(dead_code)]
-pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
+pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
     // The basic idea is to test if the higher bits of `x` are zero and bisect the number
     // of leading zeros. It is possible for all branches of the bisection to use the same
     // code path by conditionally shifting the higher parts down to let the next bisection
@@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
     // because it simplifies the final bisection step.
     let mut x = x;
     // the number of potential leading zeros
-    let mut z = usize::MAX.count_ones() as usize;
+    let mut z = T::BITS as usize;
     // a temporary
-    let mut t: usize;
-    #[cfg(target_pointer_width = "64")]
-    {
+    let mut t: T;
+
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
         t = x >> 32;
-        if t != 0 {
+        if t != T::ZERO {
             z -= 32;
             x = t;
         }
     }
-    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
-    {
+    if T::BITS >= 32 {
         t = x >> 16;
-        if t != 0 {
+        if t != T::ZERO {
             z -= 16;
             x = t;
         }
     }
+    const { assert!(T::BITS >= 16) };
     t = x >> 8;
-    if t != 0 {
+    if t != T::ZERO {
         z -= 8;
         x = t;
     }
     t = x >> 4;
-    if t != 0 {
+    if t != T::ZERO {
         z -= 4;
         x = t;
     }
     t = x >> 2;
-    if t != 0 {
+    if t != T::ZERO {
         z -= 2;
         x = t;
     }
     // the last two bisections are combined into one conditional
     t = x >> 1;
-    if t != 0 {
+    if t != T::ZERO {
         z - 2
     } else {
-        z - x
+        z - x.cast()
     }
 
     // We could potentially save a few cycles by using the LUT trick from
@@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
 #[allow(dead_code)]
-pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
+pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
     let mut x = x;
     // the number of potential leading zeros
-    let mut z = usize::MAX.count_ones() as usize;
+    let mut z = T::BITS;
     // a temporary
-    let mut t: usize;
+    let mut t: u32;
 
     // RISC-V does not have a set-if-greater-than-or-equal instruction and
     // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
@@ -95,55 +98,68 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
     // right). If we try to save an instruction by using `x < imm` for each bisection, we
     // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
     // but the immediate will never fit into 12 bits and never save an instruction.
-    #[cfg(target_pointer_width = "64")]
-    {
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
         // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
         // `t` is set to 0.
-        t = ((x >= (1 << 32)) as usize) << 5;
+        t = ((x >= (T::ONE << 32)) as u32) << 5;
         // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
         // next step to process.
         x >>= t;
         // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential
         // leading zeros
         z -= t;
     }
-    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
-    {
-        t = ((x >= (1 << 16)) as usize) << 4;
+    if T::BITS >= 32 {
+        t = ((x >= (T::ONE << 16)) as u32) << 4;
         x >>= t;
         z -= t;
     }
-    t = ((x >= (1 << 8)) as usize) << 3;
+    const { assert!(T::BITS >= 16) };
+    t = ((x >= (T::ONE << 8)) as u32) << 3;
     x >>= t;
     z -= t;
-    t = ((x >= (1 << 4)) as usize) << 2;
+    t = ((x >= (T::ONE << 4)) as u32) << 2;
     x >>= t;
     z -= t;
-    t = ((x >= (1 << 2)) as usize) << 1;
+    t = ((x >= (T::ONE << 2)) as u32) << 1;
     x >>= t;
     z -= t;
-    t = (x >= (1 << 1)) as usize;
+    t = (x >= (T::ONE << 1)) as u32;
     x >>= t;
     z -= t;
     // All bits except the LSB are guaranteed to be zero for this final bisection step.
     // If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
-    z - x
+    z as usize - x.cast()
 }
 }
 
 intrinsics! {
-    #[maybe_use_optimized_c_shim]
-    #[cfg(any(
-        target_pointer_width = "16",
-        target_pointer_width = "32",
-        target_pointer_width = "64"
-    ))]
-    /// Returns the number of leading binary zeros in `x`.
-    pub extern "C" fn __clzsi2(x: usize) -> usize {
+    /// Returns the number of leading binary zeros in `x`
+    pub extern "C" fn __clzsi2(x: u32) -> usize {
         if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
-            usize_leading_zeros_riscv(x)
+            leading_zeros_riscv(x)
+        } else {
+            leading_zeros_default(x)
+        }
+    }
+
+    /// Returns the number of leading binary zeros in `x`
+    pub extern "C" fn __clzdi2(x: u64) -> usize {
+        if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
+            leading_zeros_riscv(x)
+        } else {
+            leading_zeros_default(x)
+        }
+    }
+
+    /// Returns the number of leading binary zeros in `x`
+    pub extern "C" fn __clzti2(x: u128) -> usize {
+        let hi = (x >> 64) as u64;
+        if hi == 0 {
+            64 + __clzdi2(x as u64)
         } else {
-            usize_leading_zeros_default(x)
+            __clzdi2(hi)
         }
     }
 }
diff --git a/src/int/mod.rs b/src/int/mod.rs
@@ -12,7 +12,6 @@ pub mod shift;
 pub mod udiv;
 
 pub use big::{i256, u256};
-pub use leading_zeros::__clzsi2;
 
 public_test_dep! {
 /// Minimal integer implementations needed on all integer types, including wide integers.

diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs
@@ -65,31 +65,70 @@ fn fuzz_values() {
 
 #[test]
 fn leading_zeros() {
-    use compiler_builtins::int::__clzsi2;
-    use compiler_builtins::int::leading_zeros::{
-        usize_leading_zeros_default, usize_leading_zeros_riscv,
-    };
-    fuzz(N, |x: usize| {
-        let lz = x.leading_zeros() as usize;
-        let lz0 = __clzsi2(x);
-        let lz1 = usize_leading_zeros_default(x);
-        let lz2 = usize_leading_zeros_riscv(x);
-        if lz0 != lz {
-            panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
-        }
-        if lz1 != lz {
-            panic!(
-                "usize_leading_zeros_default({}): std: {}, builtins: {}",
-                x, lz, lz1
-            );
-        }
-        if lz2 != lz {
-            panic!(
-                "usize_leading_zeros_riscv({}): std: {}, builtins: {}",
-                x, lz, lz2
-            );
-        }
-    })
+    use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
+    {
+        use compiler_builtins::int::leading_zeros::__clzsi2;
+        fuzz(N, |x: u32| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzsi2(x);
+            let lz1 = leading_zeros_default(x);
+            let lz2 = leading_zeros_riscv(x);
+            if lz0 != lz {
+                panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+            if lz1 != lz {
+                panic!(
+                    "leading_zeros_default({}): std: {}, builtins: {}",
+                    x, lz, lz1
+                );
+            }
+            if lz2 != lz {
+                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+            }
+        });
+    }
+
+    {
+        use compiler_builtins::int::leading_zeros::__clzdi2;
+        fuzz(N, |x: u64| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzdi2(x);
+            let lz1 = leading_zeros_default(x);
+            let lz2 = leading_zeros_riscv(x);
+            if lz0 != lz {
+                panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+            if lz1 != lz {
+                panic!(
+                    "leading_zeros_default({}): std: {}, builtins: {}",
+                    x, lz, lz1
+                );
+            }
+            if lz2 != lz {
+                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+            }
+        });
+    }
+
+    {
+        use compiler_builtins::int::leading_zeros::__clzti2;
+        fuzz(N, |x: u128| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzti2(x);
+            if lz0 != lz {
+                panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+        });
+    }
 }
 
 #[test]