diff --git a/README.md b/README.md index a24c12739..6ff0de190 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,9 @@ rely on CI. - [x] bswapdi2.c - [x] bswapsi2.c - [x] bswapti2.c +- [x] clzdi2.c +- [x] clzsi2.c +- [x] clzti2.c - [x] comparedf2.c - [x] comparesf2.c - [x] divdf3.c @@ -325,9 +328,6 @@ These builtins are never called by LLVM. - ~~arm/switch32.S~~ - ~~arm/switch8.S~~ - ~~arm/switchu8.S~~ -- ~~clzdi2.c~~ -- ~~clzsi2.c~~ -- ~~clzti2.c~~ - ~~cmpdi2.c~~ - ~~cmpti2.c~~ - ~~ctzdi2.c~~ diff --git a/build.rs b/build.rs index 34467d8f5..b8da7cc56 100644 --- a/build.rs +++ b/build.rs @@ -164,7 +164,6 @@ fn configure_check_cfg() { "__bswapsi2", "__bswapdi2", "__bswapti2", - "__clzsi2", "__divdi3", "__divsi3", "__divmoddi4", @@ -345,8 +344,6 @@ mod c { ("__absvsi2", "absvsi2.c"), ("__addvdi3", "addvdi3.c"), ("__addvsi3", "addvsi3.c"), - ("__clzdi2", "clzdi2.c"), - ("__clzsi2", "clzsi2.c"), ("__cmpdi2", "cmpdi2.c"), ("__ctzdi2", "ctzdi2.c"), ("__ctzsi2", "ctzsi2.c"), @@ -382,7 +379,6 @@ mod c { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), - ("__clzti2", "clzti2.c"), ("__cmpti2", "cmpti2.c"), ("__ctzti2", "ctzti2.c"), ("__ffsti2", "ffsti2.c"), @@ -435,8 +431,6 @@ mod c { ("__aeabi_frsub", "arm/aeabi_frsub.c"), ("__bswapdi2", "arm/bswapdi2.S"), ("__bswapsi2", "arm/bswapsi2.S"), - ("__clzdi2", "arm/clzdi2.S"), - ("__clzsi2", "arm/clzsi2.S"), ("__divmodsi4", "arm/divmodsi4.S"), ("__divsi3", "arm/divsi3.S"), ("__modsi3", "arm/modsi3.S"), @@ -572,9 +566,6 @@ mod c { } } sources.remove(&to_remove); - - // But use some generic implementations where possible - sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) } if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { diff --git a/src/int/leading_zeros.rs b/src/int/leading_zeros.rs index 9e60ab0d7..1fee9fcf5 100644 --- a/src/int/leading_zeros.rs +++ b/src/int/leading_zeros.rs @@ -3,10 +3,12 @@ // adding a zero check at the beginning, but `__clzsi2` has a 
precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. +use crate::int::{CastInto, Int}; + public_test_dep! { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] -pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { +pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same // code path by conditionally shifting the higher parts down to let the next bisection @@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { // because it simplifies the final bisection step. let mut x = x; // the number of potential leading zeros - let mut z = usize::MAX.count_ones() as usize; + let mut z = T::BITS as usize; // a temporary - let mut t: usize; - #[cfg(target_pointer_width = "64")] - { + let mut t: T; + + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { t = x >> 32; - if t != 0 { + if t != T::ZERO { z -= 32; x = t; } } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { + if T::BITS >= 32 { t = x >> 16; - if t != 0 { + if t != T::ZERO { z -= 16; x = t; } } + const { assert!(T::BITS >= 16) }; t = x >> 8; - if t != 0 { + if t != T::ZERO { z -= 8; x = t; } t = x >> 4; - if t != 0 { + if t != T::ZERO { z -= 4; x = t; } t = x >> 2; - if t != 0 { + if t != T::ZERO { z -= 2; x = t; } // the last two bisections are combined into one conditional t = x >> 1; - if t != 0 { + if t != T::ZERO { z - 2 } else { - z - x + z - x.cast() } // We could potentially save a few cycles by using the LUT trick from @@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { public_test_dep! { /// Returns the number of leading binary zeros in `x`. 
#[allow(dead_code)] -pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { +pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize { let mut x = x; // the number of potential leading zeros - let mut z = usize::MAX.count_ones() as usize; + let mut z = T::BITS; // a temporary - let mut t: usize; + let mut t: u32; // RISC-V does not have a set-if-greater-than-or-equal instruction and // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is @@ -95,11 +98,11 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { // right). If we try to save an instruction by using `x < imm` for each bisection, we // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, // but the immediate will never fit into 12 bits and never save an instruction. - #[cfg(target_pointer_width = "64")] - { + const { assert!(T::BITS <= 64) }; + if T::BITS >= 64 { // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise // `t` is set to 0. - t = ((x >= (1 << 32)) as usize) << 5; + t = ((x >= (T::ONE << 32)) as u32) << 5; // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the // next step to process. 
x >>= t; @@ -107,43 +110,56 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { // leading zeros z -= t; } - #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] - { - t = ((x >= (1 << 16)) as usize) << 4; + if T::BITS >= 32 { + t = ((x >= (T::ONE << 16)) as u32) << 4; x >>= t; z -= t; } - t = ((x >= (1 << 8)) as usize) << 3; + const { assert!(T::BITS >= 16) }; + t = ((x >= (T::ONE << 8)) as u32) << 3; x >>= t; z -= t; - t = ((x >= (1 << 4)) as usize) << 2; + t = ((x >= (T::ONE << 4)) as u32) << 2; x >>= t; z -= t; - t = ((x >= (1 << 2)) as usize) << 1; + t = ((x >= (T::ONE << 2)) as u32) << 1; x >>= t; z -= t; - t = (x >= (1 << 1)) as usize; + t = (x >= (T::ONE << 1)) as u32; x >>= t; z -= t; // All bits except the LSB are guaranteed to be zero for this final bisection step. // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. - z - x + z as usize - x.cast() } } intrinsics! { - #[maybe_use_optimized_c_shim] - #[cfg(any( - target_pointer_width = "16", - target_pointer_width = "32", - target_pointer_width = "64" - ))] - /// Returns the number of leading binary zeros in `x`. 
- pub extern "C" fn __clzsi2(x: usize) -> usize { + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzsi2(x: u32) -> usize { if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { - usize_leading_zeros_riscv(x) + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzdi2(x: u64) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + leading_zeros_riscv(x) + } else { + leading_zeros_default(x) + } + } + + /// Returns the number of leading binary zeros in `x` + pub extern "C" fn __clzti2(x: u128) -> usize { + let hi = (x >> 64) as u64; + if hi == 0 { + 64 + __clzdi2(x as u64) } else { - usize_leading_zeros_default(x) + __clzdi2(hi) } } } diff --git a/src/int/mod.rs b/src/int/mod.rs index ddbffd740..728ce1d57 100644 --- a/src/int/mod.rs +++ b/src/int/mod.rs @@ -12,7 +12,6 @@ pub mod shift; pub mod udiv; pub use big::{i256, u256}; -pub use leading_zeros::__clzsi2; public_test_dep! { /// Minimal integer implementations needed on all integer types, including wide integers. 
diff --git a/testcrate/tests/misc.rs b/testcrate/tests/misc.rs index c19923b75..f830a5423 100644 --- a/testcrate/tests/misc.rs +++ b/testcrate/tests/misc.rs @@ -65,31 +65,70 @@ fn fuzz_values() { #[test] fn leading_zeros() { - use compiler_builtins::int::__clzsi2; - use compiler_builtins::int::leading_zeros::{ - usize_leading_zeros_default, usize_leading_zeros_riscv, - }; - fuzz(N, |x: usize| { - let lz = x.leading_zeros() as usize; - let lz0 = __clzsi2(x); - let lz1 = usize_leading_zeros_default(x); - let lz2 = usize_leading_zeros_riscv(x); - if lz0 != lz { - panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); - } - if lz1 != lz { - panic!( - "usize_leading_zeros_default({}): std: {}, builtins: {}", - x, lz, lz1 - ); - } - if lz2 != lz { - panic!( - "usize_leading_zeros_riscv({}): std: {}, builtins: {}", - x, lz, lz2 - ); - } - }) + use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv}; + { + use compiler_builtins::int::leading_zeros::__clzsi2; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzsi2(x); + let lz1 = leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzdi2; + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzdi2(x); + let lz1 = leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + 
panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzti2; + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzti2(x); + if lz0 != lz { + panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0); + } + }); + } } #[test]