-
Notifications
You must be signed in to change notification settings - Fork 229
Implement remaining __clz*i2 intrinsics #639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,12 @@ | |
// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. | ||
// Compilers will insert the check for zero in cases where it is needed. | ||
|
||
use crate::int::{CastInto, Int}; | ||
|
||
public_test_dep! { | ||
/// Returns the number of leading binary zeros in `x`. | ||
#[allow(dead_code)] | ||
pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { | ||
pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize { | ||
// The basic idea is to test if the higher bits of `x` are zero and bisect the number | ||
// of leading zeros. It is possible for all branches of the bisection to use the same | ||
// code path by conditionally shifting the higher parts down to let the next bisection | ||
|
@@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { | |
// because it simplifies the final bisection step. | ||
let mut x = x; | ||
// the number of potential leading zeros | ||
let mut z = usize::MAX.count_ones() as usize; | ||
let mut z = T::BITS as usize; | ||
// a temporary | ||
let mut t: usize; | ||
#[cfg(target_pointer_width = "64")] | ||
{ | ||
let mut t: T; | ||
|
||
const { assert!(T::BITS <= 64) }; | ||
if T::BITS >= 64 { | ||
Comment on lines
+25
to
+26
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Optional nit: the second condition could become `T::BITS == 64`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd rather leave this as is, as it is more future-proof (in case any 128-bit implementations actually pop up, and whoever applies the 128-bit part happens to be both incredibly careless and doesn't test anything...). More seriously though, the result of the expression is a compile-time constant, so it doesn't matter at all which operator is used here, and `>=` is more consistent with the 32-bit part down below. |
||
t = x >> 32; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z -= 32; | ||
x = t; | ||
} | ||
} | ||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] | ||
{ | ||
if T::BITS >= 32 { | ||
t = x >> 16; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z -= 16; | ||
x = t; | ||
} | ||
} | ||
const { assert!(T::BITS >= 16) }; | ||
t = x >> 8; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z -= 8; | ||
x = t; | ||
} | ||
t = x >> 4; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z -= 4; | ||
x = t; | ||
} | ||
t = x >> 2; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z -= 2; | ||
x = t; | ||
} | ||
// the last two bisections are combined into one conditional | ||
t = x >> 1; | ||
if t != 0 { | ||
if t != T::ZERO { | ||
z - 2 | ||
} else { | ||
z - x | ||
z - x.cast() | ||
} | ||
|
||
// We could potentially save a few cycles by using the LUT trick from | ||
|
@@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { | |
public_test_dep! { | ||
/// Returns the number of leading binary zeros in `x`. | ||
#[allow(dead_code)] | ||
pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { | ||
pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize { | ||
let mut x = x; | ||
// the number of potential leading zeros | ||
let mut z = usize::MAX.count_ones() as usize; | ||
let mut z = T::BITS; | ||
// a temporary | ||
let mut t: usize; | ||
let mut t: u32; | ||
|
||
// RISC-V does not have a set-if-greater-than-or-equal instruction and | ||
// `(x >= power-of-two) as usize` will get compiled into two instructions, but this is | ||
|
@@ -95,55 +98,68 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { | |
// right). If we try to save an instruction by using `x < imm` for each bisection, we | ||
// have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, | ||
// but the immediate will never fit into 12 bits and never save an instruction. | ||
#[cfg(target_pointer_width = "64")] | ||
{ | ||
const { assert!(T::BITS <= 64) }; | ||
if T::BITS >= 64 { | ||
// If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise | ||
// `t` is set to 0. | ||
t = ((x >= (1 << 32)) as usize) << 5; | ||
t = ((x >= (T::ONE << 32)) as u32) << 5; | ||
// If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the | ||
// next step to process. | ||
x >>= t; | ||
// If `t` was set to `1 << 5`, then we subtract 32 from the number of potential | ||
// leading zeros | ||
z -= t; | ||
} | ||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] | ||
{ | ||
t = ((x >= (1 << 16)) as usize) << 4; | ||
if T::BITS >= 32 { | ||
t = ((x >= (T::ONE << 16)) as u32) << 4; | ||
x >>= t; | ||
z -= t; | ||
} | ||
t = ((x >= (1 << 8)) as usize) << 3; | ||
const { assert!(T::BITS >= 16) }; | ||
t = ((x >= (T::ONE << 8)) as u32) << 3; | ||
x >>= t; | ||
z -= t; | ||
t = ((x >= (1 << 4)) as usize) << 2; | ||
t = ((x >= (T::ONE << 4)) as u32) << 2; | ||
x >>= t; | ||
z -= t; | ||
t = ((x >= (1 << 2)) as usize) << 1; | ||
t = ((x >= (T::ONE << 2)) as u32) << 1; | ||
x >>= t; | ||
z -= t; | ||
t = (x >= (1 << 1)) as usize; | ||
t = (x >= (T::ONE << 1)) as u32; | ||
x >>= t; | ||
z -= t; | ||
// All bits except the LSB are guaranteed to be zero for this final bisection step. | ||
// If `x != 0` then `x == 1` and subtracts one potential zero from `z`. | ||
z - x | ||
z as usize - x.cast() | ||
} | ||
} | ||
|
||
intrinsics! { | ||
#[maybe_use_optimized_c_shim] | ||
#[cfg(any( | ||
target_pointer_width = "16", | ||
target_pointer_width = "32", | ||
target_pointer_width = "64" | ||
))] | ||
/// Returns the number of leading binary zeros in `x`. | ||
pub extern "C" fn __clzsi2(x: usize) -> usize { | ||
/// Returns the number of leading binary zeros in `x` | ||
pub extern "C" fn __clzsi2(x: u32) -> usize { | ||
if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { | ||
usize_leading_zeros_riscv(x) | ||
leading_zeros_riscv(x) | ||
} else { | ||
leading_zeros_default(x) | ||
} | ||
} | ||
|
||
/// Returns the number of leading binary zeros in `x` | ||
pub extern "C" fn __clzdi2(x: u64) -> usize { | ||
if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { | ||
leading_zeros_riscv(x) | ||
} else { | ||
leading_zeros_default(x) | ||
} | ||
} | ||
|
||
/// Returns the number of leading binary zeros in `x` | ||
pub extern "C" fn __clzti2(x: u128) -> usize { | ||
let hi = (x >> 64) as u64; | ||
if hi == 0 { | ||
64 + __clzdi2(x as u64) | ||
} else { | ||
usize_leading_zeros_default(x) | ||
__clzdi2(hi) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you could also remove `__clzdi2` and `__clzsi2` on lines 349-350 now that these aren't broken. Possibly compiler-builtins/build.rs
Line 577 in 2bf0425
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't sure what the policy on these is. Is it "only apply C replacements for broken/missing functions"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is accurate, part of the goal of this crate is to be able to target some platforms without needing a C toolchain. I think that usually once something gets ported over and tested, it can be removed from the C sources lists.
I guess maybe we only used the C implementations on thumb because ours was broken?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Aarch64 CI is down because of the new rustc, I guess (0/1 asm labels are now off limits instead of just discouraged). This particular thing about numeric labels amazes me so much... This is due to an LLVM bug, but instead of dealing with that, people decided to document it in the Rust book (so I guess it is a feature now) and make it a compile-time error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's an easy LLVM bug to fix, unfortunately. That lint should almost certainly be x86-only though, where LLVM was already throwing an error and the lint just makes it a more accurate error. I brought it up.
You can add
to the top of
src/aarch64_linux.rs
just to get that to pass.