Skip to content

Commit

Permalink
Make some usize-typed masks definition agnostic to the size of usize
Browse files Browse the repository at this point in the history
Some masks where defined as
```rust
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
```
where it was assumed that `usize` is never wider than 64, which is currently true.

To make those constants valid in a hypothetical 128-bit target, these constants have been redefined in an `usize`-width-agnostic way
```rust
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; size_of::<usize>()]);
```

There are already some cases where Rust anticipates the possibility of supporting 128-bit targets, such as not implementing `From<usize>` for `u64`.
  • Loading branch information
eduardosm committed Apr 15, 2022
1 parent e7575f9 commit 93ae6f8
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 13 deletions.
2 changes: 1 addition & 1 deletion library/core/benches/ascii/is_ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,6 @@ fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {

#[inline]
fn contains_nonascii(v: usize) -> bool {
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; core::mem::size_of::<usize>()]);
(NONASCII_MASK & v) != 0
}
21 changes: 21 additions & 0 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,27 @@ impl usize {
widening_impl! { usize, u128, 64, unsigned }
}

impl usize {
/// Returns an `usize` where every byte is equal to `x`.
#[inline]
pub(crate) const fn repeat_u8(x: u8) -> usize {
usize::from_ne_bytes([x; mem::size_of::<usize>()])
}

/// Returns an `usize` where every byte pair is equal to `x`.
#[inline]
pub(crate) const fn repeat_u16(x: u16) -> usize {
let mut r = 0usize;
let mut i = 0;
while i < mem::size_of::<usize>() {
// Use `wrapping_shl` to make it work on targets with 16-bit `usize`
r = r.wrapping_shl(16) | (x as usize);
i += 2;
}
r
}
}

/// A classification of floating point numbers.
///
/// This `enum` is used as the return type for [`f32::classify`] and [`f64::classify`]. See
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/slice/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
/// from `../str/mod.rs`, which does something similar for utf8 validation.
#[inline]
fn contains_nonascii(v: usize) -> bool {
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
(NONASCII_MASK & v) != 0
}

Expand Down
8 changes: 2 additions & 6 deletions library/core/src/slice/memchr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@
use crate::cmp;
use crate::mem;

const LO_U64: u64 = 0x0101010101010101;
const HI_U64: u64 = 0x8080808080808080;

// Use truncation.
const LO_USIZE: usize = LO_U64 as usize;
const HI_USIZE: usize = HI_U64 as usize;
const LO_USIZE: usize = usize::repeat_u8(0x01);
const HI_USIZE: usize = usize::repeat_u8(0x80);
const USIZE_BYTES: usize = mem::size_of::<usize>();

/// Returns `true` if `x` contains any zero byte.
Expand Down
6 changes: 3 additions & 3 deletions library/core/src/str/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,16 +112,16 @@ fn do_count_chars(s: &str) -> usize {
// true)
#[inline]
fn contains_non_continuation_byte(w: usize) -> usize {
const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
const LSB: usize = usize::repeat_u8(0x01);
((!w >> 7) | (w >> 6)) & LSB
}

// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
// more efficient.
#[inline]
fn sum_bytes_in_usize(values: usize) -> usize {
const LSB_SHORTS: usize = 0x0001_0001_0001_0001_u64 as usize;
const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize;
const LSB_SHORTS: usize = usize::repeat_u16(0x0001);
const SKIP_BYTES: usize = usize::repeat_u16(0x00ff);

let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES);
pair_sum.wrapping_mul(LSB_SHORTS) >> ((USIZE_SIZE - 2) * 8)
Expand Down
3 changes: 1 addition & 2 deletions library/core/src/str/validations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ where
Some(ch)
}

// use truncation to fit u64 into usize
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
const NONASCII_MASK: usize = usize::repeat_u8(0x80);

/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
#[inline]
Expand Down

0 comments on commit 93ae6f8

Please sign in to comment.