Skip to content

Commit 4ed7627

Browse files
authored
Rollup merge of #96081 - eduardosm:masks_usize_size_agnostic, r=yaahc
Make some `usize`-typed mask definitions agnostic to the size of `usize`. Some masks were defined as ```rust const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize; ``` where it was assumed that `usize` is never wider than 64 bits, which is currently true. To make those constants valid on a hypothetical 128-bit target, these constants have been redefined in a `usize`-width-agnostic way: ```rust const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; size_of::<usize>()]); ``` There are already some cases where Rust anticipates the possibility of supporting 128-bit targets, such as not implementing `From<usize>` for `u64`.
2 parents 1bce78a + 93ae6f8 commit 4ed7627

File tree

6 files changed

+29
-13
lines changed

6 files changed

+29
-13
lines changed

library/core/benches/ascii/is_ascii.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,6 @@ fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
7777

7878
#[inline]
7979
fn contains_nonascii(v: usize) -> bool {
80-
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
80+
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; core::mem::size_of::<usize>()]);
8181
(NONASCII_MASK & v) != 0
8282
}

library/core/src/num/mod.rs

+21
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,27 @@ impl usize {
890890
widening_impl! { usize, u128, 64, unsigned }
891891
}
892892

893+
impl usize {
894+
/// Returns a `usize` where every byte is equal to `x`.
895+
#[inline]
896+
pub(crate) const fn repeat_u8(x: u8) -> usize {
897+
usize::from_ne_bytes([x; mem::size_of::<usize>()])
898+
}
899+
900+
/// Returns a `usize` where every byte pair (16-bit chunk) is equal to `x`.
901+
#[inline]
902+
pub(crate) const fn repeat_u16(x: u16) -> usize {
903+
let mut r = 0usize;
904+
let mut i = 0;
905+
while i < mem::size_of::<usize>() {
906+
// Use `wrapping_shl` to make it work on targets with 16-bit `usize`
907+
r = r.wrapping_shl(16) | (x as usize);
908+
i += 2;
909+
}
910+
r
911+
}
912+
}
913+
893914
/// A classification of floating point numbers.
894915
///
895916
/// This `enum` is used as the return type for [`f32::classify`] and [`f64::classify`]. See

library/core/src/slice/ascii.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
235235
/// from `../str/mod.rs`, which does something similar for utf8 validation.
236236
#[inline]
237237
fn contains_nonascii(v: usize) -> bool {
238-
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
238+
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
239239
(NONASCII_MASK & v) != 0
240240
}
241241

library/core/src/slice/memchr.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,8 @@
44
use crate::cmp;
55
use crate::mem;
66

7-
const LO_U64: u64 = 0x0101010101010101;
8-
const HI_U64: u64 = 0x8080808080808080;
9-
10-
// Use truncation.
11-
const LO_USIZE: usize = LO_U64 as usize;
12-
const HI_USIZE: usize = HI_U64 as usize;
7+
const LO_USIZE: usize = usize::repeat_u8(0x01);
8+
const HI_USIZE: usize = usize::repeat_u8(0x80);
139
const USIZE_BYTES: usize = mem::size_of::<usize>();
1410

1511
/// Returns `true` if `x` contains any zero byte.

library/core/src/str/count.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,16 @@ fn do_count_chars(s: &str) -> usize {
112112
// true)
113113
#[inline]
114114
fn contains_non_continuation_byte(w: usize) -> usize {
115-
const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
115+
const LSB: usize = usize::repeat_u8(0x01);
116116
((!w >> 7) | (w >> 6)) & LSB
117117
}
118118

119119
// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
120120
// more efficient.
121121
#[inline]
122122
fn sum_bytes_in_usize(values: usize) -> usize {
123-
const LSB_SHORTS: usize = 0x0001_0001_0001_0001_u64 as usize;
124-
const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize;
123+
const LSB_SHORTS: usize = usize::repeat_u16(0x0001);
124+
const SKIP_BYTES: usize = usize::repeat_u16(0x00ff);
125125

126126
let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES);
127127
pair_sum.wrapping_mul(LSB_SHORTS) >> ((USIZE_SIZE - 2) * 8)

library/core/src/str/validations.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ where
112112
Some(ch)
113113
}
114114

115-
// use truncation to fit u64 into usize
116-
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
115+
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
117116

118117
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
119118
#[inline]

0 commit comments

Comments
 (0)