@@ -10,9 +10,10 @@ use crate::ops;
10
10
impl [ u8 ] {
11
11
/// Checks if all bytes in this slice are within the ASCII range.
12
12
#[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
13
+ #[ rustc_const_unstable( feature = "const_slice_is_ascii" , issue = "111090" ) ]
13
14
#[ must_use]
14
15
#[ inline]
15
- pub fn is_ascii ( & self ) -> bool {
16
+ pub const fn is_ascii ( & self ) -> bool {
16
17
is_ascii ( self ) // plain path call: forwards to the free function `is_ascii(s: &[u8])`, not a recursive method call
17
18
}
18
19
@@ -21,7 +22,7 @@ impl [u8] {
21
22
#[ unstable( feature = "ascii_char" , issue = "110998" ) ]
22
23
#[ must_use]
23
24
#[ inline]
24
- pub fn as_ascii ( & self ) -> Option < & [ ascii:: Char ] > {
25
+ pub const fn as_ascii ( & self ) -> Option < & [ ascii:: Char ] > {
25
26
if self . is_ascii ( ) {
26
27
// SAFETY: Just checked that it's ASCII
27
28
Some ( unsafe { self . as_ascii_unchecked ( ) } )
@@ -262,11 +263,29 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
262
263
/// Returns `true` if any byte in the word `v` is non-ASCII (>= 128). Adapted
263
264
/// from `../str/mod.rs`, which does something similar for UTF-8 validation.
264
265
#[ inline]
265
- fn contains_nonascii ( v : usize ) -> bool {
266
+ const fn contains_nonascii ( v : usize ) -> bool {
266
267
const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ; // presumably 0x80 copied into every byte of a `usize` -- confirm against `repeat_u8`
267
268
( NONASCII_MASK & v) != 0 // non-zero iff `v` has at least one of the masked bits set
268
269
}
269
270
271
+ /// ASCII test *without* the chunk-at-a-time optimizations.
272
+ ///
273
+ /// This is carefully structured to produce nice small code -- it's smaller in
274
+ /// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
275
+ /// touch it, be sure to run (and update if needed) the assembly test.
276
+ #[ unstable( feature = "str_internals" , issue = "none" ) ]
277
+ #[ doc( hidden) ]
278
+ #[ inline]
279
+ pub const fn is_ascii_simple ( mut bytes : & [ u8 ] ) -> bool {
280
+ while let [ rest @ .., last] = bytes { // peel one byte off the back per iteration; stops matching once `bytes` is empty
281
+ if !last. is_ascii ( ) {
282
+ break ; // leave the offending byte in `bytes` so the emptiness check below yields `false`
283
+ }
284
+ bytes = rest;
285
+ }
286
+ bytes. is_empty ( ) // empty here means every byte was consumed (or there were none), i.e. all were ASCII
287
+ }
288
+
270
289
/// Optimized ASCII test that will use usize-at-a-time operations instead of
271
290
/// byte-at-a-time operations (when possible).
272
291
///
@@ -280,7 +299,7 @@ fn contains_nonascii(v: usize) -> bool {
280
299
/// If any of these loads produces something for which `contains_nonascii`
281
300
/// (above) returns true, then we know the answer is false.
282
301
#[ inline]
283
- fn is_ascii ( s : & [ u8 ] ) -> bool {
302
+ const fn is_ascii ( s : & [ u8 ] ) -> bool {
284
303
const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
285
304
286
305
let len = s. len ( ) ;
@@ -292,7 +311,7 @@ fn is_ascii(s: &[u8]) -> bool {
292
311
// We also do this for architectures where `size_of::<usize>()` isn't
293
312
// sufficient alignment for `usize`, because it's a weird edge case.
294
313
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem:: align_of :: < usize > ( ) {
295
- return s . iter ( ) . all ( |b| b . is_ascii ( ) ) ;
314
+ return is_ascii_simple ( s ) ;
296
315
}
297
316
298
317
// We always read the first word unaligned, which means `align_offset` is
@@ -321,18 +340,26 @@ fn is_ascii(s: &[u8]) -> bool {
321
340
// Paranoia check about alignment, since we're about to do a bunch of
322
341
// unaligned loads. In practice this should be impossible barring a bug in
323
342
// `align_offset` though.
324
- debug_assert_eq ! ( word_ptr. addr( ) % mem:: align_of:: <usize >( ) , 0 ) ;
343
+ // While this method is allowed to spuriously fail in CTFE, if it doesn't
344
+ // have alignment information it should have given a `usize::MAX` for
345
+ // `align_offset` earlier, sending things through the scalar path instead of
346
+ // this one, so this check should pass if it's reachable.
347
+ debug_assert ! ( word_ptr. is_aligned_to( mem:: align_of:: <usize >( ) ) ) ;
325
348
326
349
// Read subsequent words until the last aligned word, excluding the last
327
350
// aligned word by itself to be done in tail check later, to ensure that
328
351
// the tail is always at most one `usize`, avoiding an extra `byte_pos == len` branch.
329
352
while byte_pos < len - USIZE_SIZE {
330
- debug_assert ! (
331
- // Sanity check that the read is in bounds
332
- ( word_ptr. addr( ) + USIZE_SIZE ) <= start. addr( ) . wrapping_add( len) &&
333
- // And that our assumptions about `byte_pos` hold.
334
- ( word_ptr. addr( ) - start. addr( ) ) == byte_pos
335
- ) ;
353
+ // Sanity check that the read is in bounds
354
+ debug_assert ! ( byte_pos + USIZE_SIZE <= len) ;
355
+ // And that our assumptions about `byte_pos` hold.
356
+ debug_assert ! ( matches!(
357
+ word_ptr. cast:: <u8 >( ) . guaranteed_eq( start. wrapping_add( byte_pos) ) ,
358
+ // These are from the same allocation, so will hopefully always be
359
+ // known to match even in CTFE, but if it refuses to compare them
360
+ // that's ok since it's just a debug check anyway.
361
+ None | Some ( true ) ,
362
+ ) ) ;
336
363
337
364
// SAFETY: We know `word_ptr` is properly aligned (because of
338
365
// `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
0 commit comments