From 2a52a46684468c1b223504c2de0a7dd1fc57eda7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Thu, 9 May 2024 14:45:51 +0200 Subject: [PATCH] Add assumes to avoid bounds checks Surprisingly the compiler removed these bounds checks automatically when targeting x86_64, but not on aarch64. --- library/alloc/src/str.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index 506211ae9bc6..2806789c6266 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -623,9 +623,15 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) { let mut i = 0_usize; // process the input in chunks to enable auto-vectorization + let mut is_ascii = [false; N]; while slice.len() >= N { + // Safety: out_slice was allocated with same lengths as input slice and gets updated with + // the same offsets + unsafe { + core::intrinsics::assume(slice.len() == out_slice.len()); + } + let chunk = &slice[..N]; - let mut is_ascii = [false; N]; for j in 0..N { is_ascii[j] = chunk[j] <= 127; @@ -634,7 +640,7 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) { // auto-vectorization for this check is a bit fragile, // sum and comparing against the chunk size gives the best result, // specifically a pmovmsk instruction on x86. - if is_ascii.into_iter().map(|x| x as u8).sum::<u8>() as usize != N { + if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N { break; } @@ -649,6 +655,12 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) { // handle the remainder as individual bytes while !slice.is_empty() { + // Safety: out_slice was allocated with same lengths as input slice and gets updated with + // the same offsets + unsafe { + core::intrinsics::assume(slice.len() == out_slice.len()); + } + let byte = slice[0]; if byte > 127 { break;