From 2a52a46684468c1b223504c2de0a7dd1fc57eda7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= <git@jhorstmann.net>
Date: Thu, 9 May 2024 14:45:51 +0200
Subject: [PATCH] Add assumes to avoid bounds checks

Surprisingly the compiler removed these bounds checks automatically when
targeting x86_64, but not on aarch64.
---
 library/alloc/src/str.rs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
index 506211ae9bc6..2806789c6266 100644
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@@ -623,9 +623,15 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
     let mut i = 0_usize;
 
     // process the input in chunks to enable auto-vectorization
+    let mut is_ascii = [false; N];
     while slice.len() >= N {
+        // SAFETY: `out_slice` was allocated with the same length as the input slice and both are
+        // advanced by the same offsets, so their lengths stay equal
+        unsafe {
+            core::intrinsics::assume(slice.len() == out_slice.len());
+        }
+
         let chunk = &slice[..N];
-        let mut is_ascii = [false; N];
 
         for j in 0..N {
             is_ascii[j] = chunk[j] <= 127;
@@ -634,7 +640,7 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
         // auto-vectorization for this check is a bit fragile,
         // sum and comparing against the chunk size gives the best result,
         // specifically a pmovmsk instruction on x86.
-        if is_ascii.into_iter().map(|x| x as u8).sum::<u8>() as usize != N {
+        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
             break;
         }
 
@@ -649,6 +655,12 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
 
     // handle the remainder as individual bytes
     while !slice.is_empty() {
+        // SAFETY: `out_slice` was allocated with the same length as the input slice and both are
+        // advanced by the same offsets, so their lengths stay equal
+        unsafe {
+            core::intrinsics::assume(slice.len() == out_slice.len());
+        }
+
         let byte = slice[0];
         if byte > 127 {
             break;