forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rollup merge of rust-lang#123778 - jhorstmann:optimize-upper-lower-auto-vectorization, r=the8472
Improve autovectorization of to_lowercase / to_uppercase functions. Refactor the code in the `convert_while_ascii` helper function to make it more suitable for auto-vectorization and also process the full ASCII prefix of the string. The generic case conversion logic will only be invoked starting from the first non-ASCII character. The runtime on a microbenchmark with a small ASCII-only input decreases from ~55ns to ~18ns per iteration. The new implementation also reduces the amount of unsafe code and encapsulates all unsafe inside the helper function. Fixes rust-lang#123712
- Loading branch information
Showing
4 changed files
with
124 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
tests/codegen/issues/issue-123712-str-to-lower-autovectorization.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
//@ compile-flags: -Copt-level=3 | ||
#![crate_type = "lib"] | ||
|
||
/// Ensure that the ascii-prefix loop for `str::to_lowercase` and `str::to_uppercase` uses vector
/// instructions. Since these methods do not get inlined, the relevant code is duplicated here and
/// should be updated when the implementation changes.
///
/// Copies the ASCII-lowercased form of `input` into `output`, one 16-byte chunk at a time, and
/// returns the number of bytes converted. The loop stops at the first chunk that contains a
/// byte above 127, or as soon as fewer than 16 bytes remain, so the returned length is always a
/// multiple of 16 and bytes of `output` past that length are left untouched.
///
/// # Panics
///
/// Panics if `output` is shorter than `input`.
// CHECK-LABEL: @lower_while_ascii
// CHECK: [[A:%[0-9]]] = load <16 x i8>
// CHECK-NEXT: [[B:%[0-9]]] = icmp slt <16 x i8> [[A]], zeroinitializer
// CHECK-NEXT: [[C:%[0-9]]] = bitcast <16 x i1> [[B]] to i16
#[no_mangle]
pub fn lower_while_ascii(mut input: &[u8], mut output: &mut [u8]) -> usize {
    // Process the input in chunks to enable auto-vectorization.
    const N: usize = 16;

    // Establishes the invariant `output.len() == input.len()` that the unchecked
    // accesses below rely on (and panics up front if `output` is too short).
    output = &mut output[..input.len()];

    let mut ascii_prefix_len = 0_usize;
    let mut is_ascii = [false; N];

    while input.len() >= N {
        // SAFETY: the loop condition guarantees `input.len() >= N`.
        let chunk = unsafe { input.get_unchecked(..N) };
        // SAFETY: `output` was truncated to `input.len()` before the loop and both slices
        // are advanced by `N` in lockstep, so `output.len() == input.len() >= N`.
        let out_chunk = unsafe { output.get_unchecked_mut(..N) };

        for j in 0..N {
            is_ascii[j] = chunk[j] <= 127;
        }

        // auto-vectorization for this check is a bit fragile,
        // sum and comparing against the chunk size gives the best result,
        // specifically a pmovmsk instruction on x86.
        // The sum is at most N (16), so it cannot overflow the `u8` accumulator.
        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
            break;
        }

        for j in 0..N {
            out_chunk[j] = chunk[j].to_ascii_lowercase();
        }

        ascii_prefix_len += N;
        // SAFETY: `input.len() >= N` was checked by the loop condition.
        input = unsafe { input.get_unchecked(N..) };
        // SAFETY: `output.len() == input.len()` held at the top of this iteration,
        // so `output.len() >= N` as well.
        output = unsafe { output.get_unchecked_mut(N..) };
    }

    ascii_prefix_len
}