Skip to content

Commit 807bcc0

Browse files
okaneco authored and gitbot committed
Add new implementation benchmark
Add LONG benchmarks for more comparison between the methods
1 parent 3f0dc56 commit 807bcc0

File tree

1 file changed

+42
-3
lines changed

1 file changed

+42
-3
lines changed

core/benches/ascii/is_ascii.rs

+42-3
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,12 @@ macro_rules! benches {
1010
// Ensure we benchmark cases where the functions are called with strings
1111
// that are not perfectly aligned or have a length which is not a
1212
// multiple of size_of::<usize>() (or both)
13-
benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+);
14-
benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+);
15-
benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+);
13+
benches!(mod unaligned_head_medium MEDIUM[1..] $($name $arg $body)+);
14+
benches!(mod unaligned_tail_medium MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+);
15+
benches!(mod unaligned_both_medium MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+);
16+
benches!(mod unaligned_head_long LONG[1..] $($name $arg $body)+);
17+
benches!(mod unaligned_tail_long LONG[..(LONG.len() - 1)] $($name $arg $body)+);
18+
benches!(mod unaligned_both_long LONG[1..(LONG.len() - 1)] $($name $arg $body)+);
1619
};
1720

1821
(mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => {
@@ -49,6 +52,42 @@ benches! {
4952
// Benchmark case 03: passes the input byte slice straight through to
// `is_ascii_align_to_unrolled` (defined outside this excerpt) and yields its
// result as the block's tail expression.
fn case03_align_to_unrolled(bytes: &[u8]) {
5053
is_ascii_align_to_unrolled(bytes)
5154
}
55+
56+
// Benchmark case 04: ASCII check written with plain `while` loops.
//
// Walks `bytes` in chunks of `N` (32) bytes, counting the bytes in each chunk
// that are <= 127. It bails out with `false` as soon as a chunk's count falls
// short of `N`. Any trailing `bytes.len() % N` bytes are then checked one by
// one, and the accumulated flag is the block's tail value.
// NOTE(review): the return type is not written here; presumably the enclosing
// `benches!` macro supplies it -- confirm against the macro definition.
fn case04_while_loop(bytes: &[u8]) {
57+
// Constant chosen to enable `pmovmskb` instruction on x86-64
58+
const N: usize = 32;
59+
60+
let mut i = 0;
61+
62+
while i + N <= bytes.len() {
63+
let chunk_end = i + N;
64+
65+
// Get LLVM to produce a `pmovmskb` instruction on x86-64 which
66+
// creates a mask from the most significant bit of each byte.
67+
// ASCII bytes are less than 128 (0x80), so their most significant
68+
// bit is unset. Thus, detecting non-ASCII bytes can be done in one
69+
// instruction.
70+
let mut count = 0;
71+
while i < chunk_end {
72+
// `true as u8` is 1 and `false as u8` is 0, so this tallies the ASCII
// bytes in the current chunk; the tally never exceeds N (32).
count += (bytes[i] <= 127) as u8;
73+
i += 1;
74+
}
75+
76+
// All bytes should be <= 127 so count is equal to chunk size.
77+
// N is 32, which fits in a `u8`, so the cast below does not truncate.
if count != N as u8 {
78+
return false;
79+
}
80+
}
81+
82+
// Process the remaining `bytes.len() % N` bytes.
83+
let mut is_ascii = true;
84+
while i < bytes.len() {
85+
// Accumulate with `&=` rather than returning early: every tail byte is
// inspected and a single non-ASCII byte leaves the flag `false`.
is_ascii &= bytes[i] <= 127;
86+
i += 1;
87+
}
88+
89+
is_ascii
90+
}
5291
}
5392

5493
// These are separate since it's easier to debug errors if they don't go through

0 commit comments

Comments
 (0)