@@ -10,9 +10,12 @@ macro_rules! benches {
10
10
// Ensure we benchmark cases where the functions are called with strings
11
11
// that are not perfectly aligned or have a length which is not a
12
12
// multiple of size_of::<usize>() (or both)
13
- benches!( mod unaligned_head MEDIUM [ 1 ..] $( $name $arg $body) +) ;
14
- benches!( mod unaligned_tail MEDIUM [ ..( MEDIUM . len( ) - 1 ) ] $( $name $arg $body) +) ;
15
- benches!( mod unaligned_both MEDIUM [ 1 ..( MEDIUM . len( ) - 1 ) ] $( $name $arg $body) +) ;
13
+ benches!( mod unaligned_head_medium MEDIUM [ 1 ..] $( $name $arg $body) +) ;
14
+ benches!( mod unaligned_tail_medium MEDIUM [ ..( MEDIUM . len( ) - 1 ) ] $( $name $arg $body) +) ;
15
+ benches!( mod unaligned_both_medium MEDIUM [ 1 ..( MEDIUM . len( ) - 1 ) ] $( $name $arg $body) +) ;
16
+ benches!( mod unaligned_head_long LONG [ 1 ..] $( $name $arg $body) +) ;
17
+ benches!( mod unaligned_tail_long LONG [ ..( LONG . len( ) - 1 ) ] $( $name $arg $body) +) ;
18
+ benches!( mod unaligned_both_long LONG [ 1 ..( LONG . len( ) - 1 ) ] $( $name $arg $body) +) ;
16
19
} ;
17
20
18
21
( mod $mod_name: ident $input: ident [ $range: expr] $( $name: ident $arg: ident $body: block) +) => {
@@ -49,6 +52,42 @@ benches! {
49
52
// Benchmark case 03: forwards `bytes` unchanged to
// `is_ascii_align_to_unrolled` (defined outside this excerpt) and evaluates
// to whatever that call returns (tail expression, no trailing semicolon).
fn case03_align_to_unrolled( bytes: & [ u8 ] ) {
50
53
is_ascii_align_to_unrolled( bytes)
51
54
}
55
+
56
// Benchmark case 04: ASCII check written with plain `while` loops and
// explicit indexing. Walks `bytes` in chunks of `N` bytes, bailing out with
// `false` on the first chunk that contains a byte > 127, then checks the
// leftover tail. Evaluates to `true` when every byte is <= 127, which
// includes the empty slice (neither loop runs and `is_ascii` stays `true`).
// NOTE(review): the signature carries no `-> bool`; presumably the enclosing
// `benches!` macro supplies the context that gives this body its `bool`
// result -- confirm against the macro definition.
+ fn case04_while_loop( bytes: & [ u8 ] ) {
57
+ // Constant chosen to enable `pmovmskb` instruction on x86-64
// `N` must also stay <= 255: the per-chunk counter below is a `u8` and is
// compared against `N as u8`.
58
+ const N : usize = 32 ;
59
+
60
// Index of the next unprocessed byte; shared by the chunk loop and the
// tail loop so the tail starts exactly where the chunks stopped.
+ let mut i = 0 ;
61
+
62
// Only full `N`-byte chunks are handled here; anything shorter is left
// for the tail loop.
+ while i + N <= bytes. len( ) {
63
+ let chunk_end = i + N ;
64
+
65
+ // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
66
+ // creates a mask from the most significant bit of each byte.
67
+ // ASCII bytes are less than 128 (0x80), so their most significant
68
+ // bit is unset. Thus, detecting non-ASCII bytes can be done in one
69
+ // instruction.
70
// Number of ASCII bytes seen in the current chunk (type inferred as `u8`
// from the `as u8` additions below).
+ let mut count = 0 ;
71
+ while i < chunk_end {
72
// The bool-to-u8 cast yields 1 for an ASCII byte and 0 otherwise.
+ count += ( bytes[ i] <= 127 ) as u8 ;
73
+ i += 1 ;
74
+ }
75
+
76
+ // All bytes should be <= 127 so count is equal to chunk size.
77
+ if count != N as u8 {
78
+ return false ;
79
+ }
80
+ }
81
+
82
+ // Process the remaining `bytes.len() % N` bytes.
// No early exit here: every remaining byte is inspected and the
// per-byte results are AND-ed into `is_ascii`.
83
+ let mut is_ascii = true ;
84
+ while i < bytes. len( ) {
85
+ is_ascii &= bytes[ i] <= 127 ;
86
+ i += 1 ;
87
+ }
88
+
89
+ is_ascii
90
+ }
52
91
}
53
92
54
93
// These are separate since it's easier to debug errors if they don't go through
0 commit comments