Skip to content

Commit 8fe73e8

Browse files
committed
Auto merge of #76971 - bugadani:issue-75659, r=Amanieu
Refactor memchr to allow optimization Closes #75659 The implementation already uses naive search if the slice if short enough, but the case is complicated enough to not be optimized away. This PR refactors memchr so that it exists early when the slice is short enough. Codegen-wise, as shown in #75659, memchr was not inlined previously so the only way I could find to test this is to check if there is no memchr call. Let me know if there is a more robust solution here.
2 parents 2ad6187 + de623bf commit 8fe73e8

File tree

4 files changed

+90
-16
lines changed

4 files changed

+90
-16
lines changed

library/core/src/slice/cmp.rs

+2
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,14 @@ where
268268
}
269269

270270
impl SliceContains for u8 {
271+
#[inline]
271272
fn slice_contains(&self, x: &[Self]) -> bool {
272273
memchr::memchr(*self, x).is_some()
273274
}
274275
}
275276

276277
impl SliceContains for i8 {
278+
#[inline]
277279
fn slice_contains(&self, x: &[Self]) -> bool {
278280
let byte = *self as u8;
279281
// SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`

library/core/src/slice/memchr.rs

+24-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
1212
// Use truncation.
1313
const LO_USIZE: usize = LO_U64 as usize;
1414
const HI_USIZE: usize = HI_U64 as usize;
15+
const USIZE_BYTES: usize = mem::size_of::<usize>();
1516

1617
/// Returns `true` if `x` contains any zero byte.
1718
///
@@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
3839
}
3940

4041
/// Returns the first index matching the byte `x` in `text`.
42+
#[inline]
4143
pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
44+
// Fast path for small slices
45+
if text.len() < 2 * USIZE_BYTES {
46+
return text.iter().position(|elt| *elt == x);
47+
}
48+
49+
memchr_general_case(x, text)
50+
}
51+
52+
fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
4253
// Scan for a single byte value by reading two `usize` words at a time.
4354
//
4455
// Split `text` in three parts
4556
// - unaligned initial part, before the first word aligned address in text
4657
// - body, scan by 2 words at a time
4758
// - the last remaining part, < 2 word size
59+
60+
// search up to an aligned boundary
4861
let len = text.len();
4962
let ptr = text.as_ptr();
50-
let usize_bytes = mem::size_of::<usize>();
63+
let mut offset = ptr.align_offset(USIZE_BYTES);
5164

52-
// search up to an aligned boundary
53-
let mut offset = ptr.align_offset(usize_bytes);
5465
if offset > 0 {
5566
offset = cmp::min(offset, len);
5667
if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
6071

6172
// search the body of the text
6273
let repeated_x = repeat_byte(x);
74+
while offset <= len - 2 * USIZE_BYTES {
75+
unsafe {
76+
let u = *(ptr.add(offset) as *const usize);
77+
let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
6378

64-
if len >= 2 * usize_bytes {
65-
while offset <= len - 2 * usize_bytes {
66-
unsafe {
67-
let u = *(ptr.add(offset) as *const usize);
68-
let v = *(ptr.add(offset + usize_bytes) as *const usize);
69-
70-
// break if there is a matching byte
71-
let zu = contains_zero_byte(u ^ repeated_x);
72-
let zv = contains_zero_byte(v ^ repeated_x);
73-
if zu || zv {
74-
break;
75-
}
79+
// break if there is a matching byte
80+
let zu = contains_zero_byte(u ^ repeated_x);
81+
let zv = contains_zero_byte(v ^ repeated_x);
82+
if zu || zv {
83+
break;
7684
}
77-
offset += usize_bytes * 2;
7885
}
86+
offset += USIZE_BYTES * 2;
7987
}
8088

8189
// Find the byte after the point the body loop stopped.

library/core/src/slice/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,7 @@ impl<T> [T] {
16361636
/// assert!(!v.iter().any(|e| e == "hi"));
16371637
/// ```
16381638
#[stable(feature = "rust1", since = "1.0.0")]
1639+
#[inline]
16391640
pub fn contains(&self, x: &T) -> bool
16401641
where
16411642
T: PartialEq,

src/test/codegen/issue-75659.rs

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// This test checks that the call to memchr/slice_contains is optimized away
2+
// when searching in small slices.
3+
4+
// compile-flags: -O
5+
// only-x86_64
6+
7+
#![crate_type = "lib"]
8+
9+
// CHECK-LABEL: @foo1
10+
#[no_mangle]
11+
pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
12+
// CHECK-NOT: memchr
13+
// CHECK-NOT: slice_contains
14+
data.contains(&x)
15+
}
16+
17+
// CHECK-LABEL: @foo2
18+
#[no_mangle]
19+
pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
20+
// CHECK-NOT: memchr
21+
// CHECK-NOT: slice_contains
22+
data.contains(&x)
23+
}
24+
25+
// CHECK-LABEL: @foo3
26+
#[no_mangle]
27+
pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
28+
// CHECK-NOT: memchr
29+
// CHECK-NOT: slice_contains
30+
data.contains(&x)
31+
}
32+
33+
// CHECK-LABEL: @foo4
34+
#[no_mangle]
35+
pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
36+
// CHECK-NOT: memchr
37+
// CHECK-NOT: slice_contains
38+
data.contains(&x)
39+
}
40+
41+
// CHECK-LABEL: @foo8
42+
#[no_mangle]
43+
pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
44+
// CHECK-NOT: memchr
45+
// CHECK-NOT: slice_contains
46+
data.contains(&x)
47+
}
48+
49+
// CHECK-LABEL: @foo8_i8
50+
#[no_mangle]
51+
pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
52+
// CHECK-NOT: memchr
53+
// CHECK-NOT: slice_contains
54+
!data.contains(&x)
55+
}
56+
57+
// Check that the general case isn't inlined
58+
// CHECK-LABEL: @foo80
59+
#[no_mangle]
60+
pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
61+
// CHECK: call core::slice::memchr
62+
data.contains(&x)
63+
}

0 commit comments

Comments
 (0)