Skip to content

Commit

Permalink
Move final mask and early return out of control character validation loop
Browse files Browse the repository at this point in the history
  • Loading branch information
dtolnay committed Aug 5, 2024
1 parent 3063d69 commit 146f02c
Showing 1 changed file with 30 additions and 29 deletions.
59 changes: 30 additions & 29 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,13 +426,12 @@ impl<'a> SliceRead<'a> {
}
}

// NOTE(review): this span is a rendered diff of `skip_to_escape`, so lines from the pre-change
// and post-change versions sit back to back without +/- markers (e.g. the two `memchr2`
// bindings and the two `STEP` constants below). Presumably it does not compile as written.
//
// Purpose visible from the code: move `self.index` forward to the next `"` or `\` in
// `self.slice` (or to the end of the slice when neither occurs). When
// `forbid_control_characters` is true, the scan additionally stops at a control character.
#[inline(always)]
fn skip_to_escape(&mut self, forbid_control_characters: bool) {
let rest = &self.slice[self.index..];
// Absolute position of the next `"` or `\`, or the slice end when there is none.
let end = self.index + memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
// Same distance, expressed relative to `self.index`: the number of bytes left to validate.
let len_to_validate = memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());

// No validation requested: jumping straight to the memchr result is sufficient.
if !forbid_control_characters {
self.index = end;
self.index += len_to_validate;
return;
}

Expand All @@ -443,36 +442,40 @@ impl<'a> SliceRead<'a> {
// benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
// probably the right fit.
// [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
const STEP: usize = mem::size_of::<usize>();

// Moving this to a local variable removes a spill in the hot loop.
let mut index = self.index;

// Index-based variant: checks STEP bytes per iteration and leaves the loop as soon as a chunk
// produces a non-zero mask.
if self.slice.len() >= STEP {
while index < end.min(self.slice.len() - STEP + 1) {
// We can safely overread past end in most cases. This ensures that SWAR code is
// used to handle the tail in the hot path.
const ONE_BYTES: usize = usize::MAX / 255;
let chars = usize::from_ne_bytes(self.slice[index..][..STEP].try_into().unwrap());
// Non-zero exactly when some byte of the chunk is below 0x20 (the SWAR test cited in [1]).
let mask = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars & (ONE_BYTES << 7);

if mask != 0 {
index += mask.trailing_zeros() as usize / 8;
break;
}

index += STEP;
// Pointer-based variant: `Chunk` names the word type so STEP and ONE_BYTES derive from it.
type Chunk = usize;
const STEP: usize = mem::size_of::<Chunk>();
// Every byte of ONE_BYTES is 0x01 (Chunk::MAX / 255).
const ONE_BYTES: Chunk = Chunk::MAX / 255;

// Bounds reasoning for the raw reads below: the final chunk starts at
// `self.index + len_to_validate / STEP * STEP` and spans STEP bytes, so this condition keeps
// every `read_unaligned` inside `self.slice`.
if self.index + len_to_validate / STEP * STEP + STEP <= self.slice.len() {
let mut ptr = unsafe { self.slice.as_ptr().add(self.index) };
let end_ptr = unsafe { ptr.add(len_to_validate / STEP * STEP) };
let mut last_mask = 0;
let mut combined_mask = 0;
// The loop runs without an early exit. Each iteration folds the previous chunk's mask into
// `combined_mask`, so after the loop `last_mask` holds only the final chunk, which may extend
// past the bytes that need validating.
while ptr <= end_ptr {
combined_mask |= last_mask;
let chars = unsafe { ptr.cast::<Chunk>().read_unaligned() };
// The `ONE_BYTES << 7` selection of the per-byte high bits is applied after the loop.
last_mask = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
ptr = unsafe { ptr.add(STEP) };
}
}

// Index-based tail: byte-wise search for a control character in what the word loop left over.
if index < end {
if let Some(offset) = self.slice[index..end].iter().position(|&c| c <= 0x1F) {
self.index = index + offset;
// Fast path: no earlier chunk flagged a byte below 0x20, and the first flagged byte of the
// final chunk (STEP when none is flagged, since trailing_zeros of 0 is the bit width) is at or
// beyond the `len_to_validate % STEP` bytes that still belong to the validated range.
// NOTE(review): `read_unaligned` yields a native-endian word, so `trailing_zeros() / 8` maps
// to the first byte in memory only on little-endian targets — confirm big-endian handling.
if combined_mask & (ONE_BYTES << 7) == 0
&& (last_mask & (ONE_BYTES << 7)).trailing_zeros() as usize / 8
>= len_to_validate % STEP
{
self.index += len_to_validate;
return;
}
}

self.index = end;
// Fallback: byte-by-byte scan starting from the current `self.index`.
self.skip_to_escape_slow();
}

// Byte-at-a-time fallback: advances `self.index` until it rests on a byte for which
// `is_escape` returns true, or until it reaches the end of `self.slice`.
#[cold]
#[inline(never)]
fn skip_to_escape_slow(&mut self) {
    // `get` yields `None` for an index beyond the slice, in which case nothing moves.
    if let Some(tail) = self.slice.get(self.index..) {
        let plain_run = tail.iter().take_while(|&&byte| !is_escape(byte)).count();
        self.index += plain_run;
    }
}

/// The big optimization here over IoRead is that if the string contains no
Expand Down Expand Up @@ -823,8 +826,6 @@ pub trait Fused: private::Sealed {}
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}

/// Returns `true` for bytes that end a run of plain string content: a double quote, a
/// backslash, or any control character below 0x20.
///
/// This is compiled unconditionally because `skip_to_escape_slow` on `SliceRead` calls it;
/// gating it behind the `std` feature would leave that call unresolved in builds without `std`.
fn is_escape(ch: u8) -> bool {
    matches!(ch, b'"' | b'\\' | 0x00..=0x1F)
}
Expand Down

0 comments on commit 146f02c

Please sign in to comment.