Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: check with unaligned SIMD chunks #12

Merged
merged 1 commit into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 105 additions & 105 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion benches/bench/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ macro_rules! benches {
#[bench]
fn $name(b: &mut Bencher) {
b.iter(|| {
::const_hex::check(black_box($dec))
::const_hex::check_raw(black_box($dec))
});
}
)*
Expand Down
39 changes: 18 additions & 21 deletions src/arch/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pub(crate) unsafe fn encode_neon<const UPPER: bool>(input: &[u8], output: *mut u

#[inline]
pub(crate) fn check(input: &[u8]) -> bool {
if cfg!(miri) || !has_neon() || input.len() < CHUNK_SIZE {
if cfg!(miri) || !has_neon() {
return generic::check(input);
}
unsafe { check_neon(input) }
Expand All @@ -80,26 +80,23 @@ pub(crate) unsafe fn check_neon(input: &[u8]) -> bool {
let ascii_la = vdupq_n_u8(b'a' - 1);
let ascii_lf = vdupq_n_u8(b'f' + 1);

let (prefix, chunks, suffix) = input.align_to::<uint8x16_t>();
generic::check(prefix)
&& chunks.iter().all(|&chunk| {
let ge0 = vcgtq_u8(chunk, ascii_zero);
let le9 = vcltq_u8(chunk, ascii_nine);
let valid_digit = vandq_u8(ge0, le9);

let geua = vcgtq_u8(chunk, ascii_ua);
let leuf = vcltq_u8(chunk, ascii_uf);
let valid_upper = vandq_u8(geua, leuf);

let gela = vcgtq_u8(chunk, ascii_la);
let lelf = vcltq_u8(chunk, ascii_lf);
let valid_lower = vandq_u8(gela, lelf);

let valid_letter = vorrq_u8(valid_lower, valid_upper);
let valid_mask = vorrq_u8(valid_digit, valid_letter);
vminvq_u8(valid_mask) == 0xFF
})
&& generic::check(suffix)
generic::check_unaligned_chunks(input, |chunk| {
let ge0 = vcgtq_u8(chunk, ascii_zero);
let le9 = vcltq_u8(chunk, ascii_nine);
let valid_digit = vandq_u8(ge0, le9);

let geua = vcgtq_u8(chunk, ascii_ua);
let leuf = vcltq_u8(chunk, ascii_uf);
let valid_upper = vandq_u8(geua, leuf);

let gela = vcgtq_u8(chunk, ascii_la);
let lelf = vcltq_u8(chunk, ascii_lf);
let valid_lower = vandq_u8(gela, lelf);

let valid_letter = vorrq_u8(valid_lower, valid_upper);
let valid_mask = vorrq_u8(valid_digit, valid_letter);
vminvq_u8(valid_mask) == 0xFF
})
}

pub(crate) use generic::decode_checked;
Expand Down
13 changes: 13 additions & 0 deletions src/arch/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ pub(crate) const fn check(mut input: &[u8]) -> bool {
true
}

/// Runs the given check function on unaligned chunks of `T` in `input`, with the remainder passed
/// to the generic [`check`].
#[inline]
#[allow(dead_code)]
pub(crate) fn check_unaligned_chunks<T: Copy>(
input: &[u8],
mut check_chunk: impl FnMut(T) -> bool,
) -> bool {
let mut chunks = input.chunks_exact(core::mem::size_of::<T>());
chunks.all(|chunk| check_chunk(unsafe { chunk.as_ptr().cast::<T>().read_unaligned() }))
&& check(chunks.remainder())
}

/// Default checked decoding function.
///
/// # Safety
Expand Down
17 changes: 7 additions & 10 deletions src/arch/portable_simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,13 @@ pub(crate) unsafe fn encode<const UPPER: bool>(input: &[u8], output: *mut u8) {
}

pub(crate) fn check(input: &[u8]) -> bool {
let (prefix, chunks, suffix) = input.as_simd::<CHUNK_SIZE>();
generic::check(prefix)
&& chunks.iter().all(|&chunk| {
let valid_digit = chunk.simd_ge(Simd::splat(b'0')) & chunk.simd_le(Simd::splat(b'9'));
let valid_upper = chunk.simd_ge(Simd::splat(b'A')) & chunk.simd_le(Simd::splat(b'F'));
let valid_lower = chunk.simd_ge(Simd::splat(b'a')) & chunk.simd_le(Simd::splat(b'f'));
let valid = valid_digit | valid_upper | valid_lower;
valid.all()
})
&& generic::check(suffix)
generic::check_unaligned_chunks::<Simd>(input, |chunk| {
let valid_digit = chunk.simd_ge(Simd::splat(b'0')) & chunk.simd_le(Simd::splat(b'9'));
let valid_upper = chunk.simd_ge(Simd::splat(b'A')) & chunk.simd_le(Simd::splat(b'F'));
let valid_lower = chunk.simd_ge(Simd::splat(b'a')) & chunk.simd_le(Simd::splat(b'f'));
let valid = valid_digit | valid_upper | valid_lower;
valid.all()
})
}

pub(crate) use generic::decode_checked;
Expand Down
39 changes: 18 additions & 21 deletions src/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ unsafe fn encode_ssse3<const UPPER: bool>(input: &[u8], output: *mut u8) {

#[inline]
pub(crate) fn check(input: &[u8]) -> bool {
if !has_sse2() || input.len() < CHUNK_SIZE_SSE {
if !has_sse2() {
return generic::check(input);
}
unsafe { check_sse2(input) }
Expand All @@ -99,26 +99,23 @@ unsafe fn check_sse2(input: &[u8]) -> bool {
let ascii_la = _mm_set1_epi8((b'a' - 1) as i8);
let ascii_lf = _mm_set1_epi8((b'f' + 1) as i8);

let (prefix, chunks, suffix) = input.align_to::<__m128i>();
generic::check(prefix)
&& chunks.iter().all(|&chunk| {
let ge0 = _mm_cmpgt_epi8(chunk, ascii_zero);
let le9 = _mm_cmplt_epi8(chunk, ascii_nine);
let valid_digit = _mm_and_si128(ge0, le9);

let geua = _mm_cmpgt_epi8(chunk, ascii_ua);
let leuf = _mm_cmplt_epi8(chunk, ascii_uf);
let valid_upper = _mm_and_si128(geua, leuf);

let gela = _mm_cmpgt_epi8(chunk, ascii_la);
let lelf = _mm_cmplt_epi8(chunk, ascii_lf);
let valid_lower = _mm_and_si128(gela, lelf);

let valid_letter = _mm_or_si128(valid_lower, valid_upper);
let valid_mask = _mm_movemask_epi8(_mm_or_si128(valid_digit, valid_letter));
valid_mask == 0xffff
})
&& generic::check(suffix)
generic::check_unaligned_chunks(input, |chunk| {
let ge0 = _mm_cmpgt_epi8(chunk, ascii_zero);
let le9 = _mm_cmplt_epi8(chunk, ascii_nine);
let valid_digit = _mm_and_si128(ge0, le9);

let geua = _mm_cmpgt_epi8(chunk, ascii_ua);
let leuf = _mm_cmplt_epi8(chunk, ascii_uf);
let valid_upper = _mm_and_si128(geua, leuf);

let gela = _mm_cmpgt_epi8(chunk, ascii_la);
let lelf = _mm_cmplt_epi8(chunk, ascii_lf);
let valid_lower = _mm_and_si128(gela, lelf);

let valid_letter = _mm_or_si128(valid_lower, valid_upper);
let valid_mask = _mm_movemask_epi8(_mm_or_si128(valid_digit, valid_letter));
valid_mask == 0xffff
})
}

#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ const unsafe fn invalid_hex_error(input: &[u8]) -> FromHexError {
if cfg!(debug_assertions) {
panic!("input was valid but `check` failed")
} else {
core::hint::unreachable_unchecked()
unsafe { core::hint::unreachable_unchecked() }
}
}
};
Expand Down