diff --git a/Cargo.toml b/Cargo.toml
index a6d1af5..7bd9ef6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,5 +41,10 @@ serde = ["hex?/serde", "dep:serde"]
 # This should not be needed most of the time.
 hex = ["dep:hex"]
 
+# Support for the `portable-simd` nightly feature.
+# Note that `-Zbuild-std` may be necessary to unlock better performance than
+# the specialized implementations.
+portable-simd = []
+
 # Nightly features for better performance.
 nightly = []
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index cb6a3bf..3ba6f88 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -18,4 +18,5 @@ path = "fuzz_targets/fuzz_const_hex.rs"
 test = false
 doc = false
 
-[workspace]
+[features]
+portable-simd = ["const-hex/portable-simd"]
diff --git a/src/lib.rs b/src/lib.rs
index 99c842d..38ab927 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,6 +17,7 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
 #![cfg_attr(feature = "nightly", feature(core_intrinsics, inline_const))]
+#![cfg_attr(feature = "portable-simd", feature(portable_simd))]
 #![allow(
     clippy::cast_lossless,
     clippy::inline_always,
@@ -39,7 +40,10 @@ use alloc::{string::String, vec::Vec};
 
 // The main encoding and decoding functions.
 cfg_if! {
-    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
+    if #[cfg(feature = "portable-simd")] {
+        mod portable_simd;
+        use portable_simd as imp;
+    } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
         mod x86;
         use x86 as imp;
     } else {
@@ -134,6 +138,17 @@ pub const HEX_CHARS_UPPER: &[u8; 16] = b"0123456789ABCDEF";
 /// [`u8::MAX`] is used for invalid values.
 pub const HEX_DECODE_LUT: &[u8; 256] = &make_decode_lut();
 
+/// Hex-encodes `input` into `output` by calling the selected backend's `imp::encode`.
+///
+/// # Safety
+///
+/// `output` must be a valid pointer to at least `2 * input.len()` bytes.
+pub unsafe fn encode2(input: &[u8], output: *mut u8) {
+    // NOTE(review): the const argument was missing from this patch text; `false`
+    // is assumed here (TODO confirm it selects the intended hex alphabet).
+    imp::encode::<false>(input, output)
+}
+
 /// A correctly sized stack allocation for the formatted bytes to be written
 /// into.
///
diff --git a/src/portable_simd.rs b/src/portable_simd.rs
new file mode 100644
index 0000000..1b90488
--- /dev/null
+++ b/src/portable_simd.rs
@@ -0,0 +1,60 @@
+use crate::default;
+use core::simd::{u8x16, Swizzle};
+use core::slice;
+
+/// Size of one `u8x16` vector in bytes; input is consumed in chunks of this size.
+const CHUNK_SIZE: usize = core::mem::size_of::<u8x16>();
+
+/// Hex encoding function using [`std::simd`][core::simd].
+///
+/// `input` is split by `as_simd` into a prefix, a run of `u8x16` chunks and a
+/// suffix. The prefix and suffix are handed to `default::encode`; each chunk is
+/// encoded with a 16-entry table lookup on its low and high nibbles.
+///
+/// # Safety
+///
+/// `output` must be a valid pointer to at least `2 * input.len()` bytes.
+pub(super) unsafe fn encode<const UPPER: bool>(input: &[u8], output: *mut u8) {
+    let (prefix, chunks, suffix) = input.as_simd::<CHUNK_SIZE>();
+
+    default::encode::<UPPER>(prefix, output);
+    // `i` is an offset into `output`, not `input`. Every input byte becomes two
+    // hex digits, so the chunk output starts `2 * prefix.len()` bytes in;
+    // advancing by only `prefix.len()` would overwrite part of the prefix output.
+    let mut i = prefix.len() * 2;
+
+    let hex_table = u8x16::from_array(*crate::get_chars_table::<UPPER>());
+    for &chunk in chunks {
+        // Load input bytes and mask to nibbles.
+        let mut lo = chunk & u8x16::splat(15);
+        let mut hi = chunk >> u8x16::splat(4);
+
+        // Lookup the corresponding ASCII hex digit for each nibble.
+        lo = hex_table.swizzle_dyn(lo);
+        hi = hex_table.swizzle_dyn(hi);
+
+        // Interleave the nibbles ([hi[0], lo[0], hi[1], lo[1], ...]).
+        let (hex_lo, hex_hi) = u8x16::interleave(hi, lo);
+
+        // Store result into the output buffer.
+        // SAFETY: each chunk covers `CHUNK_SIZE` input bytes, so the two stores of
+        // `CHUNK_SIZE` bytes stay within the `2 * input.len()` bytes the caller
+        // guarantees behind `output`.
+        hex_lo.copy_to_slice(slice::from_raw_parts_mut(output.add(i), CHUNK_SIZE));
+        i += CHUNK_SIZE;
+        hex_hi.copy_to_slice(slice::from_raw_parts_mut(output.add(i), CHUNK_SIZE));
+        i += CHUNK_SIZE;
+    }
+
+    default::encode::<UPPER>(suffix, output.add(i));
+}
+
+pub(super) use default::decode;
+
+// NOTE(review): nothing in this file refers to `HexCharsTable`; the all-zero
+// `INDEX` looks like a placeholder. Confirm whether it is still needed.
+struct HexCharsTable;
+
+impl Swizzle<16, 16> for HexCharsTable {
+    const INDEX: [usize; 16] = [0; 16];
+}