Skip to content

Commit

Permalink
Initial aarch64 backend implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniPopes committed Aug 28, 2023
1 parent 34710dc commit c931917
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
50 changes: 50 additions & 0 deletions src/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use crate::generic;
use core::arch::aarch64::*;

const CHUNK_SIZE: usize = core::mem::size_of::<uint8x16_t>();

/// Hex encoding function using aarch64 NEON intrinsics.
///
/// Inputs shorter than one 16-byte chunk are handed to [`generic::encode`] unchanged.
/// Otherwise every full chunk is encoded with NEON and any trailing bytes that do not
/// fill a chunk are encoded by [`generic::encode`] as well.
///
/// Each input byte produces two output bytes: the digit for its high nibble followed by
/// the digit for its low nibble.
///
/// # Safety
///
/// `output` must be a valid pointer to at least `2 * input.len()` bytes.
// SAFETY: this is only compiled when the target feature is enabled.
#[target_feature(enable = "neon")]
pub(super) unsafe fn encode<const UPPER: bool>(input: &[u8], output: *mut u8) {
    if input.len() < CHUNK_SIZE {
        return generic::encode::<UPPER>(input, output);
    }

    // Load the digit lookup table (`vld1q_u8` reads 16 bytes) and build the nibble mask.
    let hex_table = vld1q_u8(super::get_chars_table::<UPPER>().as_ptr());
    let mask_lo = vdupq_n_u8(0x0F);

    let input_chunks = input.chunks_exact(CHUNK_SIZE);
    let input_remainder = input_chunks.remainder();

    // Byte offset into `output`; advances by two output bytes per input byte.
    let mut i = 0;
    for input_chunk in input_chunks {
        // Load input bytes and split them into nibbles.
        let input_bytes = vld1q_u8(input_chunk.as_ptr());
        let lo = vandq_u8(input_bytes, mask_lo);
        // Lanes are unsigned, so the shift fills with zeros and no high-nibble mask is
        // required beforehand.
        let hi = vshrq_n_u8(input_bytes, 4);

        // Lookup the corresponding ASCII hex digit for each nibble.
        let lo = vqtbl1q_u8(hex_table, lo);
        let hi = vqtbl1q_u8(hex_table, hi);

        // Interleave the digits, high nibble first:
        //   first  = [hi[0], lo[0], ..., hi[7],  lo[7]]
        //   second = [hi[8], lo[8], ..., hi[15], lo[15]]
        let first = vzip1q_u8(hi, lo);
        let second = vzip2q_u8(hi, lo);

        // Store both halves back to back. The data is already interleaved, so plain
        // contiguous stores are used; an interleaving store (`vst2q_u8`) on zipped
        // vectors would shuffle the bytes a second time and scramble the output.
        vst1q_u8(output.add(i), first);
        vst1q_u8(output.add(i + CHUNK_SIZE), second);
        i += CHUNK_SIZE * 2;
    }

    // Encode the tail that did not fill a whole chunk.
    if !input_remainder.is_empty() {
        generic::encode::<UPPER>(input_remainder, output.add(i));
    }
}

pub(super) use generic::decode;
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ cfg_if! {
} else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
mod x86;
use x86 as imp;
} else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
mod aarch64;
use aarch64 as imp;
} else {
use generic as imp;
}
Expand Down

0 comments on commit c931917

Please sign in to comment.