diff --git a/src/aarch64.rs b/src/aarch64.rs
new file mode 100644
index 0000000..13abef9
--- /dev/null
+++ b/src/aarch64.rs
@@ -0,0 +1,50 @@
+use crate::default;
+use core::arch::aarch64::*;
+
+const CHUNK_SIZE: usize = core::mem::size_of::<uint8x16_t>();
+
+/// Hex encoding function using aarch64 intrisics.
+///
+/// # Safety
+///
+/// `output` must be a valid pointer to at least `2 * input.len()` bytes.
+// SAFETY: this is only compiled when the target feature is enabled.
+#[target_feature(enable = "neon")]
+pub(super) unsafe fn encode<const UPPER: bool>(input: &[u8], output: *mut u8) {
+    if input.len() < CHUNK_SIZE {
+        return default::encode::<UPPER>(input, output);
+    }
+
+    // Load table and construct masks.
+    let hex_table = vld1q_u8(super::get_chars_table::<UPPER>().as_ptr());
+    let mask_lo = vdupq_n_u8(0x0F);
+    let mask_hi = vdupq_n_u8(0xF0);
+
+    let input_chunks = input.chunks_exact(CHUNK_SIZE);
+    let input_remainder = input_chunks.remainder();
+
+    let mut i = 0;
+    for input_chunk in input_chunks {
+        // Load input bytes and mask to nibbles.
+        let input_bytes = vld1q_u8(input_chunk.as_ptr() as *const u8);
+        let mut lo = vandq_u8(input_bytes, mask_lo);
+        let mut hi = vshrq_n_u8(vandq_u8(input_bytes, mask_hi), 4);
+
+        // Lookup the corresponding ASCII hex digit for each nibble.
+        lo = vqtbl1q_u8(hex_table, lo);
+        hi = vqtbl1q_u8(hex_table, hi);
+
+        // Interleave the nibbles ([hi[0], lo[0], hi[1], lo[1], ...]).
+        let result = vzipq_u8(hi, lo);
+
+        // Store result into the output buffer.
+        vst2q_u8(output.add(i), result);
+        i += CHUNK_SIZE * 2;
+    }
+
+    if !input_remainder.is_empty() {
+        default::encode::<UPPER>(input_remainder, output.add(i));
+    }
+}
+
+pub(super) use default::decode;
diff --git a/src/lib.rs b/src/lib.rs
index aacd8a6..5fa34d9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,6 +46,9 @@ cfg_if! {
     } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
         mod x86;
         use x86 as imp;
+    } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
+        mod aarch64;
+        use aarch64 as imp;
     } else {
         use generic as imp;
     }