Skip to content

Commit

Permalink
feat: add u8x16::unpack_low/high (#147)
Browse files Browse the repository at this point in the history
This commit adds u8x16::unpack_low and u8x16::unpack_high, which are essentially equivalent to SSE2's unpack_low_i8_m128i and unpack_high_i8_m128i, but are also implemented for the SIMD128 and NEON backends.
  • Loading branch information
RRRadicalEdward authored Jan 19, 2024
1 parent 86ff930 commit 6ad5b3d
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
56 changes: 56 additions & 0 deletions src/u8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,62 @@ impl u8x16 {
}
}

/// Interleaves the low eight lanes of `lhs` and `rhs`.
///
/// The result is
/// `[lhs[0], rhs[0], lhs[1], rhs[1], ..., lhs[7], rhs[7]]`;
/// lanes 8 through 15 of both inputs do not contribute to the output.
#[inline]
#[must_use]
pub fn unpack_low(lhs: u8x16, rhs: u8x16) -> u8x16 {
  pick! {
    if #[cfg(target_feature = "sse2")] {
      u8x16 { sse: unpack_low_i8_m128i(lhs.sse, rhs.sse) }
    } else if #[cfg(target_feature = "simd128")] {
      // Shuffle indices 0..=15 select from `lhs`, 16..=31 select from `rhs`.
      u8x16 { simd: u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(lhs.simd, rhs.simd) }
    } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled (see the cfg above), and these intrinsics operate purely
      // on register values.
      unsafe {
        let zipped = vzip_u8(vget_low_u8(lhs.neon), vget_low_u8(rhs.neon));
        u8x16 { neon: vcombine_u8(zipped.0, zipped.1) }
      }
    } else {
      // Portable fallback: borrow each input once, then interleave by hand.
      let l = lhs.as_array_ref();
      let r = rhs.as_array_ref();
      u8x16::new([
        l[0], r[0],
        l[1], r[1],
        l[2], r[2],
        l[3], r[3],
        l[4], r[4],
        l[5], r[5],
        l[6], r[6],
        l[7], r[7],
      ])
    }
  }
}

/// Interleaves the high eight lanes of `lhs` and `rhs`.
///
/// The result is
/// `[lhs[8], rhs[8], lhs[9], rhs[9], ..., lhs[15], rhs[15]]`;
/// lanes 0 through 7 of both inputs do not contribute to the output.
#[inline]
#[must_use]
pub fn unpack_high(lhs: u8x16, rhs: u8x16) -> u8x16 {
  pick! {
    if #[cfg(target_feature = "sse2")] {
      u8x16 { sse: unpack_high_i8_m128i(lhs.sse, rhs.sse) }
    } else if #[cfg(target_feature = "simd128")] {
      // Shuffle indices 0..=15 select from `lhs`, 16..=31 select from `rhs`.
      u8x16 { simd: u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(lhs.simd, rhs.simd) }
    } else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled (see the cfg above), and these intrinsics operate purely
      // on register values.
      unsafe {
        let zipped = vzip_u8(vget_high_u8(lhs.neon), vget_high_u8(rhs.neon));
        u8x16 { neon: vcombine_u8(zipped.0, zipped.1) }
      }
    } else {
      // Portable fallback: borrow each input once, then interleave by hand.
      let l = lhs.as_array_ref();
      let r = rhs.as_array_ref();
      u8x16::new([
        l[8], r[8],
        l[9], r[9],
        l[10], r[10],
        l[11], r[11],
        l[12], r[12],
        l[13], r[13],
        l[14], r[14],
        l[15], r[15],
      ])
    }
  }
}

#[inline]
pub fn to_array(self) -> [u8; 16] {
cast(self)
Expand Down
16 changes: 16 additions & 0 deletions tests/all_tests/t_u8x16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,19 @@ fn impl_u8x16_min() {
let actual = a.min(b);
assert_eq!(expected, actual);
}

#[test]
fn impl_unpack_low_u8() {
  // Lanes 0..=7 of both operands are interleaved, first operand first.
  let lhs = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  let rhs = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);

  let expected: [u8; 16] = [0, 12, 1, 11, 2, 22, 3, 13, 4, 99, 5, 15, 6, 16, 7, 17];
  let actual: [u8; 16] = u8x16::unpack_low(lhs, rhs).into();

  assert_eq!(actual, expected);
}

#[test]
fn impl_unpack_high_u8() {
  // Lanes 8..=15 of both operands are interleaved, first operand first.
  let lhs = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  let rhs = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);

  let expected: [u8; 16] = [8, 8, 9, 19, 10, 2, 11, 21, 12, 22, 13, 3, 14, 24, 15, 127];
  let actual: [u8; 16] = u8x16::unpack_high(lhs, rhs).into();

  assert_eq!(actual, expected);
}

0 comments on commit 6ad5b3d

Please sign in to comment.