Skip to content

Commit

Permalink
feat: add u8x16::narrow_i16x8 (#148)
Browse files Browse the repository at this point in the history
* feat: add u8x16::narrow_i16x8

* style: run cargo fmt and fix clippy warnings
  • Loading branch information
RRRadicalEdward authored Jan 29, 2024
1 parent a048507 commit bd6850c
Show file tree
Hide file tree
Showing 17 changed files with 88 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/f32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1575,7 +1575,7 @@ impl f32x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[f32; 4] {
pub fn as_array_mut(&mut self) -> &mut [f32; 4] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/f64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1605,7 +1605,7 @@ impl f64x2 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[f64; 2] {
pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
cast_mut(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/f64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1470,7 +1470,7 @@ impl f64x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[f64; 4] {
pub fn as_array_mut(&mut self) -> &mut [f64; 4] {
cast_mut(self)
}
}
Expand Down
8 changes: 6 additions & 2 deletions src/i16x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,11 @@ impl i16x16 {
}

/// Calculates partial dot product.
/// Multiplies packed signed 16-bit integers, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers.
/// Multiplies packed signed 16-bit integers, producing intermediate signed
/// 32-bit integers, then horizontally adds adjacent pairs of those
/// intermediate 32-bit integers.
#[inline]
#[must_use]
pub fn dot(self, rhs: Self) -> i32x8 {
pick! {
if #[cfg(target_feature="avx2")] {
Expand Down Expand Up @@ -555,7 +559,7 @@ impl i16x16 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i16; 16] {
pub fn as_array_mut(&mut self) -> &mut [i16; 16] {
cast_mut(self)
}
}
8 changes: 6 additions & 2 deletions src/i16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,11 @@ impl i16x8 {
}

/// Calculates partial dot product.
/// Multiplies packed signed 16-bit integers, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers.
/// Multiplies packed signed 16-bit integers, producing intermediate signed
/// 32-bit integers, then horizontally adds adjacent pairs of those
/// intermediate 32-bit integers.
#[inline]
#[must_use]
pub fn dot(self, rhs: Self) -> i32x4 {
pick! {
if #[cfg(target_feature="sse2")] {
Expand Down Expand Up @@ -1040,7 +1044,7 @@ impl i16x8 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i16; 8] {
pub fn as_array_mut(&mut self) -> &mut [i16; 8] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/i32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ impl i32x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i32; 4] {
pub fn as_array_mut(&mut self) -> &mut [i32; 4] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/i32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ impl i32x8 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i32; 8] {
pub fn as_array_mut(&mut self) -> &mut [i32; 8] {
cast_mut(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/i64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ impl i64x2 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i64; 2] {
pub fn as_array_mut(&mut self) -> &mut [i64; 2] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/i64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ impl i64x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i64; 4] {
pub fn as_array_mut(&mut self) -> &mut [i64; 4] {
cast_mut(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/i8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ impl i8x16 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i8; 16] {
pub fn as_array_mut(&mut self) -> &mut [i8; 16] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/i8x32_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ impl i8x32 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[i8; 32] {
pub fn as_array_mut(&mut self) -> &mut [i8; 32] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/u16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ impl u16x8 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[u16; 8] {
pub fn as_array_mut(&mut self) -> &mut [u16; 8] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/u32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ impl u32x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[u32; 4] {
pub fn as_array_mut(&mut self) -> &mut [u32; 4] {
cast_mut(self)
}
}
2 changes: 1 addition & 1 deletion src/u32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ impl u32x8 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[u32; 8] {
pub fn as_array_mut(&mut self) -> &mut [u32; 8] {
cast_mut(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/u64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ impl u64x4 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[u64; 4] {
pub fn as_array_mut(&mut self) -> &mut [u64; 4] {
cast_mut(self)
}
}
Expand Down
52 changes: 51 additions & 1 deletion src/u8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,8 @@ impl u8x16 {
}

/// Unpack and interleave low lanes of two u8x16
#[inline]
#[must_use]
pub fn unpack_low(lhs: u8x16, rhs: u8x16) -> u8x16 {
pick! {
if #[cfg(target_feature = "sse2")] {
Expand Down Expand Up @@ -488,6 +490,8 @@ impl u8x16 {
}

/// Unpack and interleave high lanes of two u8x16
#[inline]
#[must_use]
pub fn unpack_high(lhs: u8x16, rhs: u8x16) -> u8x16 {
pick! {
if #[cfg(target_feature = "sse2")] {
Expand Down Expand Up @@ -515,6 +519,52 @@ impl u8x16 {
}
}

/// Pack and saturate two i16x8 to u8x16
#[inline]
#[must_use]
pub fn narrow_i16x8(lhs: i16x8, rhs: i16x8) -> Self {
pick! {
if #[cfg(target_feature = "sse2")] {
u8x16 { sse: pack_i16_to_u8_m128i(lhs.sse, rhs.sse) }
} else if #[cfg(target_feature = "simd128")] {
u8x16 { simd: u8x16_narrow_i16x8(lhs.simd, rhs.simd) }
} else if #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] {
let lhs = unsafe { vqmovun_s16(lhs.neon) };
let rhs = unsafe { vqmovun_s16(rhs.neon) };
u8x16 { neon: unsafe { vcombine_u8(lhs, rhs) } }
} else {
fn clamp(a: i16) -> u8 {
if a < u8::MIN as i16 {
u8::MIN
} else if a > u8::MAX as i16 {
u8::MAX
} else {
a as u8
}
}

Self { arr: [
clamp(lhs.as_array_ref()[0]),
clamp(lhs.as_array_ref()[1]),
clamp(lhs.as_array_ref()[2]),
clamp(lhs.as_array_ref()[3]),
clamp(lhs.as_array_ref()[4]),
clamp(lhs.as_array_ref()[5]),
clamp(lhs.as_array_ref()[6]),
clamp(lhs.as_array_ref()[7]),
clamp(rhs.as_array_ref()[0]),
clamp(rhs.as_array_ref()[1]),
clamp(rhs.as_array_ref()[2]),
clamp(rhs.as_array_ref()[3]),
clamp(rhs.as_array_ref()[4]),
clamp(rhs.as_array_ref()[5]),
clamp(rhs.as_array_ref()[6]),
clamp(rhs.as_array_ref()[7]),
]}
}
}
}

#[inline]
pub fn to_array(self) -> [u8; 16] {
cast(self)
Expand All @@ -526,7 +576,7 @@ impl u8x16 {
}

#[inline]
pub fn as_array_mut(&mut self) -> &mut[u8; 16] {
pub fn as_array_mut(&mut self) -> &mut [u8; 16] {
cast_mut(self)
}
}
14 changes: 12 additions & 2 deletions tests/all_tests/t_u8x16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,25 @@ fn impl_u8x16_min() {
#[test]
fn impl_unpack_low_u8() {
let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let b = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);
let b =
u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);
let c: [u8; 16] = u8x16::unpack_low(a, b).into();
assert_eq!(c, [0, 12, 1, 11, 2, 22, 3, 13, 4, 99, 5, 15, 6, 16, 7, 17]);
}

#[test]
fn impl_unpack_high_u8() {
let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let b = u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);
let b =
u8x16::from([12, 11, 22, 13, 99, 15, 16, 17, 8, 19, 2, 21, 22, 3, 24, 127]);
let c: [u8; 16] = u8x16::unpack_high(a, b).into();
assert_eq!(c, [8, 8, 9, 19, 10, 2, 11, 21, 12, 22, 13, 3, 14, 24, 15, 127]);
}

/// Checks that `u8x16::narrow_i16x8` packs `a` into the low eight lanes and
/// `b` into the high eight lanes, saturating each lane to the `u8` range.
#[test]
fn impl_narrow_i16x8() {
  // Negative lanes saturate to 0; in-range lanes pass through unchanged.
  let a = i16x8::from([-1, 2, -3, 4, -5, 6, -7, 8]);
  let b = i16x8::from([9, 10, 11, 12, 13, -14, 15, -16]);
  let c: [u8; 16] = u8x16::narrow_i16x8(a, b).into();
  assert_eq!(c, [0, 2, 0, 4, 0, 6, 0, 8, 9, 10, 11, 12, 13, 0, 15, 0]);

  // Boundary lanes: both ends of the i16 range and the values right around
  // the u8 limits, so the upper saturation bound (255) is exercised too.
  let a = i16x8::from([i16::MIN, -1, 0, 1, 254, 255, 256, i16::MAX]);
  let b = i16x8::from([300, 255, 128, 127, 0, -128, -32768, 32767]);
  let c: [u8; 16] = u8x16::narrow_i16x8(a, b).into();
  assert_eq!(
    c,
    [0, 0, 0, 1, 254, 255, 255, 255, 255, 255, 128, 127, 0, 0, 0, 255]
  );
}

0 comments on commit bd6850c

Please sign in to comment.