diff --git a/src/f32x4_.rs b/src/f32x4_.rs
index 8b5cd62..34aea30 100644
--- a/src/f32x4_.rs
+++ b/src/f32x4_.rs
@@ -4,13 +4,13 @@ pick! {
   if #[cfg(target_feature="sse")] {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(16))]
-    pub struct f32x4 { sse: m128 }
+    pub struct f32x4 { pub(crate) sse: m128 }
   } else if #[cfg(target_feature="simd128")] {
     use core::arch::wasm32::*;
 
     #[derive(Clone, Copy)]
     #[repr(transparent)]
-    pub struct f32x4 { simd: v128 }
+    pub struct f32x4 { pub(crate) simd: v128 }
 
     impl Default for f32x4 {
       fn default() -> Self {
@@ -27,7 +27,7 @@ pick! {
     use core::arch::aarch64::*;
     #[repr(C)]
     #[derive(Copy, Clone)]
-    pub struct f32x4 { neon : float32x4_t }
+    pub struct f32x4 { pub(crate) neon : float32x4_t }
 
     impl Default for f32x4 {
       #[inline]
@@ -48,7 +48,7 @@ pick! {
   } else {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(16))]
-    pub struct f32x4 { arr: [f32;4] }
+    pub struct f32x4 { pub(crate) arr: [f32;4] }
   }
 }
 
@@ -1578,4 +1578,24 @@ impl f32x4 {
   pub fn as_array_mut(&mut self) -> &mut [f32; 4] {
     cast_mut(self)
   }
+
+  #[inline]
+  pub fn from_i32x4(v: i32x4) -> Self {
+    pick! {
+      if #[cfg(target_feature="sse2")] {
+        Self { sse: convert_to_m128_from_i32_m128i(v.sse) }
+      } else if #[cfg(target_feature="simd128")] {
+        Self { simd: f32x4_convert_i32x4(v.simd) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
+        Self { neon: unsafe { vcvtq_f32_s32(v.neon) }}
+      } else {
+        Self { arr: [
+          v.as_array_ref()[0] as f32,
+          v.as_array_ref()[1] as f32,
+          v.as_array_ref()[2] as f32,
+          v.as_array_ref()[3] as f32,
+        ] }
+      }
+    }
+  }
 }
diff --git a/src/f32x8_.rs b/src/f32x8_.rs
index 5df2ef1..8c5dd97 100644
--- a/src/f32x8_.rs
+++ b/src/f32x8_.rs
@@ -1418,6 +1418,26 @@ impl f32x8 {
   pub fn as_array_mut(&mut self) -> &mut [f32; 8] {
     cast_mut(self)
   }
+
+  #[inline]
+  pub fn from_i32x8(v: i32x8) -> Self {
+    pick! {
+      if #[cfg(target_feature="avx2")] {
+        Self { avx: convert_to_m256_from_i32_m256i(v.avx2) }
+      } else {
+        Self::new([
+          v.as_array_ref()[0] as f32,
+          v.as_array_ref()[1] as f32,
+          v.as_array_ref()[2] as f32,
+          v.as_array_ref()[3] as f32,
+          v.as_array_ref()[4] as f32,
+          v.as_array_ref()[5] as f32,
+          v.as_array_ref()[6] as f32,
+          v.as_array_ref()[7] as f32,
+        ])
+      }
+    }
+  }
 }
 
 impl Not for f32x8 {
diff --git a/src/f64x2_.rs b/src/f64x2_.rs
index dc7b6cf..704f20c 100644
--- a/src/f64x2_.rs
+++ b/src/f64x2_.rs
@@ -4,13 +4,13 @@ pick! {
   if #[cfg(target_feature="sse2")] {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(16))]
-    pub struct f64x2 { sse: m128d }
+    pub struct f64x2 { pub(crate) sse: m128d }
   } else if #[cfg(target_feature="simd128")] {
     use core::arch::wasm32::*;
 
     #[derive(Clone, Copy)]
     #[repr(transparent)]
-    pub struct f64x2 { simd: v128 }
+    pub struct f64x2 { pub(crate) simd: v128 }
 
     impl Default for f64x2 {
       fn default() -> Self {
@@ -27,7 +27,7 @@ pick! {
     use core::arch::aarch64::*;
     #[repr(C)]
     #[derive(Copy, Clone)]
-    pub struct f64x2 { neon : float64x2_t }
+    pub struct f64x2 { pub(crate) neon: float64x2_t }
 
     impl Default for f64x2 {
       #[inline]
@@ -51,7 +51,7 @@ pick! {
   } else {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(16))]
-    pub struct f64x2 { arr: [f64;2] }
+    pub struct f64x2 { pub(crate) arr: [f64;2] }
   }
 }
 
@@ -1608,6 +1608,33 @@ impl f64x2 {
   pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
     cast_mut(self)
   }
+
+  /// Converts the lower two i32 lanes to two f64 lanes (dropping the higher two i32 lanes)
+  #[inline]
+  pub fn from_i32x4_lower2(v: i32x4) -> Self {
+    pick! {
+      if #[cfg(target_feature="sse2")] {
+        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
+      } else if #[cfg(target_feature="simd128")] {
+        Self { simd: f64x2_convert_low_i32x4(v.simd) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
+        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
+      } else {
+        Self { arr: [
+          v.as_array_ref()[0] as f64,
+          v.as_array_ref()[1] as f64,
+        ]}
+      }
+    }
+  }
+}
+
+impl From<i32x4> for f64x2 {
+  /// Converts the lower two i32 lanes to two f64 lanes (dropping the higher two i32 lanes)
+  #[inline]
+  fn from(v: i32x4) -> Self {
+    Self::from_i32x4_lower2(v)
+  }
 }
 
 impl Not for f64x2 {
diff --git a/src/f64x4_.rs b/src/f64x4_.rs
index 1d9ddef..6010955 100644
--- a/src/f64x4_.rs
+++ b/src/f64x4_.rs
@@ -4,11 +4,11 @@ pick! {
   if #[cfg(target_feature="avx")] {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(32))]
-    pub struct f64x4 { avx: m256d }
+    pub struct f64x4 { pub(crate) avx: m256d }
   } else {
     #[derive(Default, Clone, Copy, PartialEq)]
     #[repr(C, align(32))]
-    pub struct f64x4 { a : f64x2, b : f64x2 }
+    pub struct f64x4 { pub(crate) a: f64x2, pub(crate) b: f64x2 }
   }
 }
 
@@ -1473,6 +1473,29 @@ impl f64x4 {
   pub fn as_array_mut(&mut self) -> &mut [f64; 4] {
     cast_mut(self)
   }
+
+  #[inline]
+  pub fn from_i32x4(v: i32x4) -> Self {
+    pick! {
+      if #[cfg(target_feature="avx")] {
+        Self { avx: convert_to_m256d_from_i32_m128i(v.sse) }
+      } else {
+        Self::new([
+          v.as_array_ref()[0] as f64,
+          v.as_array_ref()[1] as f64,
+          v.as_array_ref()[2] as f64,
+          v.as_array_ref()[3] as f64,
+        ])
+      }
+    }
+  }
+}
+
+impl From<i32x4> for f64x4 {
+  #[inline]
+  fn from(v: i32x4) -> Self {
+    Self::from_i32x4(v)
+  }
 }
 
 impl Not for f64x4 {
diff --git a/src/i64x2_.rs b/src/i64x2_.rs
index 3419f12..c3704a4 100644
--- a/src/i64x2_.rs
+++ b/src/i64x2_.rs
@@ -4,13 +4,13 @@ pick! {
   if #[cfg(target_feature="sse2")] {
     #[derive(Default, Clone, Copy, PartialEq, Eq)]
     #[repr(C, align(16))]
-    pub struct i64x2 { sse: m128i }
+    pub struct i64x2 { pub(crate) sse: m128i }
   } else if #[cfg(target_feature="simd128")] {
     use core::arch::wasm32::*;
 
     #[derive(Clone, Copy)]
     #[repr(transparent)]
-    pub struct i64x2 { simd: v128 }
+    pub struct i64x2 { pub(crate) simd: v128 }
 
     impl Default for i64x2 {
       fn default() -> Self {
@@ -29,7 +29,7 @@ pick! {
     use core::arch::aarch64::*;
     #[repr(C)]
     #[derive(Copy, Clone)]
-    pub struct i64x2 { neon : int64x2_t }
+    pub struct i64x2 { pub(crate) neon : int64x2_t }
 
     impl Default for i64x2 {
       #[inline]
diff --git a/tests/all_tests/t_f32x4.rs b/tests/all_tests/t_f32x4.rs
index 8a45200..e2d24d7 100644
--- a/tests/all_tests/t_f32x4.rs
+++ b/tests/all_tests/t_f32x4.rs
@@ -816,3 +816,10 @@ fn impl_f32x4_sum() {
   let duration = now.elapsed().as_micros();
   println!("Time take {} {}us", sum2, duration);
 }
+
+#[test]
+fn impl_f32x4_from_i32x4() {
+  let i = i32x4::from([1, 2, 3, 4]);
+  let f = f32x4::from([1.0, 2.0, 3.0, 4.0]);
+  assert_eq!(f32x4::from_i32x4(i), f)
+}
diff --git a/tests/all_tests/t_f32x8.rs b/tests/all_tests/t_f32x8.rs
index d2755fc..6ae6e90 100644
--- a/tests/all_tests/t_f32x8.rs
+++ b/tests/all_tests/t_f32x8.rs
@@ -930,3 +930,10 @@ fn impl_transpose_for_f32x8() {
 
   assert_eq!(result, expected);
 }
+
+#[test]
+fn impl_f32x8_from_i32x8() {
+  let i = i32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);
+  let f = f32x8::from([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
+  assert_eq!(f32x8::from_i32x8(i), f)
+}
diff --git a/tests/all_tests/t_f64x2.rs b/tests/all_tests/t_f64x2.rs
index 3e47df4..3382867 100644
--- a/tests/all_tests/t_f64x2.rs
+++ b/tests/all_tests/t_f64x2.rs
@@ -815,3 +815,10 @@ fn impl_f64x2_sum() {
   let duration = now.elapsed().as_micros();
   println!("Time take {} {}us", sum2, duration);
 }
+
+#[test]
+fn impl_f64x2_from_i32x4() {
+  let i = i32x4::from([1, 2, 3, 4]);
+  let f = f64x2::from([1.0, 2.0]);
+  assert_eq!(f64x2::from_i32x4_lower2(i), f)
+}
diff --git a/tests/all_tests/t_f64x4.rs b/tests/all_tests/t_f64x4.rs
index 5addc73..5bcf6b1 100644
--- a/tests/all_tests/t_f64x4.rs
+++ b/tests/all_tests/t_f64x4.rs
@@ -709,3 +709,11 @@ fn impl_f64x4_sum() {
   let duration = now.elapsed().as_micros();
   println!("Time take {} {}us", sum2, duration);
 }
+
+#[test]
+fn impl_f64x4_from_i32x4() {
+  let i = i32x4::from([1, 2, 3, 4]);
+  let f = f64x4::from([1.0, 2.0, 3.0, 4.0]);
+  assert_eq!(f64x4::from(i), f);
+  assert_eq!(f64x4::from_i32x4(i), f);
+}
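
A minimal usage sketch of the conversions this patch adds, not part of the diff itself. It assumes a build of `wide` with the patch applied; the `to_array` accessors already exist in the crate, while `main` and the local bindings are illustrative only.

```rust
use wide::{f32x4, f32x8, f64x2, f64x4, i32x4, i32x8};

fn main() {
  let v4 = i32x4::from([1, 2, 3, 4]);
  let v8 = i32x8::from([1, 2, 3, 4, 5, 6, 7, 8]);

  // Lane-for-lane i32 -> f32 conversions.
  assert_eq!(f32x4::from_i32x4(v4).to_array(), [1.0, 2.0, 3.0, 4.0]);
  assert_eq!(
    f32x8::from_i32x8(v8).to_array(),
    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
  );

  // Widening i32 -> f64: all four lanes fit in an f64x4, and since the
  // patch also adds `From<i32x4>`, `.into()` works as well.
  let wide4: f64x4 = v4.into();
  assert_eq!(wide4.to_array(), [1.0, 2.0, 3.0, 4.0]);

  // Only the lower two i32 lanes fit in an f64x2; the upper two are dropped.
  assert_eq!(f64x2::from_i32x4_lower2(v4).to_array(), [1.0, 2.0]);
}
```

Note the asymmetry in the `From` impls: `f64x2: From<i32x4>` is lossy in lane count (it keeps only the lower two lanes), which is why the named `from_i32x4_lower2` spelling exists alongside it, while `f32x4::from_i32x4` and `f32x8::from_i32x8` convert every lane.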