diff --git a/src/compute/aggregate/sum.rs b/src/compute/aggregate/sum.rs index 2314aef95c2..dfae43c48a8 100644 --- a/src/compute/aggregate/sum.rs +++ b/src/compute/aggregate/sum.rs @@ -12,6 +12,7 @@ use crate::{ array::{Array, PrimitiveArray}, bitmap::Bitmap, }; +use num_traits::Zero; /// Object that can reduce itself to a number. This is used in the context of SIMD to reduce /// a MD (e.g. `[f32; 16]`) into a single number (`f32`). @@ -40,22 +41,19 @@ fn split_by_alignment(values: &[T]) -> (&[T], &[T]) { fn nonnull_sum(values: &[T]) -> T where T: NativeType + Simd + Add + std::iter::Sum, - T::Simd: Add + Sum, + T::Simd: Add + Sum + Copy, { - let (head, aligned_values) = split_by_alignment::(values); - - let mut chunks = aligned_values.chunks_exact(T::Simd::LANES); - // Safety: - // we just made sure that we work on a slice af data aligned to T::Simd - let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| { - acc + unsafe { T::Simd::from_chunk_aligned_unchecked(chunk) } - }); + // T::Simd is the vector type T and the alignment is similar to aligning to [T; alignment] + // the alignment of T::Simd ensures that it fits T. + let (head, simd_vals, tail) = unsafe { values.align_to::() }; - let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default()); - let reduced = sum + remainder; + let mut reduced = T::Simd::from_incomplete_chunk(&[], T::default()); + for chunk in simd_vals { + reduced = reduced + *chunk + } - reduced.simd_sum() + head.iter().copied().sum() + reduced.simd_sum() + head.iter().copied().sum() + tail.iter().copied().sum() } /// # Panics @@ -111,7 +109,7 @@ where pub fn sum_primitive(array: &PrimitiveArray) -> Option where T: NativeType + Simd + Add + std::iter::Sum, - T::Simd: Add + Sum, + T::Simd: Add + Sum + Copy, { let null_count = array.null_count(); diff --git a/src/types/simd/mod.rs b/src/types/simd/mod.rs index 41745dd81ba..a52c257d76a 100644 --- a/src/types/simd/mod.rs +++ b/src/types/simd/mod.rs @@ -28,13 +28,6 @@ pub trait NativeSimd: Default { /// * iff `v.len()` != `T::LANES` fn from_chunk(v: &[Self::Native]) -> Self; - /// Convert itself from a slice. - /// # Safety: - /// Caller must ensure: - /// * `v.len() == T::LANES` - /// * slice is aligned to `Self` - unsafe fn from_chunk_aligned_unchecked(v: &[Self::Native]) -> Self; - /// creates a new Self from `v` by populating items from `v` up to its length. /// Items from `v` at positions larger than the number of lanes are ignored; /// remaining items are populated with `remaining`. diff --git a/src/types/simd/native.rs b/src/types/simd/native.rs index f2b55117d4a..7dd007bed74 100644 --- a/src/types/simd/native.rs +++ b/src/types/simd/native.rs @@ -29,11 +29,6 @@ macro_rules! simd { ($name)(v.try_into().unwrap()) } - #[inline] - unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self { - ($name)(v.try_into().unwrap()) - } - #[inline] fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self { let mut a = [remaining; $lanes]; diff --git a/src/types/simd/packed.rs b/src/types/simd/packed.rs index cb5096ca826..b8bb35d9806 100644 --- a/src/types/simd/packed.rs +++ b/src/types/simd/packed.rs @@ -23,11 +23,6 @@ macro_rules! simd { <$name>::from_slice_unaligned(v) } - #[inline] - unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self { - <$name>::from_slice_aligned_unchecked(v) - } - #[inline] fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self { let mut a = [remaining; $lanes];