From 92c8317d2ae45a86b632b24996cd29a30d0dc3ce Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 2 Dec 2021 18:04:33 -0800 Subject: [PATCH 1/2] Add `[T]::as_simd(_mut)` SIMD-style optimizations are the most common use for `[T]::align_to(_mut)`, but that's `unsafe`. So these are *safe* wrappers around it, now that we have the `Simd` type available, to make it easier to use. ```rust impl [T] { pub fn as_simd(&self) -> (&[T], &[Simd], &[T]); pub fn as_simd_mut(&mut self) -> (&mut [T], &mut [Simd], &mut [T]); } ``` --- library/core/src/slice/mod.rs | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index a3f59d3075954..7797ae6d49ff2 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -16,6 +16,8 @@ use crate::option::Option::{None, Some}; use crate::ptr; use crate::result::Result; use crate::result::Result::{Err, Ok}; +#[cfg(not(miri))] // Miri does not support all SIMD intrinsics +use crate::simd::{self, Simd}; use crate::slice; #[unstable( @@ -3434,6 +3436,87 @@ impl [T] { } } + /// Split a slice into a prefix, a middle of aligned simd types, and a suffix. + /// + /// This is a safe wrapper around [`slice::align_to`], so has the same weak + /// preconditions as that method. Notably, you must not assume any particular + /// split between the three parts: it's legal for the middle slice to be + /// empty even if the input slice is longer than `3 * LANES`. + /// + /// # Examples + /// + /// ``` + /// #![feature(portable_simd)] + /// + /// let short = &[1, 2, 3]; + /// let (prefix, middle, suffix) = short.as_simd::<4>(); + /// assert_eq!(middle, []); // Not enough elements for anything in the middle + /// + /// // They might be split in any possible way between prefix and suffix + /// let it = prefix.iter().chain(suffix).copied(); + /// assert_eq!(it.collect::>(), vec![1, 2, 3]); + /// + /// fn basic_simd_sum(x: &[f32]) -> f32 { + /// use std::ops::Add; + /// use std::simd::f32x4; + /// let (prefix, middle, suffix) = x.as_simd(); + /// let sums = f32x4::from_array([ + /// prefix.iter().copied().sum(), + /// 0.0, + /// 0.0, + /// suffix.iter().copied().sum(), + /// ]); + /// let sums = middle.iter().copied().fold(sums, f32x4::add); + /// sums.horizontal_sum() + /// } + /// + /// let numbers: Vec = (1..101).map(|x| x as _).collect(); + /// assert_eq!(basic_simd_sum(&numbers[1..99]), 4949.0); + /// ``` + #[unstable(feature = "portable_simd", issue = "86656")] + #[cfg(not(miri))] // Miri does not support all SIMD intrinsics + pub fn as_simd(&self) -> (&[T], &[Simd], &[T]) + where + Simd: AsRef<[T; LANES]>, + T: simd::SimdElement, + simd::LaneCount: simd::SupportedLaneCount, + { + // These are expected to always match, as vector types are laid out like + // arrays per , but we + // might as well double-check since it'll optimize away anyhow. + assert_eq!(mem::size_of::>(), mem::size_of::<[T; LANES]>()); + + // SAFETY: The simd types have the same layout as arrays, just with + // potentially-higher alignment, so the de-facto transmutes are sound. + unsafe { self.align_to() } + } + + /// Split a slice into a prefix, a middle of aligned simd types, and a suffix. + /// + /// This is a safe wrapper around [`slice::align_to`], so has the same weak + /// preconditions as that method. Notably, you must not assume any particular + /// split between the three parts: it's legal for the middle slice to be + /// empty even if the input slice is longer than `3 * LANES`. + /// + /// This is the mutable version of [`slice::as_simd`]; see that for more. + #[unstable(feature = "portable_simd", issue = "86656")] + #[cfg(not(miri))] // Miri does not support all SIMD intrinsics + pub fn as_simd_mut(&mut self) -> (&mut [T], &mut [Simd], &mut [T]) + where + Simd: AsMut<[T; LANES]>, + T: simd::SimdElement, + simd::LaneCount: simd::SupportedLaneCount, + { + // These are expected to always match, as vector types are laid out like + // arrays per , but we + // might as well double-check since it'll optimize away anyhow. + assert_eq!(mem::size_of::>(), mem::size_of::<[T; LANES]>()); + + // SAFETY: The simd types have the same layout as arrays, just with + // potentially-higher alignment, so the de-facto transmutes are sound. + unsafe { self.align_to_mut() } + } + /// Checks if the elements of this slice are sorted. /// /// That is, for each element `a` and its following element `b`, `a <= b` must hold. If the From e4c44c5df707bb0b3de00d24e8b7735ab945362b Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Tue, 14 Dec 2021 15:48:46 -0800 Subject: [PATCH 2/2] Update comments per review feedback --- library/core/src/slice/mod.rs | 56 ++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 7797ae6d49ff2..59ac0e86943ee 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -3436,12 +3436,30 @@ impl [T] { } } - /// Split a slice into a prefix, a middle of aligned simd types, and a suffix. + /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. /// /// This is a safe wrapper around [`slice::align_to`], so has the same weak - /// preconditions as that method. Notably, you must not assume any particular - /// split between the three parts: it's legal for the middle slice to be - /// empty even if the input slice is longer than `3 * LANES`. + /// postconditions as that method. You're only assured that + /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. + /// + /// Notably, all of the following are possible: + /// - `prefix.len() >= LANES`. + /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. + /// - `suffix.len() >= LANES`. + /// + /// That said, this is a safe method, so if you're only writing safe code, + /// then this can at most cause incorrect logic, not unsoundness. + /// + /// # Panics + /// + /// This will panic if the size of the SIMD type is different from + /// `LANES` times that of the scalar. + /// + /// At the time of writing, the trait restrictions on `Simd` keeps + /// that from ever happening, as only power-of-two numbers of lanes are + /// supported. It's possible that, in the future, those restrictions might + /// be lifted in a way that would make it possible to see panics from this + /// method for something like `LANES == 3`. /// /// # Examples /// @@ -3491,14 +3509,32 @@ impl [T] { unsafe { self.align_to() } } - /// Split a slice into a prefix, a middle of aligned simd types, and a suffix. + /// Split a slice into a prefix, a middle of aligned SIMD types, and a suffix. /// - /// This is a safe wrapper around [`slice::align_to`], so has the same weak - /// preconditions as that method. Notably, you must not assume any particular - /// split between the three parts: it's legal for the middle slice to be - /// empty even if the input slice is longer than `3 * LANES`. + /// This is a safe wrapper around [`slice::align_to_mut`], so has the same weak + /// postconditions as that method. You're only assured that + /// `self.len() == prefix.len() + middle.len() * LANES + suffix.len()`. + /// + /// Notably, all of the following are possible: + /// - `prefix.len() >= LANES`. + /// - `middle.is_empty()` despite `self.len() >= 3 * LANES`. + /// - `suffix.len() >= LANES`. + /// + /// That said, this is a safe method, so if you're only writing safe code, + /// then this can at most cause incorrect logic, not unsoundness. + /// + /// This is the mutable version of [`slice::as_simd`]; see that for examples. + /// + /// # Panics + /// + /// This will panic if the size of the SIMD type is different from + /// `LANES` times that of the scalar. /// - /// This is the mutable version of [`slice::as_simd`]; see that for more. + /// At the time of writing, the trait restrictions on `Simd` keeps + /// that from ever happening, as only power-of-two numbers of lanes are + /// supported. It's possible that, in the future, those restrictions might + /// be lifted in a way that would make it possible to see panics from this + /// method for something like `LANES == 3`. #[unstable(feature = "portable_simd", issue = "86656")] #[cfg(not(miri))] // Miri does not support all SIMD intrinsics pub fn as_simd_mut(&mut self) -> (&mut [T], &mut [Simd], &mut [T])