Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix layout of non-power-of-two length vectors #422

Merged
merged 8 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 5 additions & 28 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
env:
CARGO_NET_RETRY: 10
RUSTUP_MAX_RETRIES: 10
PROPTEST_CASES: 64

jobs:
rustfmt:
Expand Down Expand Up @@ -181,6 +182,8 @@ jobs:
cross-tests:
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
runs-on: ubuntu-latest
env:
PROPTEST_CASES: 16
strategy:
fail-fast: false

Expand Down Expand Up @@ -245,36 +248,10 @@ jobs:
- name: Test (release)
run: cross test --verbose --target=${{ matrix.target }} --release

features:
name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
simd:
- ""
- "avx512"
features:
- ""
- "--features std"
- "--features all_lane_counts"
programmerjake marked this conversation as resolved.
Show resolved Hide resolved
- "--all-features"

steps:
- uses: actions/checkout@v2
- name: Detect AVX512
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
- name: Check build
if: ${{ matrix.simd == '' }}
run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
- name: Check AVX
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
run: |
echo "Found AVX features: $CPU_FEATURE"
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}

miri:
runs-on: ubuntu-latest
env:
PROPTEST_CASES: 16
steps:
- uses: actions/checkout@v2
- name: Test (Miri)
Expand Down
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@ members = [
"crates/std_float",
"crates/test_helpers",
]

[profile.test.package."*"]
opt-level = 2

[profile.test.package.test_helpers]
opt-level = 2
2 changes: 2 additions & 0 deletions Cross.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[build.env]
passthrough = ["PROPTEST_CASES"]
3 changes: 1 addition & 2 deletions crates/core_simd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
license = "MIT OR Apache-2.0"

[features]
default = ["as_crate"]
default = ["as_crate", "std"]
as_crate = []
std = []
all_lane_counts = []

[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen = "0.2"
Expand Down
8 changes: 3 additions & 5 deletions crates/core_simd/src/lane_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
};
}

supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
#[cfg(feature = "all_lane_counts")]
supported_lane_count!(
3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
);
25 changes: 20 additions & 5 deletions crates/core_simd/src/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
$simd_call:ident
$simd_call:ident, $op:tt
},
$int:ident ) => {
if $rhs.simd_eq(Simd::splat(0 as _)).any() {
Expand All @@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }

// aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
// these operations aren't vectorized on aarch64 anyway
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are LLVM backend bugs, right? simd_div/simd_rem still should work the same on all targets?

That seems worth tracking somewhere, having subtly buggy intrinsics is no good.

Copy link
Member

@programmerjake programmerjake Aug 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, theoretically LLVM should be able to generate SIMD code for division/remainder by a constant, by using the exact same fancy math as it would use for scalars (which it unfortunately currently does after scalarization of div ops for non-power-of-2 vectors), so once LLVM's bugs are fixed, I think we should switch back to generating SIMD ops.

https://clang.godbolt.org/z/MxK47TWGs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, these are definitely backend bugs

#[cfg(target_arch = "aarch64")]
{
let mut out = Simd::splat(0 as _);
for i in 0..Self::LEN {
out[i] = $lhs[i] $op rhs[i];
}
out
}

#[cfg(not(target_arch = "aarch64"))]
{
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
}
}
};
}
Expand Down Expand Up @@ -205,14 +220,14 @@ for_base_ops! {
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
simd_div
simd_div, /
}
}

impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
simd_rem
simd_rem, %
}
}

Expand Down
28 changes: 28 additions & 0 deletions crates/core_simd/src/simd/num/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,41 @@ macro_rules! impl_trait {
type Bits = Simd<$bits_ty, N>;
type Cast<T: SimdElement> = Simd<T, N>;

#[cfg(not(target_arch = "aarch64"))]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
// Safety: supported types are guaranteed by SimdCast
unsafe { core::intrinsics::simd::simd_as(self) }
}

// https://github.com/llvm/llvm-project/issues/94694
#[cfg(target_arch = "aarch64")]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
const { assert!(N <= 64) };
if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
// Safety: supported types are guaranteed by SimdCast
unsafe { core::intrinsics::simd::simd_as(self) }
} else if N < 4 {
let x = self.resize::<4>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 8 {
let x = self.resize::<8>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 16 {
let x = self.resize::<16>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 32 {
let x = self.resize::<32>(Default::default()).cast();
x.resize::<N>(x[0])
} else {
let x = self.resize::<64>(Default::default()).cast();
x.resize::<N>(x[0])
}
}

#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
Expand Down
2 changes: 1 addition & 1 deletion crates/core_simd/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ use crate::simd::{
// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
// causes rustc to emit illegal LLVM IR in some cases.
#[repr(simd)]
#[repr(simd, packed)]
Copy link
Member

@RalfJung RalfJung Jun 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the plan for simd without packed? Miri currently ICEs when such a type is used with a simd intrinsic and the size is not a power of 2. If portable-simd doesn't need support for that then do we need to have it at all? Can we just make simd itself have the behavior that simd, packed now has?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simd without packed is used by stdarch. portable-simd might use it in the future too, though imo probably won't.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK stdarch only uses power-of-2 vectors, where packed makes no difference?

pub struct Simd<T, const N: usize>([T; N])
where
LaneCount<N>: SupportedLaneCount,
Expand Down
35 changes: 35 additions & 0 deletions crates/core_simd/tests/layout.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#![feature(portable_simd)]

macro_rules! layout_tests {
{ $($mod:ident, $ty:ty,)* } => {
$(
mod $mod {
test_helpers::test_lanes! {
fn no_padding<const LANES: usize>() {
assert_eq!(
core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
core::mem::size_of::<[$ty; LANES]>(),
);
}
}
}
)*
}
}

layout_tests! {
i8, i8,
i16, i16,
i32, i32,
i64, i64,
isize, isize,
u8, u8,
u16, u16,
u32, u32,
u64, u64,
usize, usize,
f32, f32,
f64, f64,
mut_ptr, *mut (),
const_ptr, *const (),
}
1 change: 0 additions & 1 deletion crates/core_simd/tests/masks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ macro_rules! test_mask_api {
assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
}

#[cfg(feature = "all_lane_counts")]
#[test]
fn roundtrip_bitmask_conversion_odd() {
let values = [
Expand Down
3 changes: 0 additions & 3 deletions crates/test_helpers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ publish = false

[dependencies]
proptest = { version = "0.10", default-features = false, features = ["alloc"] }

[features]
all_lane_counts = []
Loading
Loading