From 329412dad35d42221319b0e2b9c225d07cd39985 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 24 Oct 2023 02:01:47 -0400 Subject: [PATCH 1/3] Implement repr(packed) for repr(simd) --- compiler/rustc_codegen_llvm/src/intrinsic.rs | 9 +++- compiler/rustc_ty_utils/src/layout.rs | 18 +++++++- tests/ui/simd/repr_packed.rs | 45 ++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 tests/ui/simd/repr_packed.rs diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index cc7e78b9c62bf..cb7a988757bd0 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -10,7 +10,7 @@ use crate::value::Value; use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh}; use rustc_codegen_ssa::common::{IntPredicate, TypeKind}; use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization}; -use rustc_codegen_ssa::mir::operand::OperandRef; +use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::*; use rustc_hir as hir; @@ -946,6 +946,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>( tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx)); let arg_tys = sig.inputs(); + // Vectors must be immediates (non-power-of-2 #[repr(packed)] are not) + for (ty, arg) in arg_tys.iter().zip(args) { + if ty.is_simd() && !matches!(arg.val, OperandValue::Immediate(_)) { + return_error!(InvalidMonomorphization::SimdArgument { span, name, ty: *ty }); + } + } + if name == sym::simd_select_bitmask { let (len, _) = require_simd!(arg_tys[1], SimdArgument); diff --git a/compiler/rustc_ty_utils/src/layout.rs b/compiler/rustc_ty_utils/src/layout.rs index 826c69ee7160b..83b75fae3926b 100644 --- a/compiler/rustc_ty_utils/src/layout.rs +++ b/compiler/rustc_ty_utils/src/layout.rs @@ -435,7 +435,21 @@ fn layout_of_uncached<'tcx>( .size .checked_mul(e_len, dl) .ok_or_else(|| error(cx, LayoutError::SizeOverflow(ty)))?; - let align = dl.vector_align(size); + + let (abi, align) = if def.repr().packed() && !e_len.is_power_of_two() { + // Non-power-of-two vectors have padding up to the next power-of-two. + // If we're a packed repr, remove the padding while keeping the alignment as close + // to a vector as possible. + ( + Abi::Aggregate { sized: true }, + AbiAndPrefAlign { + abi: Align::max_for_offset(size), + pref: dl.vector_align(size).pref, + }, + ) + } else { + (Abi::Vector { element: e_abi, count: e_len }, dl.vector_align(size)) + }; let size = size.align_to(align.abi); // Compute the placement of the vector fields: @@ -448,7 +462,7 @@ fn layout_of_uncached<'tcx>( tcx.mk_layout(LayoutS { variants: Variants::Single { index: FIRST_VARIANT }, fields, - abi: Abi::Vector { element: e_abi, count: e_len }, + abi, largest_niche: e_ly.largest_niche, size, align, diff --git a/tests/ui/simd/repr_packed.rs b/tests/ui/simd/repr_packed.rs new file mode 100644 index 0000000000000..bfe9a2433f64d --- /dev/null +++ b/tests/ui/simd/repr_packed.rs @@ -0,0 +1,45 @@ +// run-pass + +#![feature(repr_simd, platform_intrinsics)] +#![allow(non_camel_case_types)] + +#[repr(simd, packed)] +struct Simd([T; N]); + +fn check_size_align() { + use std::mem; + assert_eq!(mem::size_of::>(), mem::size_of::<[T; N]>()); + assert_eq!(mem::size_of::>() % mem::align_of::>(), 0); +} + +fn check_ty() { + check_size_align::(); + check_size_align::(); + check_size_align::(); + check_size_align::(); + check_size_align::(); + check_size_align::(); + check_size_align::(); +} + +extern "platform-intrinsic" { + fn simd_add(a: T, b: T) -> T; +} + +fn main() { + check_ty::(); + check_ty::(); + check_ty::(); + check_ty::(); + check_ty::(); + check_ty::(); + check_ty::(); + + unsafe { + // powers-of-two have no padding and work as usual + // non-powers-of-two have padding and need to be expanded to full vectors + let x: Simd = + simd_add(Simd::([0., 1., 2., 3.]), Simd::([2., 2., 2., 2.])); + assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]); + } +} From 20413b1947a3c70b8b4fc4c9ec4b153435a54d38 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Tue, 24 Oct 2023 22:21:42 -0400 Subject: [PATCH 2/3] Add test using non-power-of-two vector --- tests/ui/simd/repr_packed.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/ui/simd/repr_packed.rs b/tests/ui/simd/repr_packed.rs index bfe9a2433f64d..df2d59a58b887 100644 --- a/tests/ui/simd/repr_packed.rs +++ b/tests/ui/simd/repr_packed.rs @@ -6,6 +6,9 @@ #[repr(simd, packed)] struct Simd([T; N]); +#[repr(simd)] +struct FullSimd([T; N]); + fn check_size_align() { use std::mem; assert_eq!(mem::size_of::>(), mem::size_of::<[T; N]>()); @@ -37,9 +40,20 @@ fn main() { unsafe { // powers-of-two have no padding and work as usual - // non-powers-of-two have padding and need to be expanded to full vectors let x: Simd = simd_add(Simd::([0., 1., 2., 3.]), Simd::([2., 2., 2., 2.])); assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]); + + // non-powers-of-two have padding and need to be expanded to full vectors + fn load(v: Simd) -> FullSimd { + unsafe { + let mut tmp = core::mem::MaybeUninit::>::uninit(); + std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1); + tmp.assume_init() + } + } + let x: FullSimd = + simd_add(load(Simd::([0., 1., 2.])), load(Simd::([2., 2., 2.]))); + assert_eq!(x.0, [2., 3., 4.]); } } From c623489b9bb5ad8dcb1094197710364b137d8b49 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Sat, 18 Nov 2023 11:22:55 -0500 Subject: [PATCH 3/3] Add codegen test --- tests/codegen/simd/repr-packed.rs | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/codegen/simd/repr-packed.rs diff --git a/tests/codegen/simd/repr-packed.rs b/tests/codegen/simd/repr-packed.rs new file mode 100644 index 0000000000000..27b9821cbe5d1 --- /dev/null +++ b/tests/codegen/simd/repr-packed.rs @@ -0,0 +1,32 @@ +// compile-flags: -C no-prepopulate-passes + +#![crate_type = "lib"] +#![feature(repr_simd, platform_intrinsics)] + +#[repr(simd, packed)] +pub struct Simd([T; N]); + +#[repr(simd)] +#[derive(Copy, Clone)] +pub struct FullSimd([T; N]); + +extern "platform-intrinsic" { + fn simd_mul(a: T, b: T) -> T; +} + +// non-powers-of-two have padding and need to be expanded to full vectors +fn load(v: Simd) -> FullSimd { + unsafe { + let mut tmp = core::mem::MaybeUninit::>::uninit(); + std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1); + tmp.assume_init() + } +} + +// CHECK-LABEL: @square_packed +#[no_mangle] +pub fn square_packed(x: Simd) -> FullSimd { + // CHECK: align 4 dereferenceable(12) %x + let x = load(x); + unsafe { simd_mul(x, x) } +}