From c14d9b25c9575a75127b4ab360c28470c89fe5c7 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Sat, 12 Oct 2024 22:09:07 +0900 Subject: [PATCH] Support s390x z13 vector ABI --- compiler/rustc_codegen_gcc/src/builder.rs | 8 +- compiler/rustc_codegen_gcc/src/context.rs | 12 +- compiler/rustc_middle/src/ty/layout.rs | 17 +- compiler/rustc_span/src/symbol.rs | 1 + compiler/rustc_target/src/abi/call/mod.rs | 4 +- compiler/rustc_target/src/abi/call/s390x.rs | 67 +++-- compiler/rustc_target/src/spec/mod.rs | 4 + .../spec/targets/s390x_unknown_linux_gnu.rs | 3 - .../spec/targets/s390x_unknown_linux_musl.rs | 3 - tests/assembly/s390x-vector-abi.rs | 230 ++++++++++++++++++ 10 files changed, 322 insertions(+), 27 deletions(-) create mode 100644 tests/assembly/s390x-vector-abi.rs diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index b611f9ba8bcb3..9276957dd374c 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -30,7 +30,7 @@ use rustc_middle::ty::{Instance, ParamEnv, Ty, TyCtxt}; use rustc_span::Span; use rustc_span::def_id::DefId; use rustc_target::abi::call::FnAbi; -use rustc_target::spec::{HasTargetSpec, HasWasmCAbiOpt, Target, WasmCAbi}; +use rustc_target::spec::{HasS390xVector, HasTargetSpec, HasWasmCAbiOpt, Target, WasmCAbi}; use crate::common::{SignType, TypeReflection, type_is_pointer}; use crate::context::CodegenCx; @@ -2347,6 +2347,12 @@ impl<'tcx> HasWasmCAbiOpt for Builder<'_, '_, 'tcx> { } } +impl<'tcx> HasS390xVector for Builder<'_, '_, 'tcx> { + fn has_s390x_vector(&self) -> bool { + self.cx.has_s390x_vector() + } +} + pub trait ToGccComp { fn to_gcc_comparison(&self) -> ComparisonOp; } diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 7cb49bf799135..5c9cc85836908 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -17,9 +17,12 @@ use 
rustc_middle::ty::layout::{ use rustc_middle::ty::{self, Instance, ParamEnv, PolyExistentialTraitRef, Ty, TyCtxt}; use rustc_session::Session; use rustc_span::source_map::respan; +use rustc_span::symbol::sym; use rustc_span::{DUMMY_SP, Span}; use rustc_target::abi::{HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx}; -use rustc_target::spec::{HasTargetSpec, HasWasmCAbiOpt, Target, TlsModel, WasmCAbi}; +use rustc_target::spec::{ + HasS390xVector, HasTargetSpec, HasWasmCAbiOpt, Target, TlsModel, WasmCAbi, +}; use crate::callee::get_fn; use crate::common::SignType; @@ -538,6 +541,13 @@ impl<'gcc, 'tcx> HasWasmCAbiOpt for CodegenCx<'gcc, 'tcx> { } } +impl<'gcc, 'tcx> HasS390xVector for CodegenCx<'gcc, 'tcx> { + fn has_s390x_vector(&self) -> bool { + // `unstable_target_features` is used here because "vector" is gated behind s390x_target_feature. + self.tcx.sess.unstable_target_features.contains(&sym::vector) + } +} + impl<'gcc, 'tcx> LayoutOfHelpers<'tcx> for CodegenCx<'gcc, 'tcx> { #[inline] fn handle_layout_err(&self, err: LayoutError<'tcx>, span: Span, ty: Ty<'tcx>) -> ! 
{ diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs index 6c12b691c26c0..c64e799b40e8d 100644 --- a/compiler/rustc_middle/src/ty/layout.rs +++ b/compiler/rustc_middle/src/ty/layout.rs @@ -21,7 +21,9 @@ use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span}; use rustc_target::abi::call::FnAbi; use rustc_target::abi::{FieldIdx, TyAbiInterface, VariantIdx, call}; use rustc_target::spec::abi::Abi as SpecAbi; -use rustc_target::spec::{HasTargetSpec, HasWasmCAbiOpt, PanicStrategy, Target, WasmCAbi}; +use rustc_target::spec::{ + HasS390xVector, HasTargetSpec, HasWasmCAbiOpt, PanicStrategy, Target, WasmCAbi, +}; use tracing::debug; use {rustc_abi as abi, rustc_hir as hir}; @@ -544,6 +546,13 @@ impl<'tcx> HasWasmCAbiOpt for TyCtxt<'tcx> { } } +impl<'tcx> HasS390xVector for TyCtxt<'tcx> { + fn has_s390x_vector(&self) -> bool { + // `unstable_target_features` is used here because "vector" is gated behind s390x_target_feature. + self.sess.unstable_target_features.contains(&sym::vector) + } +} + impl<'tcx> HasTyCtxt<'tcx> for TyCtxt<'tcx> { #[inline] fn tcx(&self) -> TyCtxt<'tcx> { @@ -595,6 +604,12 @@ impl<'tcx> HasWasmCAbiOpt for LayoutCx<'tcx> { } } +impl<'tcx> HasS390xVector for LayoutCx<'tcx> { + fn has_s390x_vector(&self) -> bool { + self.calc.cx.has_s390x_vector() + } +} + impl<'tcx> HasTyCtxt<'tcx> for LayoutCx<'tcx> { fn tcx(&self) -> TyCtxt<'tcx> { self.calc.cx diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index cc3bda99a117b..d8ecc00a7f670 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2130,6 +2130,7 @@ symbols! 
{ vec_pop, vec_with_capacity, vecdeque_iter, + vector, version, vfp2, vis, diff --git a/compiler/rustc_target/src/abi/call/mod.rs b/compiler/rustc_target/src/abi/call/mod.rs index 352861c5ccb49..02615dea47793 100644 --- a/compiler/rustc_target/src/abi/call/mod.rs +++ b/compiler/rustc_target/src/abi/call/mod.rs @@ -5,7 +5,7 @@ use rustc_macros::HashStable_Generic; use rustc_span::Symbol; use crate::abi::{self, Abi, Align, FieldsShape, HasDataLayout, Size, TyAbiInterface, TyAndLayout}; -use crate::spec::{self, HasTargetSpec, HasWasmCAbiOpt, WasmCAbi}; +use crate::spec::{self, HasS390xVector, HasTargetSpec, HasWasmCAbiOpt, WasmCAbi}; mod aarch64; mod amdgpu; @@ -876,7 +876,7 @@ impl<'a, Ty> FnAbi<'a, Ty> { ) -> Result<(), AdjustForForeignAbiError> where Ty: TyAbiInterface<'a, C> + Copy, - C: HasDataLayout + HasTargetSpec + HasWasmCAbiOpt, + C: HasDataLayout + HasTargetSpec + HasWasmCAbiOpt + HasS390xVector, { if abi == spec::abi::Abi::X86Interrupt { if let Some(arg) = self.args.first_mut() { diff --git a/compiler/rustc_target/src/abi/call/s390x.rs b/compiler/rustc_target/src/abi/call/s390x.rs index 502e733126777..d9f38f60eed35 100644 --- a/compiler/rustc_target/src/abi/call/s390x.rs +++ b/compiler/rustc_target/src/abi/call/s390x.rs @@ -1,19 +1,46 @@ -// FIXME: The assumes we're using the non-vector ABI, i.e., compiling -// for a pre-z13 machine or using -mno-vx. 
+use crate::abi::call::{ArgAbi, FnAbi, Reg, RegKind}; +use crate::abi::{Abi, HasDataLayout, Size, TyAbiInterface, TyAndLayout}; +use crate::spec::{HasS390xVector, HasTargetSpec}; -use crate::abi::call::{ArgAbi, FnAbi, Reg}; -use crate::abi::{HasDataLayout, TyAbiInterface}; -use crate::spec::HasTargetSpec; +#[derive(Debug, Clone, Copy, PartialEq)] +enum ABI { + NoVector, // no-vector ABI, i.e., compiling for a pre-z13 machine or using -C target-feature=-vector + Vector, // vector ABI, i.e., compiling for a z13 or later machine or using -C target-feature=+vector +} +use ABI::*; -fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) { - if !ret.layout.is_aggregate() && ret.layout.size.bits() <= 64 { +fn contains_vector<'a, Ty, C>(cx: &C, layout: TyAndLayout<'a, Ty>, expected_size: Size) -> bool +where + Ty: TyAbiInterface<'a, C> + Copy, +{ + match layout.abi { + Abi::Uninhabited | Abi::Scalar(_) | Abi::ScalarPair(..) => false, + Abi::Vector { .. } => layout.size == expected_size, + Abi::Aggregate { .. } => { + for i in 0..layout.fields.count() { + if contains_vector(cx, layout.field(cx, i), expected_size) { + return true; + } + } + false + } + } +} + +fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>, abi: ABI) { + let size = ret.layout.size; + if abi == Vector && size.bits() <= 128 && matches!(ret.layout.abi, Abi::Vector { .. }) { + ret.cast_to(Reg { kind: RegKind::Vector, size }); // FIXME: this cast is unneeded?
+ return; + } + if !ret.layout.is_aggregate() && size.bits() <= 64 { ret.extend_integer_width_to(64); - } else { - ret.make_indirect(); + return; } + ret.make_indirect(); } -fn classify_arg<'a, Ty, C>(cx: &C, arg: &mut ArgAbi<'a, Ty>) +fn classify_arg<'a, Ty, C>(cx: &C, arg: &mut ArgAbi<'a, Ty>, abi: ABI) where Ty: TyAbiInterface<'a, C> + Copy, C: HasDataLayout + HasTargetSpec, @@ -32,19 +59,25 @@ where } return; } - if !arg.layout.is_aggregate() && arg.layout.size.bits() <= 64 { + + let size = arg.layout.size; + if abi == Vector && size.bits() <= 128 && contains_vector(cx, arg.layout, size) { + arg.cast_to(Reg { kind: RegKind::Vector, size }); + return; + } + if !arg.layout.is_aggregate() && size.bits() <= 64 { arg.extend_integer_width_to(64); return; } if arg.layout.is_single_fp_element(cx) { - match arg.layout.size.bytes() { + match size.bytes() { 4 => arg.cast_to(Reg::f32()), 8 => arg.cast_to(Reg::f64()), _ => arg.make_indirect(), } } else { - match arg.layout.size.bytes() { + match size.bytes() { 1 => arg.cast_to(Reg::i8()), 2 => arg.cast_to(Reg::i16()), 4 => arg.cast_to(Reg::i32()), @@ -57,13 +90,15 @@ where pub(crate) fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>) where Ty: TyAbiInterface<'a, C> + Copy, - C: HasDataLayout + HasTargetSpec, + C: HasDataLayout + HasTargetSpec + HasS390xVector, { + let abi = if cx.has_s390x_vector() { Vector } else { NoVector }; + if !fn_abi.ret.is_ignore() { - classify_ret(&mut fn_abi.ret); + classify_ret(&mut fn_abi.ret, abi); } for arg in fn_abi.args.iter_mut() { - classify_arg(cx, arg); + classify_arg(cx, arg, abi); } } diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 82e11a3afce32..6c23e91b2cd4b 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2096,6 +2096,10 @@ pub trait HasWasmCAbiOpt { fn wasm_c_abi_opt(&self) -> WasmCAbi; } +pub trait HasS390xVector { + fn has_s390x_vector(&self) -> bool; +} + type 
StaticCow<T> = Cow<'static, T>; /// Optional aspects of a target specification. diff --git a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs index 3efbb46483613..a84a18a433ffc 100644 --- a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs @@ -6,9 +6,6 @@ pub(crate) fn target() -> Target { base.endian = Endian::Big; // z10 is the oldest CPU supported by LLVM base.cpu = "z10".into(); - // FIXME: The ABI implementation in abi/call/s390x.rs is for now hard-coded to assume the no-vector - // ABI. Pass the -vector feature string to LLVM to respect this assumption. - base.features = "-vector".into(); base.max_atomic_width = Some(128); base.min_global_align = Some(16); base.stack_probes = StackProbeType::Inline; diff --git a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs index 65b5c1167bdd8..4bde0fb729c75 100644 --- a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs @@ -6,9 +6,6 @@ pub(crate) fn target() -> Target { base.endian = Endian::Big; // z10 is the oldest CPU supported by LLVM base.cpu = "z10".into(); - // FIXME: The ABI implementation in abi/call/s390x.rs is for now hard-coded to assume the no-vector - // ABI. Pass the -vector feature string to LLVM to respect this assumption.
- base.features = "-vector".into(); base.max_atomic_width = Some(128); base.min_global_align = Some(16); base.static_position_independent_executables = true; diff --git a/tests/assembly/s390x-vector-abi.rs b/tests/assembly/s390x-vector-abi.rs new file mode 100644 index 0000000000000..8024212298f06 --- /dev/null +++ b/tests/assembly/s390x-vector-abi.rs @@ -0,0 +1,230 @@ +//@ revisions: z10 z10_vector z13 z13_no_vector +// ignore-tidy-linelength +//@ assembly-output: emit-asm +//@ compile-flags: -O -Z merge-functions=disabled +//@[z10] compile-flags: --target s390x-unknown-linux-gnu +//@[z10] needs-llvm-components: systemz +//@[z10_vector] compile-flags: --target s390x-unknown-linux-gnu -C target-feature=+vector +//@[z10_vector] needs-llvm-components: systemz +//@[z13] compile-flags: --target s390x-unknown-linux-gnu -C target-cpu=z13 +//@[z13] needs-llvm-components: systemz +//@[z13_no_vector] compile-flags: --target s390x-unknown-linux-gnu -C target-cpu=z13 -C target-feature=-vector +//@[z13_no_vector] needs-llvm-components: systemz + +#![feature(no_core, lang_items, repr_simd)] +#![no_core] +#![crate_type = "lib"] +#![allow(non_camel_case_types)] + +#[lang = "sized"] +pub trait Sized {} +#[lang = "copy"] +pub trait Copy {} +#[lang = "freeze"] +pub trait Freeze {} + +impl<T: Copy, const N: usize> Copy for [T; N] {} + +#[repr(simd)] +pub struct i8x8([i8; 8]); +#[repr(simd)] +pub struct i8x16([i8; 16]); +#[repr(simd)] +pub struct i8x32([i8; 32]); +#[repr(C)] +pub struct Wrapper<T>(T); +#[repr(transparent)] +pub struct TransparentWrapper<T>(T); + +impl Copy for i8 {} +impl Copy for i64 {} +impl Copy for i8x8 {} +impl Copy for i8x16 {} +impl Copy for i8x32 {} +impl<T: Copy> Copy for Wrapper<T> {} +impl<T: Copy> Copy for TransparentWrapper<T> {} + +// CHECK-LABEL: vector_ret_small: +// z10: lg %r0, 0(%r3) +// z10-NEXT: stg %r0, 0(%r2) +// z10-NEXT: br %r14 +// z13_no_vector: lg %r0, 0(%r3) +// z13_no_vector-NEXT: stg %r0, 0(%r2) +// z13_no_vector-NEXT: br %r14 +// z10_vector: vlrepg %v24, 0(%r2) +// z10_vector-NEXT: br %r14
+// z13: vlrepg %v24, 0(%r2) +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_ret_small(x: &i8x8) -> i8x8 { + *x +} +// CHECK-LABEL: vector_ret: +// z10: mvc 8(8,%r2), 8(%r3) +// z10-NEXT: mvc 0(8,%r2), 0(%r3) +// z10-NEXT: br %r14 +// z13: vl %v24, 0(%r2), 3 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_ret(x: &i8x16) -> i8x16 { + *x +} +// CHECK-LABEL: vector_ret_large: +// z10: mvc 24(8,%r2), 24(%r3) +// z10-NEXT: mvc 16(8,%r2), 16(%r3) +// z10-NEXT: mvc 8(8,%r2), 8(%r3) +// z10-NEXT: mvc 0(8,%r2), 0(%r3) +// z10-NEXT: br %r14 +// z13: vl %v0, 0(%r3), 4 +// z13-NEXT: vl %v1, 16(%r3), 4 +// z13-NEXT: vst %v1, 16(%r2), 4 +// z13-NEXT: vst %v0, 0(%r2), 4 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_ret_large(x: &i8x32) -> i8x32 { + *x +} + +// CHECK-LABEL: vector_wrapper_ret_small: +// CHECK: mvc 0(8,%r2), 0(%r3) +// CHECK-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_ret_small(x: &Wrapper<i8x8>) -> Wrapper<i8x8> { + *x +} +// CHECK-LABEL: vector_wrapper_ret: +// CHECK: mvc 0(16,%r2), 0(%r3) +// CHECK-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_ret(x: &Wrapper<i8x16>) -> Wrapper<i8x16> { + *x +} +// CHECK-LABEL: vector_wrapper_ret_large: +// z10: mvc 0(32,%r2), 0(%r3) +// z10-NEXT: br %r14 +// z13: vl %v0, 16(%r3), 4 +// z13-NEXT: vst %v0, 16(%r2), 4 +// z13-NEXT: vl %v0, 0(%r3), 4 +// z13-NEXT: vst %v0, 0(%r2), 4 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_ret_large(x: &Wrapper<i8x32>) -> Wrapper<i8x32> { + *x +} + +// CHECK-LABEL: vector_transparent_wrapper_ret_small: +// z10: lg %r0, 0(%r3) +// z10-NEXT: stg %r0, 0(%r2) +// z10-NEXT: br %r14 +// z13_no_vector: lg %r0, 0(%r3) +// z13_no_vector-NEXT: stg %r0, 0(%r2) +// z13_no_vector-NEXT: br %r14 +// z10_vector: vlrepg %v24, 0(%r2) +// z10_vector-NEXT: br %r14 +// z13: vlrepg %v24, 0(%r2) +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_transparent_wrapper_ret_small( + x: &TransparentWrapper<i8x8>, +) -> TransparentWrapper<i8x8> { + *x +} +// CHECK-LABEL:
vector_transparent_wrapper_ret: +// z10: mvc 8(8,%r2), 8(%r3) +// z10-NEXT: mvc 0(8,%r2), 0(%r3) +// z10-NEXT: br %r14 +// z13: vl %v24, 0(%r2), 3 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_transparent_wrapper_ret( + x: &TransparentWrapper<i8x16>, +) -> TransparentWrapper<i8x16> { + *x +} +// CHECK-LABEL: vector_transparent_wrapper_ret_large: +// z10: mvc 24(8,%r2), 24(%r3) +// z10-NEXT: mvc 16(8,%r2), 16(%r3) +// z10-NEXT: mvc 8(8,%r2), 8(%r3) +// z10-NEXT: mvc 0(8,%r2), 0(%r3) +// z10-NEXT: br %r14 +// z13: vl %v0, 0(%r3), 4 +// z13-NEXT: vl %v1, 16(%r3), 4 +// z13-NEXT: vst %v1, 16(%r2), 4 +// z13-NEXT: vst %v0, 0(%r2), 4 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_transparent_wrapper_ret_large( + x: &TransparentWrapper<i8x32>, +) -> TransparentWrapper<i8x32> { + *x +} + +// FIXME: should check output for z10, but it is very long... +// vector_arg_small: +// .cfi_startproc +// stmg %r6, %r15, 48(%r15) +// .cfi_offset %r6, -112 +// .cfi_offset %r15, -40 +// risbg %r5, %r4, 32, 55, 8 +// sll %r2, 24 +// lb %r0, 175(%r15) +// rosbg %r2, %r3, 40, 47, 16 +// rosbg %r2, %r5, 48, 63, 0 +// llc %r1, 167(%r15) +// sll %r0, 8 +// ic %r0, 183(%r15) +// sllg %r2, %r2, 32 +// sll %r6, 24 +// rosbg %r6, %r1, 32, 47, 16 +// rosbg %r6, %r0, 48, 63, 0 +// lr %r2, %r6 +// lmg %r6, %r15, 48(%r15) +// br %r14 +// CHECK-LABEL: vector_arg_small: +// z13: vlgvg %r2, %v24, 0 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_arg_small(x: i8x8) -> i64 { + unsafe { *(&x as *const i8x8 as *const i64) } +} +// CHECK-LABEL: vector_arg: +// z10: lg %r2, 0(%r2) +// z10-NEXT: br %r14 +// z13: vlgvg %r2, %v24, 0 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_arg(x: i8x16) -> i64 { + unsafe { *(&x as *const i8x16 as *const i64) } +} +// CHECK-LABEL: vector_arg_large: +// CHECK: lg %r2, 0(%r2) +// CHECK-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_arg_large(x: i8x32) -> i64 { + unsafe { *(&x as *const i8x32 as *const i64) } +} + +// FIXME: should check output for z10, but
it is very long... +// CHECK-LABEL: vector_wrapper_arg_small: +// z13: vlgvg %r2, %v24, 0 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_arg_small(x: Wrapper<i8x8>) -> i64 { + unsafe { *(&x as *const Wrapper<i8x8> as *const i64) } +} +// CHECK-LABEL: vector_wrapper_arg: +// z10: lg %r2, 0(%r2) +// z10-NEXT: br %r14 +// z13: vlgvg %r2, %v24, 0 +// z13-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_arg(x: Wrapper<i8x16>) -> i64 { + unsafe { *(&x as *const Wrapper<i8x16> as *const i64) } +} +// CHECK-LABEL: vector_wrapper_arg_large: +// CHECK: lg %r2, 0(%r2) +// CHECK-NEXT: br %r14 +#[no_mangle] +extern "C" fn vector_wrapper_arg_large(x: Wrapper<i8x32>) -> i64 { + unsafe { *(&x as *const Wrapper<i8x32> as *const i64) } +}