From 7e72e241c4fe2157d85382afd90c80615d434dc9 Mon Sep 17 00:00:00 2001 From: Andrew Zhogin Date: Tue, 24 Sep 2024 16:35:00 +0700 Subject: [PATCH] rust_for_linux: -Zreg-struct-return commandline flag for X86 (#116973) --- compiler/rustc_codegen_gcc/src/context.rs | 5 +- compiler/rustc_interface/src/tests.rs | 1 + compiler/rustc_middle/src/ty/layout.rs | 5 +- compiler/rustc_session/src/options.rs | 3 ++ compiler/rustc_target/src/abi/call/mod.rs | 4 +- compiler/rustc_target/src/abi/call/x86.rs | 3 +- compiler/rustc_target/src/spec/mod.rs | 2 + compiler/rustc_ty_utils/src/abi.rs | 26 ++++++++-- tests/codegen/reg-struct-return.rs | 63 +++++++++++++++++++++++ 9 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 tests/codegen/reg-struct-return.rs diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 0556436f85418..135b9c98f442c 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -575,7 +575,10 @@ impl<'gcc, 'tcx> HasWasmCAbiOpt for CodegenCx<'gcc, 'tcx> { impl<'gcc, 'tcx> HasX86AbiOpt for CodegenCx<'gcc, 'tcx> { fn x86_abi_opt(&self) -> X86Abi { - X86Abi { regparm: self.tcx.sess.opts.unstable_opts.regparm } + X86Abi { + regparm: self.tcx.sess.opts.unstable_opts.regparm, + reg_struct_return: self.tcx.sess.opts.unstable_opts.reg_struct_return, + } } } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 37ae37f7318ad..830bfd2384613 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -834,6 +834,7 @@ fn test_unstable_options_tracking_hash() { tracked!(profile_emit, Some(PathBuf::from("abc"))); tracked!(profile_sample_use, Some(PathBuf::from("abc"))); tracked!(profiler_runtime, "abc".to_string()); + tracked!(reg_struct_return, true); tracked!(regparm, Some(3)); tracked!(relax_elf_relocations, Some(true)); tracked!(remap_cwd_prefix, Some(PathBuf::from("abc"))); diff --git a/compiler/rustc_middle/src/ty/layout.rs b/compiler/rustc_middle/src/ty/layout.rs index 167fbbe8ffbca..3af5fa64b1d33 100644 --- a/compiler/rustc_middle/src/ty/layout.rs +++ b/compiler/rustc_middle/src/ty/layout.rs @@ -538,7 +538,10 @@ impl<'tcx> HasWasmCAbiOpt for TyCtxt<'tcx> { impl<'tcx> HasX86AbiOpt for TyCtxt<'tcx> { fn x86_abi_opt(&self) -> X86Abi { - X86Abi { regparm: self.sess.opts.unstable_opts.regparm } + X86Abi { + regparm: self.sess.opts.unstable_opts.regparm, + reg_struct_return: self.sess.opts.unstable_opts.reg_struct_return, + } } } diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 40a2a0c4ddbef..0fcced453aac3 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1978,6 +1978,9 @@ options! { "enable queries of the dependency graph for regression testing (default: no)"), randomize_layout: bool = (false, parse_bool, [TRACKED], "randomize the layout of types (default: no)"), + reg_struct_return: bool = (false, parse_bool, [TRACKED], + "On x86-32 targets, it overrides the default ABI to return small structs in registers. + It is UNSOUND to link together crates that use different values for this flag!"), regparm: Option = (None, parse_opt_number, [TRACKED], "On x86-32 targets, setting this to N causes the compiler to pass N arguments \ in registers EAX, EDX, and ECX instead of on the stack.\ diff --git a/compiler/rustc_target/src/abi/call/mod.rs b/compiler/rustc_target/src/abi/call/mod.rs index 469e71bd1713d..e20da5371fdff 100644 --- a/compiler/rustc_target/src/abi/call/mod.rs +++ b/compiler/rustc_target/src/abi/call/mod.rs @@ -899,7 +899,9 @@ impl<'a, Ty> FnAbi<'a, Ty> { } _ => (x86::Flavor::General, None), }; - x86::compute_abi_info(cx, self, x86::X86Options { flavor, regparm }); + let reg_struct_return = cx.x86_abi_opt().reg_struct_return; + let opts = x86::X86Options { flavor, regparm, reg_struct_return }; + x86::compute_abi_info(cx, self, opts); } "x86_64" => match abi { spec::abi::Abi::SysV64 { .. } => x86_64::compute_abi_info(cx, self), diff --git a/compiler/rustc_target/src/abi/call/x86.rs b/compiler/rustc_target/src/abi/call/x86.rs index b01725fd53863..78267aff6d7df 100644 --- a/compiler/rustc_target/src/abi/call/x86.rs +++ b/compiler/rustc_target/src/abi/call/x86.rs @@ -11,6 +11,7 @@ pub(crate) enum Flavor { pub(crate) struct X86Options { pub flavor: Flavor, pub regparm: Option, + pub reg_struct_return: bool, } pub(crate) fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>, opts: X86Options) @@ -29,7 +30,7 @@ where // https://www.angelcode.com/dev/callconv/callconv.html // Clang's ABI handling is in lib/CodeGen/TargetInfo.cpp let t = cx.target_spec(); - if t.abi_return_struct_as_int { + if t.abi_return_struct_as_int || opts.reg_struct_return { // According to Clang, everyone but MSVC returns single-element // float aggregates directly in a floating-point register. if !t.is_like_msvc && fn_abi.ret.layout.is_single_fp_element(cx) { diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 8939852cae56b..71384fb31a8dc 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2056,6 +2056,8 @@ pub struct X86Abi { /// On x86-32 targets, the regparm N causes the compiler to pass arguments /// in registers EAX, EDX, and ECX instead of on the stack. pub regparm: Option, + /// Override the default ABI to return small structs in registers + pub reg_struct_return: bool, } pub trait HasX86AbiOpt { diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs index 2d0c2e83690a6..78c64ca585614 100644 --- a/compiler/rustc_ty_utils/src/abi.rs +++ b/compiler/rustc_ty_utils/src/abi.rs @@ -727,6 +727,14 @@ fn fn_abi_adjust_for_abi<'tcx>( match arg.layout.abi { Abi::Aggregate { .. } => {} + Abi::ScalarPair(..) => { + // FIXME: It is strange that small struct optimizations are enabled + // for 3-fields are more, but disabled for 2-fields (represented by ScalarPair) + // Here 2-fields optimizations are enabled only for return value + if !tcx.sess.opts.unstable_opts.reg_struct_return || arg_idx.is_some() { + return; + } + } // This is a fun case! The gist of what this is doing is // that we want callers and callees to always agree on the @@ -758,12 +766,22 @@ fn fn_abi_adjust_for_abi<'tcx>( } // Compute `Aggregate` ABI. - let is_indirect_not_on_stack = - matches!(arg.mode, PassMode::Indirect { on_stack: false, .. }); - assert!(is_indirect_not_on_stack, "{:?}", arg); + let is_indirect_not_on_stack = !matches!(arg.layout.abi, Abi::Aggregate { .. }) + || matches!(arg.mode, PassMode::Indirect { on_stack: false, .. }); + assert!(is_indirect_not_on_stack, "is_indirect_not_on_stack: {:?}", arg); let size = arg.layout.size; - if !arg.layout.is_unsized() && size <= Pointer(AddressSpace::DATA).size(cx) { + let ptr_size = Pointer(AddressSpace::DATA).size(cx); + + // In x86 we may return 2x pointer sized struct as i64 + let reg_struct_return_case = tcx.sess.target.arch == "x86" + && arg_idx.is_none() + && tcx.sess.opts.unstable_opts.reg_struct_return + && abi != SpecAbi::RustIntrinsic; + + if !arg.layout.is_unsized() + && (size <= ptr_size || (reg_struct_return_case && (size <= 2 * ptr_size))) + { // We want to pass small aggregates as immediates, but using // an LLVM aggregate type for this leads to bad optimizations, // so we pick an appropriately sized integer type instead. diff --git a/tests/codegen/reg-struct-return.rs b/tests/codegen/reg-struct-return.rs new file mode 100644 index 0000000000000..20e4c3e3352fc --- /dev/null +++ b/tests/codegen/reg-struct-return.rs @@ -0,0 +1,63 @@ +// Checks how `reg-struct-return` flag works with different calling conventions: +// Return struct with 8/16/32/64 bit size will be converted into i8/i16/i32/i64 +// (like abi_return_struct_as_int target spec). +// x86 only. + +//@ compile-flags: --target i686-unknown-linux-gnu -Zreg-struct-return -O -C no-prepopulate-passes +//@ needs-llvm-components: x86 + +#![crate_type = "lib"] +#![no_std] +#![no_core] +#![feature(no_core, lang_items)] + +#[lang = "sized"] +trait Sized {} +#[lang = "copy"] +trait Copy {} + +#[repr(C)] +pub struct Foo { + x: u32, + y: u32, +} + +pub mod tests { + use Foo; + + // CHECK: i64 @f1() + #[no_mangle] + pub extern "fastcall" fn f1() -> Foo { + Foo { x: 1, y: 2 } + } + + // CHECK: i64 @f2() + #[no_mangle] + pub extern "Rust" fn f2() -> Foo { + Foo { x: 1, y: 2 } + } + + // CHECK: i64 @f3() + #[no_mangle] + pub extern "C" fn f3() -> Foo { + Foo { x: 1, y: 2 } + } + + // CHECK: i64 @f4() + #[no_mangle] + pub extern "cdecl" fn f4() -> Foo { + Foo { x: 1, y: 2 } + } + + // CHECK: i64 @f5() + #[no_mangle] + pub extern "stdcall" fn f5() -> Foo { + Foo { x: 1, y: 2 } + } + + // CHECK: i64 @f6() + #[no_mangle] + pub extern "thiscall" fn f6() -> Foo { + Foo { x: 1, y: 2 } + } +}