From 2edf6c878449294efee45ca5b7f5a921658ff666 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 2 Feb 2023 00:14:36 -0800 Subject: [PATCH] Default repr(C) enums to c_int size This is what ISO C strongly implies this is correct, and many processor-specific ABIs imply or mandate this size, so "everyone" (LLVM, gcc...) defaults to emitting enums this way. However, this is by no means guaranteed by ISO C, and the bare-metal Arm targets show it can be overridden, which rustc supports via `c-enum-min-bits` in a target.json. The override is a flag named `-fshort-enums` in clang and gcc, but introducing a CLI flag is probably unnecessary for rustc. This flag can be used by non-Arm microcontroller targets, like AVR and MSP430, but it is not enabled for them by default. Rust programmers who know the size of a target's enums can use explicit reprs, which also lets them match C23 code. This change is most relevant to 16-bit targets: AVR and MSP430. Most of rustc's targets use 32-bit ints, but ILP64 does exist. Regardless, rustc should now correctly handle enums for both very small and very large targets. Thanks to William for confirming MSP430 behavior, and to Waffle for better style and no-core size_of asserts. Co-authored-by: William D. Jones Co-authored-by: Waffle Maybe --- compiler/rustc_abi/src/lib.rs | 4 +- .../src/spec/armebv7r_none_eabi.rs | 2 +- .../src/spec/armebv7r_none_eabihf.rs | 2 +- .../rustc_target/src/spec/armv4t_none_eabi.rs | 2 +- .../rustc_target/src/spec/armv7a_none_eabi.rs | 2 +- .../src/spec/armv7a_none_eabihf.rs | 2 +- .../rustc_target/src/spec/armv7r_none_eabi.rs | 2 +- .../src/spec/armv7r_none_eabihf.rs | 2 +- .../src/spec/hexagon_unknown_linux_musl.rs | 2 +- compiler/rustc_target/src/spec/mod.rs | 30 ++++++----- compiler/rustc_target/src/spec/thumb_base.rs | 2 +- .../src/spec/thumbv4t_none_eabi.rs | 2 +- tests/ui/repr/16-bit-repr-c-enum.rs | 52 +++++++++++++++++++ 13 files changed, 81 insertions(+), 25 deletions(-) create mode 100644 tests/ui/repr/16-bit-repr-c-enum.rs diff --git a/compiler/rustc_abi/src/lib.rs b/compiler/rustc_abi/src/lib.rs index b48654d1ac36a..e2cde157b147e 100644 --- a/compiler/rustc_abi/src/lib.rs +++ b/compiler/rustc_abi/src/lib.rs @@ -170,7 +170,9 @@ pub struct TargetDataLayout { pub instruction_address_space: AddressSpace, - /// Minimum size of #[repr(C)] enums (default I32 bits) + /// Minimum size of #[repr(C)] enums (default c_int::BITS, usually 32) + /// Note: This isn't in LLVM's data layout string, it is `short_enum` + /// so the only valid spec for LLVM is c_int::BITS or 8 pub c_enum_min_size: Integer, } diff --git a/compiler/rustc_target/src/spec/armebv7r_none_eabi.rs b/compiler/rustc_target/src/spec/armebv7r_none_eabi.rs index 8c65d6afcc182..f6f46aac4c391 100644 --- a/compiler/rustc_target/src/spec/armebv7r_none_eabi.rs +++ b/compiler/rustc_target/src/spec/armebv7r_none_eabi.rs @@ -19,7 +19,7 @@ pub fn target() -> Target { max_atomic_width: Some(32), emit_debug_gdb_scripts: false, // GCC and Clang default to 8 for arm-none here - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }, } diff --git a/compiler/rustc_target/src/spec/armebv7r_none_eabihf.rs b/compiler/rustc_target/src/spec/armebv7r_none_eabihf.rs index 7013bc60d16ba..9608efe8bcf60 100644 --- a/compiler/rustc_target/src/spec/armebv7r_none_eabihf.rs +++ b/compiler/rustc_target/src/spec/armebv7r_none_eabihf.rs @@ -20,7 +20,7 @@ pub fn target() -> Target { max_atomic_width: Some(32), emit_debug_gdb_scripts: false, // GCC and Clang default to 8 for arm-none here - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }, } diff --git a/compiler/rustc_target/src/spec/armv4t_none_eabi.rs b/compiler/rustc_target/src/spec/armv4t_none_eabi.rs index 7ac1aab3b43c8..28b109889e928 100644 --- a/compiler/rustc_target/src/spec/armv4t_none_eabi.rs +++ b/compiler/rustc_target/src/spec/armv4t_none_eabi.rs @@ -49,7 +49,7 @@ pub fn target() -> Target { // from thumb_base, rust-lang/rust#44993. emit_debug_gdb_scripts: false, // from thumb_base, apparently gcc/clang give enums a minimum of 8 bits on no-os targets - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }, } diff --git a/compiler/rustc_target/src/spec/armv7a_none_eabi.rs b/compiler/rustc_target/src/spec/armv7a_none_eabi.rs index 4e20fb3256975..d59de86a23012 100644 --- a/compiler/rustc_target/src/spec/armv7a_none_eabi.rs +++ b/compiler/rustc_target/src/spec/armv7a_none_eabi.rs @@ -27,7 +27,7 @@ pub fn target() -> Target { max_atomic_width: Some(64), panic_strategy: PanicStrategy::Abort, emit_debug_gdb_scripts: false, - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }; Target { diff --git a/compiler/rustc_target/src/spec/armv7a_none_eabihf.rs b/compiler/rustc_target/src/spec/armv7a_none_eabihf.rs index ae70129ae5182..8cdf3c36ba2ea 100644 --- a/compiler/rustc_target/src/spec/armv7a_none_eabihf.rs +++ b/compiler/rustc_target/src/spec/armv7a_none_eabihf.rs @@ -19,7 +19,7 @@ pub fn target() -> Target { panic_strategy: PanicStrategy::Abort, emit_debug_gdb_scripts: false, // GCC and Clang default to 8 for arm-none here - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }; Target { diff --git a/compiler/rustc_target/src/spec/armv7r_none_eabi.rs b/compiler/rustc_target/src/spec/armv7r_none_eabi.rs index 25f301ccce744..5225abf44fca4 100644 --- a/compiler/rustc_target/src/spec/armv7r_none_eabi.rs +++ b/compiler/rustc_target/src/spec/armv7r_none_eabi.rs @@ -18,7 +18,7 @@ pub fn target() -> Target { max_atomic_width: Some(32), emit_debug_gdb_scripts: false, // GCC and Clang default to 8 for arm-none here - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }, } diff --git a/compiler/rustc_target/src/spec/armv7r_none_eabihf.rs b/compiler/rustc_target/src/spec/armv7r_none_eabihf.rs index 40449759dd377..9a35e04617f9c 100644 --- a/compiler/rustc_target/src/spec/armv7r_none_eabihf.rs +++ b/compiler/rustc_target/src/spec/armv7r_none_eabihf.rs @@ -19,7 +19,7 @@ pub fn target() -> Target { max_atomic_width: Some(32), emit_debug_gdb_scripts: false, // GCC and Clang default to 8 for arm-none here - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() }, } diff --git a/compiler/rustc_target/src/spec/hexagon_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/hexagon_unknown_linux_musl.rs index 3aad05eb2719b..4c6ab5f5ae454 100644 --- a/compiler/rustc_target/src/spec/hexagon_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/hexagon_unknown_linux_musl.rs @@ -11,7 +11,7 @@ pub fn target() -> Target { base.has_rpath = true; base.linker_flavor = LinkerFlavor::Unix(Cc::Yes); - base.c_enum_min_bits = 8; + base.c_enum_min_bits = Some(8); Target { llvm_target: "hexagon-unknown-linux-musl".into(), diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 1e80b8b759db4..4ae8a41cc6535 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -1344,10 +1344,18 @@ impl Target { }); } - dl.c_enum_min_size = match Integer::from_size(Size::from_bits(self.c_enum_min_bits)) { - Ok(bits) => bits, - Err(err) => return Err(TargetDataLayoutErrors::InvalidBitsSize { err }), - }; + dl.c_enum_min_size = self + .c_enum_min_bits + .map_or_else( + || { + self.c_int_width + .parse() + .map_err(|_| String::from("failed to parse c_int_width")) + }, + Ok, + ) + .and_then(|i| Integer::from_size(Size::from_bits(i))) + .map_err(|err| TargetDataLayoutErrors::InvalidBitsSize { err })?; Ok(dl) } @@ -1701,8 +1709,8 @@ pub struct TargetOptions { /// If present it's a default value to use for adjusting the C ABI. pub default_adjusted_cabi: Option, - /// Minimum number of bits in #[repr(C)] enum. Defaults to 32. - pub c_enum_min_bits: u64, + /// Minimum number of bits in #[repr(C)] enum. Defaults to the size of c_int + pub c_enum_min_bits: Option, /// Whether or not the DWARF `.debug_aranges` section should be generated. pub generate_arange_section: bool, @@ -1932,7 +1940,7 @@ impl Default for TargetOptions { supported_split_debuginfo: Cow::Borrowed(&[SplitDebuginfo::Off]), supported_sanitizers: SanitizerSet::empty(), default_adjusted_cabi: None, - c_enum_min_bits: 32, + c_enum_min_bits: None, generate_arange_section: true, supports_stack_protector: true, entry_name: "main".into(), @@ -2118,12 +2126,6 @@ impl Target { base.$key_name = s; } } ); - ($key_name:ident, u64) => ( { - let name = (stringify!($key_name)).replace("_", "-"); - if let Some(s) = obj.remove(&name).and_then(|j| Json::as_u64(&j)) { - base.$key_name = s; - } - } ); ($key_name:ident, u32) => ( { let name = (stringify!($key_name)).replace("_", "-"); if let Some(s) = obj.remove(&name).and_then(|b| b.as_u64()) { @@ -2492,6 +2494,7 @@ impl Target { key!(is_builtin, bool); key!(c_int_width = "target-c-int-width"); + key!(c_enum_min_bits, Option); // if None, matches c_int_width key!(os); key!(env); key!(abi); @@ -2587,7 +2590,6 @@ impl Target { key!(supported_split_debuginfo, falliable_list)?; key!(supported_sanitizers, SanitizerSet)?; key!(default_adjusted_cabi, Option)?; - key!(c_enum_min_bits, u64); key!(generate_arange_section, bool); key!(supports_stack_protector, bool); key!(entry_name); diff --git a/compiler/rustc_target/src/spec/thumb_base.rs b/compiler/rustc_target/src/spec/thumb_base.rs index 000766c57ce79..4dcf47fe465c3 100644 --- a/compiler/rustc_target/src/spec/thumb_base.rs +++ b/compiler/rustc_target/src/spec/thumb_base.rs @@ -53,7 +53,7 @@ pub fn opts() -> TargetOptions { frame_pointer: FramePointer::Always, // ARM supports multiple ABIs for enums, the linux one matches the default of 32 here // but any arm-none or thumb-none target will be defaulted to 8 on GCC and clang - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), ..Default::default() } } diff --git a/compiler/rustc_target/src/spec/thumbv4t_none_eabi.rs b/compiler/rustc_target/src/spec/thumbv4t_none_eabi.rs index 5a3e4c88d3a9b..e3734932f8855 100644 --- a/compiler/rustc_target/src/spec/thumbv4t_none_eabi.rs +++ b/compiler/rustc_target/src/spec/thumbv4t_none_eabi.rs @@ -55,7 +55,7 @@ pub fn target() -> Target { // suggested from thumb_base, rust-lang/rust#44993. emit_debug_gdb_scripts: false, // suggested from thumb_base, with no-os gcc/clang use 8-bit enums - c_enum_min_bits: 8, + c_enum_min_bits: Some(8), frame_pointer: FramePointer::MayOmit, main_needs_argc_argv: false, diff --git a/tests/ui/repr/16-bit-repr-c-enum.rs b/tests/ui/repr/16-bit-repr-c-enum.rs new file mode 100644 index 0000000000000..2acfde4be46d9 --- /dev/null +++ b/tests/ui/repr/16-bit-repr-c-enum.rs @@ -0,0 +1,52 @@ +// build-pass +// revisions: avr msp430 +// +// [avr] needs-llvm-components: avr +// [avr] compile-flags: --target=avr-unknown-gnu-atmega328 --crate-type=rlib +// [msp430] needs-llvm-components: msp430 +// [msp430] compile-flags: --target=msp430-none-elf --crate-type=rlib +#![feature(no_core, lang_items, intrinsics, staged_api)] +#![no_core] +#![crate_type = "lib"] +#![stable(feature = "", since = "")] +#![allow(dead_code)] + +// Test that the repr(C) attribute doesn't break compilation +// Previous bad assumption was that 32-bit enum default width is fine on msp430, avr +// But the width of the C int on these platforms is 16 bits, and C enums <= C int range +// so we want no more than that, usually. This resulted in errors like +// "layout decided on a larger discriminant type (I32) than typeck (I16)" +#[repr(C)] +enum Foo { + Bar, +} + +extern "rust-intrinsic" { + #[stable(feature = "", since = "")] + #[rustc_const_stable(feature = "", since = "")] + #[rustc_safe_intrinsic] + fn size_of() -> usize; +} + +#[lang="sized"] +trait Sized {} +#[lang="copy"] +trait Copy {} + +const EXPECTED: usize = 2; +const ACTUAL: usize = size_of::(); +// Validate that the size is indeed 16 bits, to match this C static_assert: +/** +```c +#include +enum foo { + BAR +}; +int main(void) +{ + /* passes on msp430-elf-gcc */ + static_assert(sizeof(enum foo) == 2); +} +``` +*/ +const _: [(); EXPECTED] = [(); ACTUAL];