From 3b4b96c9c68f7fff56549034cb5c90979ba92f66 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 17 May 2014 14:06:50 +1000 Subject: [PATCH 1/8] RFC for structs with undefined layouts. --- active/0000-undefined-struct-layout.md | 113 +++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 active/0000-undefined-struct-layout.md diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md new file mode 100644 index 00000000000..c8305414ece --- /dev/null +++ b/active/0000-undefined-struct-layout.md @@ -0,0 +1,113 @@ +- Start Date: 2014-05-17 +- RFC PR #: +- Rust Issue #: + +# Summary + +Leave structs with unspecified layout by default like enums, for +optimisation & security purposes. Use something like `#[repr(C)]` to +expose C compatible layout. + +# Motivation + +The members of a struct are always laid in memory in the order in +which they were specified, e.g. + +```rust +struct A { + x: u8, + y: u64, + z: i8, + w: i64, +} +``` + +will put the `u8` first in memory, then the `u64`, the `i8` and lastly +the `i64`. Due to the alignment requirements of various types padding +is often required to ensure the members start at an appropriately +aligned byte. Hence the above struct is not `1 + 8 + 1 + 8 == 18` +bytes, but rather `1 + 7 + 8 + 1 + 7 + 8 == 32` bytes, since it is +laid out like + +```rust +#[packed] // no automatically inserted padding +struct AFull { + x: u8, + _padding1: [u8, .. 7], + y: u64, + z: i8, + _padding2: [u8, .. 7], + w: i64 +} +``` + +If the fields were reordered to + +```rust +struct B { + y: u64, + w: i64, + + x: u8, + i: i8 +} +``` + +then the struct is (strictly) only 18 bytes (but the alignment +requirements of `u64` forces it to take up 24). + +There is also some security advantage to being able to randomise +struct layouts, for example, +[the Grsecurity suite](http://grsecurity.net/) of security +enhancements to the Linux kernel provides +[`GRKERNSEC_RANDSTRUCT`](http://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Randomize_layout_of_sensitive_kernel_structures) +which randomises "sensitive kernel datastructures" at compile time. + +Notably, Rust's `enum`s already have undefined layout, and provide the +`#[repr]` attribute to control layout more precisely (specifically, +selecting the size of the discriminant). + +# Drawbacks + +Forgetting to add `#[repr(C)]` for a struct intended for FFI use can +cause surprising bugs and crashes. There is already a lint for FFI use +of `enum`s without a `#[repr(...)]` attribute, so this can be extended +to include structs. + +# Detailed design + +A struct declaration like + +```rust +struct Foo { + ... +} +``` + +has no fixed layout, that is, a compiler can chose whichever order of +fields it prefers. + +A fixed layout can be selected with the `#[repr]` attribute + +```rust +#[repr(C)] +struct Foo { + ... +} +``` + +This will force a struct to be laid out like the equivalent definition +in C. + +# Alternatives + +- Have non-C layouts opt-in, via `#[repr(smallest)]` and + `#[repr(random)]` (or similar similar). +- Have layout defined, but not declaration order (like Java(?)), for + example, from largest field to smallest, so `u8` fields get placed + last, and `[u8, .. 1000000]` fields get placed first. The `#[repr]` + attributes would still allow for selecting declaration-order layout. + +# Unresolved questions + +- How does this interact with binary compatibility of dynamic libraries? From f303e45c2dd5246cb2c70c74fe8b10352f24f45e Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 17 May 2014 14:53:43 +1000 Subject: [PATCH 2/8] General grammar/spelling/copy-editing. --- active/0000-undefined-struct-layout.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index c8305414ece..0a24117e7dd 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -61,7 +61,7 @@ struct layouts, for example, [the Grsecurity suite](http://grsecurity.net/) of security enhancements to the Linux kernel provides [`GRKERNSEC_RANDSTRUCT`](http://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Randomize_layout_of_sensitive_kernel_structures) -which randomises "sensitive kernel datastructures" at compile time. +which randomises "sensitive kernel data structures" at compile time. Notably, Rust's `enum`s already have undefined layout, and provide the `#[repr]` attribute to control layout more precisely (specifically, @@ -80,11 +80,13 @@ A struct declaration like ```rust struct Foo { + x: T, + y: U, ... } ``` -has no fixed layout, that is, a compiler can chose whichever order of +has no fixed layout, that is, a compiler can choose whichever order of fields it prefers. A fixed layout can be selected with the `#[repr]` attribute @@ -92,6 +94,8 @@ A fixed layout can be selected with the `#[repr]` attribute ```rust #[repr(C)] struct Foo { + x: T, + y: U, ... } ``` @@ -102,7 +106,7 @@ in C. # Alternatives - Have non-C layouts opt-in, via `#[repr(smallest)]` and - `#[repr(random)]` (or similar similar). + `#[repr(random)]` (or similar). - Have layout defined, but not declaration order (like Java(?)), for example, from largest field to smallest, so `u8` fields get placed last, and `[u8, .. 1000000]` fields get placed first. The `#[repr]` From c792a57e23573ae8149bf01dd2e312e13a67e98c Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 17 May 2014 19:29:19 +1000 Subject: [PATCH 3/8] Remove security from the RFC. --- active/0000-undefined-struct-layout.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index 0a24117e7dd..8160376f010 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -5,8 +5,8 @@ # Summary Leave structs with unspecified layout by default like enums, for -optimisation & security purposes. Use something like `#[repr(C)]` to -expose C compatible layout. +optimisation purposes. Use something like `#[repr(C)]` to expose C +compatible layout. # Motivation @@ -56,12 +56,10 @@ struct B { then the struct is (strictly) only 18 bytes (but the alignment requirements of `u64` forces it to take up 24). -There is also some security advantage to being able to randomise -struct layouts, for example, -[the Grsecurity suite](http://grsecurity.net/) of security -enhancements to the Linux kernel provides -[`GRKERNSEC_RANDSTRUCT`](http://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Randomize_layout_of_sensitive_kernel_structures) -which randomises "sensitive kernel data structures" at compile time. +(Having an undefined layout does allow for possible security +improvements, like randomising struct fields, but this can trivially +be done with a syntax extension that reorders them in the AST itself, +and so is not part of this proposal.) Notably, Rust's `enum`s already have undefined layout, and provide the `#[repr]` attribute to control layout more precisely (specifically, From cf26f37004f3aa1138e22832cd0e48673de3c092 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 17 May 2014 19:34:37 +1000 Subject: [PATCH 4/8] Mention the lint in the detailed design. --- active/0000-undefined-struct-layout.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index 8160376f010..0fa990eb6fd 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -101,6 +101,27 @@ struct Foo { This will force a struct to be laid out like the equivalent definition in C. +There would be a lint for the use of non-`repr(C)` structs in related +FFI definitions, for example: + +```rust +struct UnspecifiedLayout { + // ... +} + +#[repr(C)] +struct CLayout { + // ... +} + +extern { + fn foo(x: UnspecifiedLayout); // warning: use of non-FFI-safe struct in extern declaration + + fn bar(x: CLayout); // no warning +} +``` + + # Alternatives - Have non-C layouts opt-in, via `#[repr(smallest)]` and @@ -113,3 +134,6 @@ in C. # Unresolved questions - How does this interact with binary compatibility of dynamic libraries? +- Should the lint apply to C-compatible functions defined in Rust like + `extern "C" fn foo(x: UnspecifiedLayout)`? (The equivalent lint for + enums does not pick up this case.) From b883ecae9ba4cdb17279655a2f6afe1ea235e8ea Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 17 May 2014 19:50:10 +1000 Subject: [PATCH 5/8] Write up the upstream FFI issue a little. --- active/0000-undefined-struct-layout.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index 0fa990eb6fd..60855fc7bb1 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -72,6 +72,31 @@ cause surprising bugs and crashes. There is already a lint for FFI use of `enum`s without a `#[repr(...)]` attribute, so this can be extended to include structs. +Having an unspecified (or otherwise non-C-compatible) layout by +default makes interfacing with C slightly harder. A particularly bad +case is passing to C a struct from an upstream library that doesn't +have a `repr(C)` attribute. This situation seems relatively similar to +one where an upstream library type is missing an implementation of a +core trait e.g. `Hash` if one wishes to use it as a hashmap key. + +It is slightly better if structs had a specified-but-C-incompatible +layout, *and* one has control over the C interface, because then one +can manually arrange the fields in the C definition to match the Rust +order. + +That said, this scenario requires: + +- Needing to pass a Rust struct into C/FFI code, where that FFI code + actually needs to use things from the struct, rather than just pass + it through, e.g., back into a Rust callback. +- The Rust struct is defined upstream & out of your control, and not + intended for use with C code. +- The C/FFI code is designed by someone other than that vendor, or + otherwise not designed for use with the Rust struct (or else it is a + bug in the vendor's library that the Rust struct can't be sanely + passed to C). + + # Detailed design A struct declaration like From 262e32bc88e2ed10885edeef97dae1f8191c252c Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 20 May 2014 22:53:22 +1000 Subject: [PATCH 6/8] Randomisation can be used for fuzz testing. --- active/0000-undefined-struct-layout.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index 60855fc7bb1..588c8609f58 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -56,10 +56,13 @@ struct B { then the struct is (strictly) only 18 bytes (but the alignment requirements of `u64` forces it to take up 24). -(Having an undefined layout does allow for possible security +Having an undefined layout does allow for possible security improvements, like randomising struct fields, but this can trivially -be done with a syntax extension that reorders them in the AST itself, -and so is not part of this proposal.) +be done with a syntax extension that can be attached to a struct to +reorder the fields in the AST itself. That said, there may be benefits +from being able to randomise all structs in a program +automatically/for testing, effectively fuzzing code (especially +`unsafe` code). Notably, Rust's `enum`s already have undefined layout, and provide the `#[repr]` attribute to control layout more precisely (specifically, From c67994af6b73486dd07f47cacd5ffc3a55358538 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 20 May 2014 22:55:55 +1000 Subject: [PATCH 7/8] Mention DST + field reordering. --- active/0000-undefined-struct-layout.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index 588c8609f58..b258c7272a0 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -162,6 +162,10 @@ extern { # Unresolved questions - How does this interact with binary compatibility of dynamic libraries? +- How does this interact with DST, where some fields have to be at the + end of a struct? (Just always lay-out unsized fields last? + (i.e. after monomorphisation if a field was originally marked + `Sized?` then it needs to be last).) - Should the lint apply to C-compatible functions defined in Rust like `extern "C" fn foo(x: UnspecifiedLayout)`? (The equivalent lint for enums does not pick up this case.) From d79b258558564f0e6c14fca0cd3354dcc015302e Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 20 May 2014 22:57:38 +1000 Subject: [PATCH 8/8] Resolve the lint + extern fn question. --- active/0000-undefined-struct-layout.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/active/0000-undefined-struct-layout.md b/active/0000-undefined-struct-layout.md index b258c7272a0..1e2c85ce4c0 100644 --- a/active/0000-undefined-struct-layout.md +++ b/active/0000-undefined-struct-layout.md @@ -142,11 +142,14 @@ struct CLayout { // ... } + extern { fn foo(x: UnspecifiedLayout); // warning: use of non-FFI-safe struct in extern declaration fn bar(x: CLayout); // no warning } + +extern "C" fn foo(x: UnspecifiedLayout) { } // warning: use of non-FFI-safe struct in function with C abi. ``` @@ -166,6 +169,3 @@ extern { end of a struct? (Just always lay-out unsized fields last? (i.e. after monomorphisation if a field was originally marked `Sized?` then it needs to be last).) -- Should the lint apply to C-compatible functions defined in Rust like - `extern "C" fn foo(x: UnspecifiedLayout)`? (The equivalent lint for - enums does not pick up this case.)