From 260c31d06b42b4f4cc3f44449ab8f9aab63076c8 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 1 May 2025 15:30:05 +0200 Subject: [PATCH 1/3] use `#[naked]` for `__rust_probestack` --- compiler-builtins/src/probestack.rs | 127 +++++++--------------------- 1 file changed, 32 insertions(+), 95 deletions(-) diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 5b6abd21a..2f8ebf32b 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -49,77 +49,6 @@ // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -extern "C" { - pub fn __rust_probestack(); -} - -// A wrapper for our implementation of __rust_probestack, which allows us to -// keep the assembly inline while controlling all CFI directives in the assembly -// emitted for the function. -// -// This is the ELF version. -#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .pushsection .text.__rust_probestack - .globl __rust_probestack - .type __rust_probestack, @function - .hidden __rust_probestack - __rust_probestack: - ", - $body, - " - .size __rust_probestack, . - __rust_probestack - .popsection - " - ) - }; -} - -#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl __rust_probestack - __rust_probestack: - ", - $body - ) - }; -} - -// Same as above, but for Mach-O. Note that the triple underscore -// is deliberate -#[cfg(target_vendor = "apple")] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - -// In UEFI x86 arch, triple underscore is deliberate. -#[cfg(all(target_os = "uefi", target_arch = "x86"))] -macro_rules! define_rust_probestack { - ($body: expr) => { - concat!( - " - .globl ___rust_probestack - ___rust_probestack: - ", - $body - ) - }; -} - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // @@ -131,8 +60,10 @@ macro_rules! define_rust_probestack { target_arch = "x86_64", not(all(target_env = "sgx", target_vendor = "fortanix")) ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -182,10 +113,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -8 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} // This function is the same as above, except that some instructions are // [manually patched for LVI]. @@ -195,8 +126,10 @@ core::arch::global_asm!( target_arch = "x86_64", all(target_env = "sgx", target_vendor = "fortanix") ))] -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc pushq %rbp @@ -248,10 +181,10 @@ core::arch::global_asm!( lfence jmp *%r11 .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note @@ -259,8 +192,10 @@ core::arch::global_asm!( // function basically can't tamper with anything. // // The ABI here is the same as x86_64, except everything is 32-bits large. -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -291,10 +226,10 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} #[cfg(all(target_arch = "x86", target_os = "uefi"))] // UEFI target is windows like target. LLVM will do _chkstk things like windows. @@ -307,8 +242,10 @@ core::arch::global_asm!( // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. -core::arch::global_asm!( - define_rust_probestack!( +#[unsafe(naked)] +#[no_mangle] +pub unsafe extern "C" fn __rust_probestack() { + core::arch::naked_asm!( " .cfi_startproc push %ebp @@ -344,7 +281,7 @@ core::arch::global_asm!( .cfi_adjust_cfa_offset -4 ret .cfi_endproc - " - ), - options(att_syntax) -); + ", + options(att_syntax) + ) +} From a548e1fca44b5a45c6293f87fa5fd1206dd5ba25 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 1 May 2025 21:24:27 +0200 Subject: [PATCH 2/3] merge the sgx/fortanix `__rust_probestack` into the general `x86_64` one --- compiler-builtins/src/probestack.rs | 96 +++++++---------------------- 1 file changed, 23 insertions(+), 73 deletions(-) diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 2f8ebf32b..7547d2d57 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -54,15 +54,31 @@ // // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. -// -// Any changes to this function should be replicated to the SGX version below. -#[cfg(all( - target_arch = "x86_64", - not(all(target_env = "sgx", target_vendor = "fortanix")) -))] +#[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[no_mangle] pub unsafe extern "C" fn __rust_probestack() { + #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] + macro_rules! ret { + () => { + "ret" + }; + } + + #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))] + macro_rules! ret { + // for this target, [manually patch for LVI]. + // + // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions + () => { + " + pop %r11 + lfence + jmp *%r11 + " + }; + } + core::arch::naked_asm!( " .cfi_startproc @@ -111,75 +127,9 @@ pub unsafe extern "C" fn __rust_probestack() { leave .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 - ret - .cfi_endproc ", - options(att_syntax) - ) -} - -// This function is the same as above, except that some instructions are -// [manually patched for LVI]. -// -// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions -#[cfg(all( - target_arch = "x86_64", - all(target_env = "sgx", target_vendor = "fortanix") -))] -#[unsafe(naked)] -#[no_mangle] -pub unsafe extern "C" fn __rust_probestack() { - core::arch::naked_asm!( + ret!(), " - .cfi_startproc - pushq %rbp - .cfi_adjust_cfa_offset 8 - .cfi_offset %rbp, -16 - movq %rsp, %rbp - .cfi_def_cfa_register %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. - // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. - cmp $0x1000,%r11 - jna 3f -2: - sub $0x1000,%rsp - test %rsp,8(%rsp) - sub $0x1000,%r11 - cmp $0x1000,%r11 - ja 2b - -3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - .cfi_def_cfa_register %rsp - .cfi_adjust_cfa_offset -8 - pop %r11 - lfence - jmp *%r11 .cfi_endproc ", options(att_syntax) From b1934ae10dd0c0dcf15b5e51bb1564f93421a115 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 1 May 2025 21:50:35 +0200 Subject: [PATCH 3/3] add note about why this function is unsafe --- compiler-builtins/src/lib.rs | 1 + compiler-builtins/src/probestack.rs | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 6a6b28067..6549d4cef 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -8,6 +8,7 @@ #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] +#![feature(rustc_attrs)] #![cfg_attr(f16_enabled, feature(f16))] #![cfg_attr(f128_enabled, feature(f128))] #![no_builtins] diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs index 7547d2d57..90549f7e6 100644 --- a/compiler-builtins/src/probestack.rs +++ b/compiler-builtins/src/probestack.rs @@ -52,11 +52,13 @@ // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // +// This function is unsafe because it uses a custom ABI, it does not actually match `extern "C"`. +// // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. #[cfg(target_arch = "x86_64")] #[unsafe(naked)] -#[no_mangle] +#[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_probestack() { #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))] macro_rules! ret { @@ -141,9 +143,11 @@ pub unsafe extern "C" fn __rust_probestack() { // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. // +// This function is unsafe because it uses a custom ABI, it does not actually match `extern "C"`. +// // The ABI here is the same as x86_64, except everything is 32-bits large. #[unsafe(naked)] -#[no_mangle] +#[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( " @@ -186,6 +190,8 @@ pub unsafe extern "C" fn __rust_probestack() { // probestack function will also do things like _chkstk in MSVC. // So we need to sub %ax %sp in probestack when arch is x86. // +// This function is unsafe because it uses a custom ABI, it does not actually match `extern "C"`. +// // REF: Rust commit(74e80468347) // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 // Comments in LLVM: @@ -193,7 +199,7 @@ pub unsafe extern "C" fn __rust_probestack() { // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. #[unsafe(naked)] -#[no_mangle] +#[rustc_std_internal_symbol] pub unsafe extern "C" fn __rust_probestack() { core::arch::naked_asm!( "