Skip to content

Commit b491379

Browse files
committed
Avoid multi-return asm and add misopt test
It is undefined to exit an asm block multiple times with only one entry. And LLVM will derive that a branch ending with a noreturn call can freely clobber stack variables without having observable effects, which is not compatible with an all-branch-executed control flow. See: <https://doc.rust-lang.org/stable/reference/inline-assembly.html#r-asm.rules.only-on-exit> Discussion: <rust-lang/rfcs#2625 (comment)>
1 parent d539e48 commit b491379

File tree

8 files changed

+424
-298
lines changed

8 files changed

+424
-298
lines changed

src/aarch64.rs

+31-26
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
use super::NonZero;
22

3-
pub(crate) type Buf = [*mut (); 4];
4-
53
#[cfg(not(target_os = "macos"))]
64
macro_rules! set_jump_raw_impl {
75
($($tt:tt)*) => {
@@ -27,7 +25,6 @@ macro_rules! set_jump_raw_impl {
2725
lateout("lr") _,
2826
// Caller saved registers.
2927
clobber_abi("C"),
30-
options(readonly),
3128
)
3229
}
3330
}
@@ -64,50 +61,58 @@ macro_rules! set_jump_raw_impl {
6461
lateout("lr") _,
6562
// Caller saved registers.
6663
clobber_abi("C"),
67-
options(readonly),
6864
)
6965
}
7066
}
7167

7268
macro_rules! set_jump_raw {
73-
($val:expr, $buf_ptr:expr, $lander:block) => {
69+
($val:expr, $f:expr, $data:expr, $lander:block) => {
7470
set_jump_raw_impl!(
75-
"adr x0, {lander}",
76-
"mov x2, sp",
77-
"stp x0, x2, [x1]",
78-
"stp x19, x29, [x1, #16]",
71+
"adr x2, {lander}",
72+
"stp x2, x2, [sp, #-16]!",
73+
".cfi_adjust_cfa_offset 16",
74+
"stp x19, x29, [sp, #-16]!",
75+
".cfi_adjust_cfa_offset 16",
76+
"mov x1, sp",
77+
"bl {f}",
78+
"add sp, sp, 32",
79+
".cfi_adjust_cfa_offset -32",
7980

80-
in("x1") $buf_ptr, // Restored in long_jump_raw.
81-
out("x0") $val,
81+
f = sym $f,
8282
lander = label $lander,
83+
inout("x0") $data => $val,
8384
)
8485
};
85-
($val:expr, $buf_ptr:expr) => {
86+
($val:expr, $f:expr, $data:expr) => {
8687
set_jump_raw_impl!(
87-
"adr x0, 2f",
88-
"mov x2, sp",
89-
"stp x0, x2, [x1]",
90-
"stp x19, x29, [x1, #16]",
91-
"mov x0, 0",
88+
"adr x2, 2f",
89+
"stp x2, x2, [sp, #-16]!",
90+
".cfi_adjust_cfa_offset 16",
91+
"stp x19, x29, [sp, #-16]!",
92+
".cfi_adjust_cfa_offset 16",
93+
"mov x1, sp",
94+
"bl {f}",
95+
"add sp, sp, 32",
96+
".cfi_adjust_cfa_offset -32",
9297
"2:",
9398

94-
in("x1") $buf_ptr, // Restored in long_jump_raw.
95-
out("x0") $val,
99+
f = sym $f,
100+
inout("x0") $data => $val,
96101
)
97102
};
98103
}
99104

100105
#[inline]
101-
pub(crate) unsafe fn long_jump_raw(buf: *mut (), result: NonZero<usize>) -> ! {
106+
pub(crate) unsafe fn long_jump_raw(jp: *mut (), result: NonZero<usize>) -> ! {
102107
unsafe {
103108
core::arch::asm!(
104-
"ldp x19, x29, [x1, #16]",
105-
"ldp lr, x2, [x1]",
106-
"mov sp, x2",
107-
"ret",
108-
in("x1") buf,
109+
"ldp x19, x29, [x1]",
110+
"ldr x2, [x1, #16]",
111+
"add sp, x1, 32",
112+
"br x2",
109113
in("x0") result.get(),
110-
options(noreturn),
114+
in("x1") jp,
115+
options(noreturn, nostack, readonly),
111116
)
112117
}
113118
}

src/arm.rs

+34-40
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
use super::NonZero;
22

3-
pub(crate) type Buf = [*mut (); 4];
4-
5-
macro_rules! set_jump_raw {
6-
($val:expr, $buf_ptr:expr, $lander:block) => {
3+
macro_rules! set_jump_raw_impl {
4+
($($tt:tt)*) => {
75
core::arch::asm!(
8-
"adr lr, {lander}",
9-
"stm r1, {{r6, r11, sp, lr}}",
10-
11-
lander = label $lander,
12-
13-
out("r0") $val,
14-
in("r1") $buf_ptr, // Restored in long_jump_raw.
6+
$($tt)*
157

168
// Callee saved registers.
179
lateout("r4") _,
@@ -25,40 +17,42 @@ macro_rules! set_jump_raw {
2517
lateout("r12") _,
2618
// lateout("sp") _, // sp
2719
lateout("lr") _,
28-
2920
// Caller saved registers.
21+
// FIXME: inline asm clobber list contains reserved registers: D16-D31.
3022
clobber_abi("aapcs"),
31-
options(readonly),
23+
)
24+
}
25+
}
26+
27+
macro_rules! set_jump_raw {
28+
($val:expr, $f:expr, $data:expr, $lander:block) => {
29+
set_jump_raw_impl!(
30+
"adr lr, {lander}",
31+
"push {{r6, r11, sp, lr}}",
32+
".cfi_adjust_cfa_offset 16",
33+
"mov r1, sp",
34+
"bl {f}",
35+
"add sp, sp, 16",
36+
".cfi_adjust_cfa_offset -16",
37+
38+
f = sym $f,
39+
inout("r0") $data => $val,
40+
lander = label $lander,
3241
)
3342
};
34-
($val:expr, $buf_ptr:expr) => {
35-
core::arch::asm!(
43+
($val:expr, $f:expr, $data:expr) => {
44+
set_jump_raw_impl!(
3645
"adr lr, 2f",
37-
"stm r1, {{r6, r11, sp, lr}}",
38-
"mov r0, 0",
46+
"push {{r6, r11, sp, lr}}",
47+
".cfi_adjust_cfa_offset 16",
48+
"mov r1, sp",
49+
"bl {f}",
50+
"add sp, sp, 16",
51+
".cfi_adjust_cfa_offset -16",
3952
"2:",
4053

41-
// Caller saved registers.
42-
// Workaround: see above.
43-
out("r0") $val,
44-
in("r1") $buf_ptr, // Restored in long_jump_raw.
45-
46-
// Callee saved registers.
47-
lateout("r4") _,
48-
lateout("r5") _,
49-
// lateout("r6") _, // LLVM reserved.
50-
lateout("r7") _,
51-
lateout("r8") _,
52-
lateout("r9") _,
53-
lateout("r10") _,
54-
// lateout("r11") _, // LLVM reserved.
55-
lateout("r12") _,
56-
// lateout("sp") _, // sp
57-
lateout("lr") _,
58-
59-
// Caller saved registers.
60-
clobber_abi("aapcs"),
61-
options(readonly),
54+
f = sym $f,
55+
inout("r0") $data => $val,
6256
)
6357
};
6458
}
@@ -68,9 +62,9 @@ pub(crate) unsafe fn long_jump_raw(buf: *mut (), result: NonZero<usize>) -> ! {
6862
unsafe {
6963
core::arch::asm!(
7064
"ldm r1, {{r6, r11, sp, pc}}",
71-
in("r1") buf,
7265
in("r0") result.get(),
73-
options(noreturn),
66+
in("r1") buf,
67+
options(noreturn, nostack, readonly),
7468
)
7569
}
7670
}

src/lib.rs

+51-36
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
//! [fatal interaction with optimizer][misopt].
1111
//! This crate does not suffer from the misoptimization (covered in `tests/smoke.rs`).
1212
//!
13-
//! ⚠️ We admit that since it's not yet undefined to force unwind Rust POFs and/or
14-
//! longjmp's half execution semantic, `long_jump` is still technically undefined behavior.
13+
//! ⚠️ We admit that since it's not yet undefined to force unwind Rust POFs,
14+
//! `long_jump` is still technically undefined behavior.
1515
//! But this crate is an attempt to make a semantically-correct abstraction free from
1616
//! misoptimization, and you accept the risk by using this crate.
1717
//! If you find any misoptimization in practice, please open an issue.
@@ -25,7 +25,7 @@
2525
//! Single `usize` `JumpPoint`. Let optimizer save only necessary states rather than bulk saving
2626
//! all callee-saved registers. Inline-able `set_jump` without procedure call cost.
2727
//!
28-
//! - 2.3ns `set_jump` setup and 3.2ns `long_jump` on a modern x86\_64 CPU.
28+
//! - 2.4ns `set_jump` setup and 2.9ns `long_jump` on a modern x86\_64 CPU.
2929
//! ~300-490x faster than `catch_unwind`-`panic_any!`.
3030
//!
3131
//! - No std.
@@ -90,17 +90,16 @@
9090
//!
9191
//! - [`sjlj`](https://crates.io/crates/sjlj)
9292
//!
93-
//! - Uses inline assembly but involving an un-inline-able call instruction.
9493
//! - Only x86\_64 is supported.
95-
//! - Suffers from [misoptimization][misopt].
94+
//! - Suffers from [misoptimization][misopt] due to multi-return.
9695
//! - Slower `long_jump` because of more register restoring.
9796
//!
9897
//! [misopt]: https://github.com/rust-lang/rfcs/issues/2625
9998
#![cfg_attr(feature = "unstable-asm-goto", feature(asm_goto))]
10099
#![cfg_attr(feature = "unstable-asm-goto", feature(asm_goto_with_outputs))]
101100
#![cfg_attr(not(test), no_std)]
102101
use core::marker::PhantomData;
103-
use core::mem::{size_of, MaybeUninit};
102+
use core::mem::ManuallyDrop;
104103
use core::num::NonZero;
105104

106105
#[cfg(target_arch = "x86_64")]
@@ -147,8 +146,6 @@ mod imp {
147146

148147
compile_error!("sjlj2: unsupported platform");
149148

150-
pub type Buf = [usize; 0];
151-
152149
macro_rules! set_jump_raw {
153150
($val:tt, $($tt:tt)*) => {
154151
$val = 0 as _
@@ -206,7 +203,7 @@ impl JumpPoint<'_> {
206203
}
207204

208205
/// Alias of [`set_jump`].
209-
#[inline(always)]
206+
#[inline]
210207
pub fn set_jump<T, F, G>(ordinary: F, lander: G) -> T
211208
where
212209
F: FnOnce(JumpPoint<'_>) -> T,
@@ -220,7 +217,7 @@ impl JumpPoint<'_> {
220217
/// # Safety
221218
///
222219
/// See [`long_jump`].
223-
#[inline(always)]
220+
#[inline]
224221
pub unsafe fn long_jump(self, result: NonZero<usize>) -> ! {
225222
long_jump(self, result)
226223
}
@@ -237,18 +234,25 @@ impl JumpPoint<'_> {
237234
/// Since it is implemented with [`core::mem::needs_drop`], it may generates false positive
238235
/// compile errors.
239236
///
240-
/// This function is even inline-able without procedure-call cost!
237+
/// This function is inline-able but `ordinary` is called in a C function wrapper because it must
238+
/// has its own stack frame to avoid [misoptimization][misopt2].
239+
///
240+
/// [misopt2]: https://github.com/rust-lang/libc/issues/1596
241241
///
242242
/// # Safety
243243
///
244244
/// Yes, this function is actually safe to use. [`long_jump`] is unsafe, however.
245245
///
246246
/// # Panics
247247
///
248-
/// It is possible to unwind from `ordinary` or `lander` closure.
248+
/// It is allowed to panic in `ordinary` or `lander` closure. But panics in `ordinary` cannot
249+
/// unwind across `set_jump`, or it will abort the process.
250+
/// Panics in `lander` can unwind across `set_jump` though.
249251
#[doc(alias = "setjmp")]
250-
#[inline(always)]
251-
pub fn set_jump<T, F, G>(mut ordinary: F, lander: G) -> T
252+
#[inline]
253+
// For better logical order.
254+
#[allow(clippy::items_after_statements)]
255+
pub fn set_jump<T, F, G>(ordinary: F, lander: G) -> T
252256
where
253257
F: FnOnce(JumpPoint<'_>) -> T,
254258
G: FnOnce(NonZero<usize>) -> T,
@@ -266,40 +270,51 @@ where
266270
// unwind between its return from F or G and the return from `set_jump`.
267271
}
268272

269-
let mut buf = <MaybeUninit<imp::Buf>>::uninit();
270-
let ptr = buf.as_mut_ptr();
271-
let mut val: usize;
273+
union Data<T, F> {
274+
func: ManuallyDrop<F>,
275+
ret: ManuallyDrop<T>,
276+
}
272277

273-
// Show the optimizer that `ordinary` may be called in both ordinary and landing paths.
274-
// So it cannot assume that variables that are captured by `ordinary` are unchanged in the
275-
// lander path -- they must be reloaded.
276-
// This fixes <https://github.com/rust-lang/rfcs/issues/2625>.
277-
if size_of::<F>() == 0 {
278-
// F must not mutate local states because it captures nothing.
279-
} else {
280-
unsafe {
281-
core::arch::asm!(
282-
"/*{}*/",
283-
in(reg) core::ptr::addr_of_mut!(ordinary),
284-
// May access memory.
285-
);
278+
struct AbortGuard;
279+
impl Drop for AbortGuard {
280+
fn drop(&mut self) {
281+
// Manually trigger a double-panic abort.
282+
// Workaround: <https://github.com/rust-lang/rust/issues/67952>
283+
panic!("panic cannot unwind across set_jump");
286284
}
287285
}
288286

287+
unsafe extern "C" fn wrap<T, F: FnOnce(JumpPoint<'_>) -> T>(
288+
data: &mut Data<T, F>,
289+
jp: *mut (),
290+
) -> usize {
291+
let guard = AbortGuard;
292+
let f = unsafe { ManuallyDrop::take(&mut data.func) };
293+
let ret = f(unsafe { JumpPoint::from_raw(jp) });
294+
data.ret = ManuallyDrop::new(ret);
295+
core::mem::forget(guard);
296+
0
297+
}
298+
299+
let mut data = Data::<T, F> {
300+
func: ManuallyDrop::new(ordinary),
301+
};
302+
let val: usize;
303+
289304
#[cfg(feature = "unstable-asm-goto")]
290305
unsafe {
291-
set_jump_raw!(val, ptr, {
306+
set_jump_raw!(val, wrap::<T, F>, &mut data, {
292307
unsafe { return lander(NonZero::new_unchecked(val)) }
293308
});
294-
ordinary(JumpPoint::from_raw(ptr.cast()))
309+
ManuallyDrop::take(&mut data.ret)
295310
}
296311

297312
#[cfg(not(feature = "unstable-asm-goto"))]
298-
unsafe {
299-
set_jump_raw!(val, ptr);
313+
{
314+
unsafe { set_jump_raw!(val, wrap::<T, F>, &mut data) };
300315
match NonZero::new(val) {
301-
None => ordinary(JumpPoint::from_raw(ptr.cast())),
302-
Some(val) => lander(val),
316+
None => unsafe { ManuallyDrop::take(&mut data.ret) },
317+
Some(v) => lander(v),
303318
}
304319
}
305320
}

0 commit comments

Comments
 (0)