Skip to content

Commit

Permalink
[ARM] Optimise non-ABI frame pointers
Browse files Browse the repository at this point in the history
With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting it to ensure that the frame pointer and link register are
adjacent on the stack, saving two instructions per function.
  • Loading branch information
ostannard committed Sep 27, 2024
1 parent 9596ae7 commit ba14908
Show file tree
Hide file tree
Showing 14 changed files with 523 additions and 333 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2964,6 +2964,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
// on the stack.
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
break;
case ARMSubtarget::NoSplit:
assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
"ABI-required frame pointers need a CSR split when signing return "
"address.");
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Register Reg = CS.getReg();
return Reg != ARM::LR;
}),
CalleeSavedInfo(ARM::R12));
break;
default:
llvm_unreachable("Unexpected CSR split with return address signing");
}
Expand Down
30 changes: 15 additions & 15 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
const std::vector<CalleeSavedInfo> CSI =
MF.getFrameInfo().getCalleeSavedInfo();

// Returns SplitR7 if the frame setup must be split into two separate pushes
// of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
// always required on Thumb1-only targets, as the push and pop instructions
// can't access the high registers. This is also required when R7 is the frame
// pointer and frame pointer elimination is disabled, or branch signing is
// enabled and AAPCS is disabled.
if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
!createAAPCSFrameChain()) ||
(getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
isThumb1Only())
// Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
// cannot use high registers except for lr.
if (isThumb1Only())
return SplitR7;

// If R7 is the frame pointer, we must split at R7 to ensure that the
// previous frame pointer (R7) and return address (LR) are adjacent on the
// stack, to form a valid frame record.
if (getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.DisableFramePointerElim(MF))
return SplitR7;

// Returns SplitR11WindowsSEH when the stack pointer needs to be
Expand All @@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;

// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
// other in the list of callee saved registers in a frame, and branch
// signing is enabled.
// Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
// and LR to be adjacent on the stack, and branch signing is enabled,
// requiring R12 to be on the stack.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
getFramePointerReg() == ARM::R11)
getFramePointerReg() == ARM::R11 &&
MF.getTarget().Options.DisableFramePointerElim(MF))
return SplitR11AAPCSSignRA;
return NoSplit;
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// push {r0-r7, lr}
/// push {r8-r12}
/// vpush {d8-d15}
/// Note that Thumb1 changes this layout when the frame pointer is R11,
/// using a longer sequence of instructions because R11 can't be used by a
/// Thumb1 push instruction. This doesn't currently have a separate enum
/// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
SplitR7,

/// When the stack frame size is not known (because of variable-sized
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,20 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pacbti r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: bl _Z1gi
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
Expand All @@ -42,6 +38,8 @@ declare dso_local i32 @_Z1gi(i32)

; UNWIND-LABEL: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}


72 changes: 30 additions & 42 deletions llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,20 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
Expand All @@ -56,20 +52,16 @@ define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset lr, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset ra_auth_code, -16
; CHECK-NEXT: .cfi_offset lr, -8
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .cfi_offset r7, -16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
; CHECK-NEXT: vldr fpcxtns, [sp], #4
; CHECK-NEXT: aut r12, lr, sp
Expand All @@ -87,24 +79,20 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: mrs r12, control
; CHECK-NEXT: tst.w r12, #8
Expand Down Expand Up @@ -149,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe

; UNWIND-LABEL: FunctionAddress: 0x0
; UNWIND: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}


; UNWIND-LABEL: FunctionAddress: 0x24
; UNWIND: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-LABEL: FunctionAddress: 0x1E
; UNWIND: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}

; UNWIND-LABEL: FunctionAddress: 0x54
; UNWIND-LABEL: FunctionAddress: 0x48
; UNWIND: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}

; UNWIND-LABEL: 00000001 {{.*}} f0
; UNWIND-LABEL: 00000025 {{.*}} f1
; UNWIND-LABEL: 00000055 {{.*}} f2
; UNWIND-LABEL: 0000001f {{.*}} f1
; UNWIND-LABEL: 00000049 {{.*}} f2
Loading

0 comments on commit ba14908

Please sign in to comment.