Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ARM] Optimise non-ABI frame pointers #110286

Merged
merged 5 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3003,6 +3003,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
// on the stack.
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
break;
case ARMSubtarget::NoSplit:
assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
"ABI-required frame pointers need a CSR split when signing return "
"address.");
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this lambda requires any captures, so the [=] can be replaced with [].

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, also removed the capture from the one above.

Register Reg = CS.getReg();
return Reg != ARM::LR;
}),
CalleeSavedInfo(ARM::R12));
break;
default:
llvm_unreachable("Unexpected CSR split with return address signing");
}
Expand Down
30 changes: 15 additions & 15 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
const std::vector<CalleeSavedInfo> CSI =
MF.getFrameInfo().getCalleeSavedInfo();

// Returns SplitR7 if the frame setup must be split into two separate pushes
// of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
// always required on Thumb1-only targets, as the push and pop instructions
// can't access the high registers. This is also required when R7 is the frame
// pointer and frame pointer elimination is disabled, or branch signing is
// enabled and AAPCS is disabled.
if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
!createAAPCSFrameChain()) ||
(getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
isThumb1Only())
// Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
// cannot use high registers except for lr.
if (isThumb1Only())
return SplitR7;

// If R7 is the frame pointer, we must split at R7 to ensure that the
// previous frame pointer (R7) and return address (LR) are adjacent on the
// stack, to form a valid frame record.
if (getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.FramePointerIsReserved(MF))
return SplitR7;

// Returns SplitR11WindowsSEH when the stack pointer needs to be
Expand All @@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;

// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
// other in the list of callee saved registers in a frame, and branch
// signing is enabled.
// Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
// and LR to be adjacent on the stack, and branch signing is enabled,
// requiring R12 to be on the stack.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
getFramePointerReg() == ARM::R11)
getFramePointerReg() == ARM::R11 &&
MF.getTarget().Options.FramePointerIsReserved(MF))
return SplitR11AAPCSSignRA;
return NoSplit;
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// push {r0-r7, lr}
/// push {r8-r12}
/// vpush {d8-d15}
/// Note that Thumb1 changes this layout when the frame pointer is R11,
/// using a longer sequence of instructions because R11 can't be used by a
/// Thumb1 push instruction. This doesn't currently have a separate enum
/// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
SplitR7,

/// When the stack frame size is not known (because of variable-sized
Expand Down
47 changes: 27 additions & 20 deletions llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-unknown-eabi"

; Check the function starts with `pacbti` and correct unwind info is emitted
define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" {
; CHECK-LABEL: _Z1fi:
; CHECK: .cfi_sections .debug_frame
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pacbti r12, lr, sp
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: bl _Z1gi
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
%add = add nsw i32 %x, 1
%call = tail call i32 @_Z1gi(i32 %add)
Expand All @@ -13,26 +36,10 @@ entry:

declare dso_local i32 @_Z1gi(i32)

; Check the function starts with `pacbti` and correct unwind info is emitted
; CHECK-LABEL: _Z1fi:
; ...
; CHECK: pacbti r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; ...

; UNWIND-LABEL: Opcodes [
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}


160 changes: 97 additions & 63 deletions llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
Expand All @@ -16,79 +17,112 @@ target triple = "thumbv8m.main-none-none-eabi"
; }

define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" {
; CHECK-LABEL: f0:
; CHECK: .cfi_sections .debug_frame
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
%add = add nsw i32 %call, 1
ret i32 %add
}

; CHECK-LABEL: f0:
; CHECK: pac r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; ...
; CHECK: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr

define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: f1:
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset lr, -8
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .cfi_offset r7, -16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
; CHECK-NEXT: vldr fpcxtns, [sp], #4
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: clrm {r1, r2, r3, r12, apsr}
; CHECK-NEXT: bxns lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
%add = add nsw i32 %call, 1
ret i32 %add
}

; CHECK-LABEL: f1:
; CHECK: pac r12, lr, sp
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK: vldr fpcxtns, [sp], #4
; CHECK: aut r12, lr, sp

define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
; CHECK-LABEL: f2:
; CHECK: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
; CHECK-NEXT: push.w {r7, r12, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
; CHECK-NEXT: .cfi_offset r7, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: subs r0, #1
; CHECK-NEXT: bl g
; CHECK-NEXT: adds r0, #1
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r7, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: mrs r12, control
; CHECK-NEXT: tst.w r12, #8
; CHECK-NEXT: beq .LBB2_2
; CHECK-NEXT: @ %bb.1: @ %entry
; CHECK-NEXT: vmrs r12, fpscr
; CHECK-NEXT: vmov d0, lr, lr
; CHECK-NEXT: vmov d1, lr, lr
; CHECK-NEXT: vmov d2, lr, lr
; CHECK-NEXT: vmov d3, lr, lr
; CHECK-NEXT: vmov d4, lr, lr
; CHECK-NEXT: vmov d5, lr, lr
; CHECK-NEXT: vmov d6, lr, lr
; CHECK-NEXT: vmov d7, lr, lr
; CHECK-NEXT: bic r12, r12, #159
; CHECK-NEXT: bic r12, r12, #4026531840
; CHECK-NEXT: vmsr fpscr, r12
; CHECK-NEXT: .LBB2_2: @ %entry
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: mov r2, lr
; CHECK-NEXT: mov r3, lr
; CHECK-NEXT: mov r12, lr
; CHECK-NEXT: msr apsr_nzcvq, lr
; CHECK-NEXT: bxns lr
entry:
%sub = add nsw i32 %x, -1
%call = tail call i32 @g(i32 %sub)
%add = add nsw i32 %call, 1
ret i32 %add
}
; CHECK-LABEL: f2:
; CHECK: pac r12, lr, sp
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset r7, -8
; CHECK-NEXT: .save {ra_auth_code}
; CHECK-NEXT: str r12, [sp, #-4]!
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .cfi_def_cfa_offset 16
; ...
; CHECK: add sp, #4
; CHECK-NEXT: ldr r12, [sp], #4
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: mrs r12, control
; ...
; CHECK: bxns lr

declare dso_local i32 @g(i32) local_unnamed_addr

Expand All @@ -103,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe

; UNWIND-LABEL: FunctionAddress: 0x0
; UNWIND: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}

; UNWIND-LABEL: FunctionAddress: 0x24
; UNWIND: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}

; UNWIND-LABEL: FunctionAddress: 0x54
; UNWIND-LABEL: FunctionAddress: 0x1E
; UNWIND: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}

; UNWIND-LABEL: FunctionAddress: 0x48
; UNWIND: 0x00 ; vsp = vsp + 4
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0xB0 ; finish
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}

; UNWIND-LABEL: 00000001 {{.*}} f0
; UNWIND-LABEL: 00000025 {{.*}} f1
; UNWIND-LABEL: 00000055 {{.*}} f2
; UNWIND-LABEL: 0000001f {{.*}} f1
; UNWIND-LABEL: 00000049 {{.*}} f2
Loading
Loading