Skip to content

Commit eb5ecbb

Browse files
committed
[llvm][AArch64] Insert "bti j" after call to setjmp
Some implementations of setjmp will end with a br instead of a ret. This means that the next instruction after a call to setjmp must be a "bti j" (j for jump) to make this work when branch target identification is enabled. The BTI extension was added in armv8.5-a, but the bti instruction is in the hint space. This means we can emit it for any architecture version as long as branch target enforcement flags are passed. The hint number starts at 32; a call target adds 2 and a jump target adds 4. Hence "hint #36" for a "bti j" (and "hint #34" for the "bti c" you see at the start of functions). The existing Arm command line option -mno-bti-at-return-twice has been applied to AArch64 as well. Support is added to SelectionDAG ISel and GlobalISel. FastISel will defer to SelectionDAG. Based on the change done for M profile Arm in https://reviews.llvm.org/D112427. Fixes #48888. Reviewed By: danielkiss Differential Revision: https://reviews.llvm.org/D121707
1 parent a451a29 commit eb5ecbb

14 files changed

+276
-5
lines changed

clang/docs/ClangCommandLineReference.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3329,7 +3329,7 @@ Work around VLLDM erratum CVE-2021-35465 (ARM only)
33293329

33303330
.. option:: -mno-bti-at-return-twice
33313331

3332-
Do not add a BTI instruction after a setjmp or other return-twice construct (Arm only)
3332+
Do not add a BTI instruction after a setjmp or other return-twice construct (AArch32/AArch64 only)
33333333

33343334
.. option:: -mno-movt
33353335

clang/docs/ReleaseNotes.rst

+5
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@ DWARF Support in Clang
193193
Arm and AArch64 Support in Clang
194194
--------------------------------
195195

196+
- When using ``-mbranch-protection=bti`` with AArch64, calls to setjmp will
197+
now be followed by a BTI instruction. This is done to be compatible with
198+
setjmp implementations that return with a br instead of a ret. You can
199+
disable this behaviour using the ``-mno-bti-at-return-twice`` option.
200+
196201
Floating Point Support in Clang
197202
-------------------------------
198203

clang/include/clang/Driver/Options.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -3414,7 +3414,7 @@ def mmark_bti_property : Flag<["-"], "mmark-bti-property">,
34143414
def mno_bti_at_return_twice : Flag<["-"], "mno-bti-at-return-twice">,
34153415
Group<m_arm_Features_Group>,
34163416
HelpText<"Do not add a BTI instruction after a setjmp or other"
3417-
" return-twice construct (Arm only)">;
3417+
" return-twice construct (Arm/AArch64 only)">;
34183418

34193419
foreach i = {1-31} in
34203420
def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group<m_Group>,

clang/lib/Driver/ToolChains/Arch/AArch64.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -588,4 +588,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
588588
// Enabled A53 errata (835769) workaround by default on android
589589
Features.push_back("+fix-cortex-a53-835769");
590590
}
591+
592+
if (Args.getLastArg(options::OPT_mno_bti_at_return_twice))
593+
Features.push_back("+no-bti-at-return-twice");
591594
}

llvm/lib/Target/AArch64/AArch64.td

+5
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,11 @@ def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
466466
def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
467467
"FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
468468

469+
def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
470+
"NoBTIAtReturnTwice", "true",
471+
"Don't place a BTI instruction "
472+
"after a return-twice">;
473+
469474
//===----------------------------------------------------------------------===//
470475
// Architectures.
471476
//

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
8686
unsigned N);
8787
bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
8888
MachineBasicBlock::iterator MBBI);
89+
bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
8990
bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
9091
MachineBasicBlock::iterator MBBI);
9192
};
@@ -759,6 +760,37 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
759760
return true;
760761
}
761762

763+
bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
764+
MachineBasicBlock::iterator MBBI) {
765+
// Expand the BLR_BTI pseudo (the CALL_BTI call form) to:
766+
// - a BL (global callee) or BLR (register callee) to the call target
767+
// - a BTI J instruction, emitted as HINT #36
768+
// Mark the sequence as a bundle, to avoid passes moving other code in
769+
// between the call and the BTI that must directly follow it.
770+
771+
MachineInstr &MI = *MBBI;
772+
MachineOperand &CallTarget = MI.getOperand(0);
773+
assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
774+
"invalid operand for regular call");
775+
unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
776+
MachineInstr *Call =
777+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
778+
Call->addOperand(CallTarget);
779+
780+
MachineInstr *BTI =
781+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
782+
// BTI J (HINT #36) so that setjmp can BR to this.
783+
.addImm(36)
784+
.getInstr();
785+
786+
if (MI.shouldUpdateCallSiteInfo())
787+
MBB.getParent()->moveCallSiteInfo(&MI, Call);
788+
789+
MI.eraseFromParent();
790+
finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
791+
return true;
792+
}
793+
762794
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
763795
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
764796
Register CtxReg = MBBI->getOperand(0).getReg();
@@ -1238,6 +1270,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
12381270
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
12391271
case AArch64::BLR_RVMARKER:
12401272
return expandCALL_RVMARKER(MBB, MBBI);
1273+
case AArch64::BLR_BTI:
1274+
return expandCALL_BTI(MBB, MBBI);
12411275
case AArch64::StoreSwiftAsyncContext:
12421276
return expandStoreSwiftAsyncContext(MBB, MBBI);
12431277
}

llvm/lib/Target/AArch64/AArch64FastISel.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "AArch64.h"
1616
#include "AArch64CallingConvention.h"
17+
#include "AArch64MachineFunctionInfo.h"
1718
#include "AArch64RegisterInfo.h"
1819
#include "AArch64Subtarget.h"
1920
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -3127,6 +3128,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
31273128
if (!Callee && !Symbol)
31283129
return false;
31293130

3131+
// Allow SelectionDAG isel to handle calls to functions like setjmp that need
3132+
// a bti instruction following the call.
3133+
if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3134+
!Subtarget->noBTIAtReturnTwice() &&
3135+
MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3136+
return false;
3137+
31303138
// Allow SelectionDAG isel to handle tail calls.
31313139
if (IsTailCall)
31323140
return false;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
22742274
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
22752275
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
22762276
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
2277+
MAKE_CASE(AArch64ISD::CALL_BTI)
22772278
}
22782279
#undef MAKE_CASE
22792280
return nullptr;
@@ -6178,6 +6179,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
61786179
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
61796180
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
61806181
bool IsSibCall = false;
6182+
bool GuardWithBTI = false;
6183+
6184+
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
6185+
!Subtarget->noBTIAtReturnTwice()) {
6186+
GuardWithBTI = FuncInfo->branchTargetEnforcement();
6187+
}
61816188

61826189
// Check callee args/returns for SVE registers and set calling convention
61836190
// accordingly.
@@ -6612,7 +6619,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
66126619
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
66136620
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
66146621
Ops.insert(Ops.begin() + 1, GA);
6615-
}
6622+
} else if (GuardWithBTI)
6623+
CallOpc = AArch64ISD::CALL_BTI;
66166624

66176625
// Returns a chain and a flag for retval copy to use.
66186626
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);

llvm/lib/Target/AArch64/AArch64ISelLowering.h

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ enum NodeType : unsigned {
5555
// x29, x29` marker instruction.
5656
CALL_RVMARKER,
5757

58+
CALL_BTI, // Function call followed by a BTI instruction.
59+
5860
// Produces the full sequence of instructions for getting the thread pointer
5961
// offset of a variable into X0, using the TLSDesc model.
6062
TLSDESC_CALLSEQ,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+10
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,11 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
473473
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
474474
SDNPVariadic]>;
475475

476+
def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
477+
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
478+
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
479+
SDNPVariadic]>;
480+
476481
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
477482
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
478483
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -2328,6 +2333,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
23282333
PseudoInstExpansion<(BLR GPR64:$Rn)>;
23292334
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
23302335
Sched<[WriteBrReg]>;
2336+
def BLR_BTI : Pseudo<(outs), (ins GPR64:$Rn), []>,
2337+
Sched<[WriteBrReg]>;
23312338
} // isCall
23322339

23332340
def : Pat<(AArch64call GPR64:$Rn),
@@ -2341,6 +2348,9 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
23412348
(BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
23422349
Requires<[NoSLSBLRMitigation]>;
23432350

2351+
def : Pat<(AArch64call_bti GPR64:$Rn),
2352+
(BLR_BTI GPR64:$Rn)>;
2353+
23442354
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
23452355
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
23462356
} // isBranch, isTerminator, isBarrier, isIndirectBranch

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -1129,12 +1129,20 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
11291129
// Create a temporarily-floating call instruction so we can add the implicit
11301130
// uses of arg registers.
11311131

1132+
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1133+
unsigned Opc = 0;
11321134
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
11331135
// be expanded to the call, directly followed by a special marker sequence and
11341136
// a call to an ObjC library function.
1135-
unsigned Opc = 0;
11361137
if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
11371138
Opc = AArch64::BLR_RVMARKER;
1139+
// A call to a returns twice function like setjmp must be followed by a bti
1140+
// instruction.
1141+
else if (Info.CB &&
1142+
Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
1143+
!Subtarget.noBTIAtReturnTwice() &&
1144+
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1145+
Opc = AArch64::BLR_BTI;
11381146
else
11391147
Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
11401148

@@ -1153,7 +1161,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
11531161

11541162
// Tell the call which registers are clobbered.
11551163
const uint32_t *Mask;
1156-
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
11571164
const auto *TRI = Subtarget.getRegisterInfo();
11581165

11591166
AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=NOBTI
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefix=NOBTI
3+
; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel < %s | FileCheck %s --check-prefix=NOBTI
4+
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+no-bti-at-return-twice < %s | \
5+
; RUN: FileCheck %s --check-prefix=NOBTI
6+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -mattr=+no-bti-at-return-twice < %s | \
7+
; RUN: FileCheck %s --check-prefix=NOBTI
8+
; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -mattr=+no-bti-at-return-twice < %s | \
9+
; RUN: FileCheck %s --check-prefix=NOBTI
10+
11+
; Same as setjmp-bti.ll except that we do not enable branch target enforcement for this
12+
; module. There should be no combination of options that leads to a bti being emitted.
13+
14+
; C source
15+
; --------
16+
; extern int setjmp(void*);
17+
; extern void notsetjmp(void);
18+
;
19+
; void bbb(void) {
20+
; setjmp(0);
21+
; int (*fnptr)(void*) = setjmp;
22+
; fnptr(0);
23+
; notsetjmp();
24+
; }
25+
26+
define void @bbb() {
27+
; NOBTI-LABEL: bbb:
28+
; NOBTI: bl setjmp
29+
; NOBTI-NOT: hint #36
30+
; NOBTI: blr x{{[0-9]+}}
31+
; NOBTI-NOT: hint #36
32+
; NOBTI: bl notsetjmp
33+
; NOBTI-NOT: hint #36
34+
35+
entry:
36+
%fnptr = alloca i32 (i8*)*, align 8
37+
%call = call i32 @setjmp(i8* noundef null) #0
38+
store i32 (i8*)* @setjmp, i32 (i8*)** %fnptr, align 8
39+
%0 = load i32 (i8*)*, i32 (i8*)** %fnptr, align 8
40+
%call1 = call i32 %0(i8* noundef null) #0
41+
call void @notsetjmp()
42+
ret void
43+
}
44+
45+
declare i32 @setjmp(i8* noundef) #0
46+
declare void @notsetjmp()
47+
48+
attributes #0 = { returns_twice }
49+
50+
; !llvm.module.flags = !{!0}
51+
; !0 = !{i32 1, !"branch-target-enforcement", i32 1}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner < %s | FileCheck %s --check-prefix=BTI
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner < %s | \
3+
; RUN: FileCheck %s --check-prefix=BTI
4+
; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner < %s | \
5+
; RUN: FileCheck %s --check-prefix=BTI
6+
; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
7+
; RUN: FileCheck %s --check-prefix=NOBTI
8+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
9+
; RUN: FileCheck %s --check-prefix=NOBTI
10+
; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
11+
; RUN: FileCheck %s --check-prefix=NOBTI
12+
13+
; Check that the outliner does not split up the call to setjmp and the bti after it.
14+
; When we do not insert a bti, it is allowed to move the setjmp call into an outlined function.
15+
16+
; C source
17+
; --------
18+
; extern int setjmp(void*);
19+
;
20+
; int f(int a, int b, int c, int d) {
21+
; setjmp(0);
22+
; return 1 + a * (a + b) / (c + d);
23+
; }
24+
;
25+
; int g(int a, int b, int c, int d) {
26+
; setjmp(0);
27+
; return 2 + a * (a + b) / (c + d);
28+
; }
29+
30+
define i32 @f(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
31+
; BTI-LABEL: f:
32+
; BTI: bl OUTLINED_FUNCTION_1
33+
; BTI-NEXT: bl setjmp
34+
; BTI-NEXT: hint #36
35+
; BTI-NEXT: bl OUTLINED_FUNCTION_0
36+
37+
; NOBTI: f:
38+
; NOBTI: bl OUTLINED_FUNCTION_0
39+
; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
40+
41+
entry:
42+
%call = call i32 @setjmp(i8* noundef null) #0
43+
%add = add nsw i32 %b, %a
44+
%mul = mul nsw i32 %add, %a
45+
%add1 = add nsw i32 %d, %c
46+
%div = sdiv i32 %mul, %add1
47+
%add2 = add nsw i32 %div, 1
48+
ret i32 %add2
49+
}
50+
51+
declare i32 @setjmp(i8* noundef) #0
52+
53+
define i32 @g(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
54+
; BTI-LABEL: g:
55+
; BTI: bl OUTLINED_FUNCTION_1
56+
; BTI-NEXT: bl setjmp
57+
; BTI-NEXT: hint #36
58+
; BTI-NEXT: bl OUTLINED_FUNCTION_0
59+
60+
; NOBTI: g:
61+
; NOBTI: bl OUTLINED_FUNCTION_0
62+
; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
63+
64+
entry:
65+
%call = call i32 @setjmp(i8* noundef null) #0
66+
%add = add nsw i32 %b, %a
67+
%mul = mul nsw i32 %add, %a
68+
%add1 = add nsw i32 %d, %c
69+
%div = sdiv i32 %mul, %add1
70+
%add2 = add nsw i32 %div, 2
71+
ret i32 %add2
72+
}
73+
74+
; NOBTI-LABEL: OUTLINED_FUNCTION_0:
75+
; NOBTI: b setjmp
76+
; NOBTI: OUTLINED_FUNCTION_1:
77+
; NOBTI-LABEL: ret
78+
79+
attributes #0 = { returns_twice }
80+
81+
!llvm.module.flags = !{!0}
82+
83+
!0 = !{i32 1, !"branch-target-enforcement", i32 1}

0 commit comments

Comments
 (0)