Skip to content

Commit cc944f5

Browse files
[AArch64] Stack probing for function prologues (#66524)
This adds code to AArch64 function prologues to protect against stack clash attacks by probing (writing to) the stack at regular enough intervals to ensure that the guard page cannot be skipped over. The patch depends on and maintains the following invariants: Upon function entry the caller guarantees that it has probed the stack (e.g. performed a store) at some address [sp, #N], where `0 <= N <= 1024`. This invariant comes from a requirement for compatibility with GCC. Any address range in the allocated stack, no smaller than stack-probe-size bytes, contains at least one probe. At any time the stack pointer is above or in the guard page. Probes are performed in decreasing address order. The stack-probe-size is a function attribute that can be set by a platform to correspond to the guard page size. By default, the stack probe size is 4KiB, which is a safe default as this is the smallest possible page size for AArch64. Linux uses a 64KiB guard for AArch64, so this can be overridden by the stack-probe-size function attribute. For small frames without a frame pointer (<= 240 bytes), no probes are needed. For larger frame sizes, LLVM always stores x29 to the stack. This serves as an implicit stack probe. Thus, while allocating stack objects the compiler assumes that the stack has been probed at [sp]. There are multiple probing sequences that can be emitted, depending on the size of the stack allocation: A straight-line sequence of subtracts and stores, used when the allocation size is smaller than 5 guard pages. A loop allocating and probing one page size per iteration, plus at most a single probe to deal with the remainder, used when the allocation size is larger but still known at compile time. A loop which moves the SP down to the target value held in a register (or a loop, moving a scratch register to the target value held in SP), used when the allocation size is not known at compile-time, such as when allocating space for SVE values, or when over-aligning the stack.
This is emitted in AArch64InstrInfo because it will also be used for dynamic allocas in a future patch. A single probe where the amount of stack adjustment is unknown, but is known to be less than or equal to a page size. --------- Co-authored-by: Oliver Stannard <oliver.stannard@linaro.org>
1 parent 52be47b commit cc944f5

13 files changed

+2302
-40
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+307-29
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/AArch64FrameLowering.h

+15-2
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,26 @@ class AArch64FrameLowering : public TargetFrameLowering {
152152
MachineBasicBlock::iterator MBBI) const;
153153
void allocateStackSpace(MachineBasicBlock &MBB,
154154
MachineBasicBlock::iterator MBBI,
155-
bool NeedsRealignment, StackOffset AllocSize,
155+
int64_t RealignmentPadding, StackOffset AllocSize,
156156
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
157-
StackOffset InitialOffset) const;
157+
StackOffset InitialOffset, bool FollowupAllocs) const;
158158

159159
/// Emit target zero call-used regs.
160160
void emitZeroCallUsedRegs(BitVector RegsToZero,
161161
MachineBasicBlock &MBB) const override;
162+
163+
/// Replace a StackProbe stub (if any) with the actual probe code inline
164+
void inlineStackProbe(MachineFunction &MF,
165+
MachineBasicBlock &PrologueMBB) const override;
166+
167+
void inlineStackProbeFixed(MachineBasicBlock::iterator MBBI,
168+
Register ScratchReg, int64_t FrameSize,
169+
StackOffset CFAOffset) const;
170+
171+
MachineBasicBlock::iterator
172+
inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
173+
int64_t NegProbeSize,
174+
Register TargetReg) const;
162175
};
163176

164177
} // End llvm namespace

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -26815,3 +26815,9 @@ unsigned AArch64TargetLowering::getVectorTypeBreakdownForCallingConv(
2681526815

2681626816
return NumRegs;
2681726817
}
26818+
26819+
// Reports whether inline stack probing applies to MF: true only when the
// subtarget is not Windows and the function's AArch64FunctionInfo reports
// that stack probing is enabled (hasStackProbing()).
bool AArch64TargetLowering::hasInlineStackProbe(
26820+
const MachineFunction &MF) const {
26821+
return !Subtarget->isTargetWindows() &&
26822+
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
26823+
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

+10
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,13 @@ const unsigned RoundingBitsPos = 22;
514514
ArrayRef<MCPhysReg> getGPRArgRegs();
515515
ArrayRef<MCPhysReg> getFPRArgRegs();
516516

517+
/// Maximum allowed number of unprobed bytes above SP at an ABI
518+
/// boundary.
519+
const unsigned StackProbeMaxUnprobedStack = 1024;
520+
521+
/// Maximum number of iterations to unroll for a constant size probing loop.
522+
const unsigned StackProbeMaxLoopUnroll = 4;
523+
517524
} // namespace AArch64
518525

519526
class AArch64Subtarget;
@@ -966,6 +973,9 @@ class AArch64TargetLowering : public TargetLowering {
966973
unsigned &NumIntermediates,
967974
MVT &RegisterVT) const override;
968975

976+
/// True if stack clash protection is enabled for this function.
977+
bool hasInlineStackProbe(const MachineFunction &MF) const override;
978+
969979
private:
970980
/// Keep a pointer to the AArch64Subtarget around so that we can
971981
/// make the right decision when generating code for different targets.

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+90-1
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
#include "AArch64ExpandImm.h"
1413
#include "AArch64InstrInfo.h"
14+
#include "AArch64ExpandImm.h"
1515
#include "AArch64FrameLowering.h"
1616
#include "AArch64MachineFunctionInfo.h"
1717
#include "AArch64PointerAuth.h"
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/ArrayRef.h"
2222
#include "llvm/ADT/STLExtras.h"
2323
#include "llvm/ADT/SmallVector.h"
24+
#include "llvm/CodeGen/LivePhysRegs.h"
2425
#include "llvm/CodeGen/MachineBasicBlock.h"
2526
#include "llvm/CodeGen/MachineCombinerPattern.h"
2627
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -9467,6 +9468,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
94679468
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
94689469
}
94699470

9471+
// Emit a loop that lowers SP towards TargetReg in steps of the function's
// stack probe size, storing XZR to [SP] after each step, and finally sets SP
// to TargetReg and stores XZR to [SP] once more.
//
// Three new blocks (LoopTest, LoopBody, Exit) are created directly after the
// block containing MBBI. Every instruction following MBBI, together with the
// original block's successors, is moved into the Exit block.
//
// \param MBBI       Position in the original block; everything after it is
//                   moved to the Exit block. MBBI itself is left in place
//                   (presumably the caller removes it - TODO confirm).
// \param TargetReg  Register holding the new top of stack; must not be SP.
// \param FrameSetup If true, every emitted instruction is tagged with
//                   MachineInstr::FrameSetup; otherwise no flag is set.
// \returns An iterator to the first instruction of the Exit block, i.e. the
//          instruction that copies TargetReg into SP.
MachineBasicBlock::iterator
9472+
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9473+
Register TargetReg, bool FrameSetup) const {
9474+
assert(TargetReg != AArch64::SP && "New top of stack cannot aleady be in SP");
9475+
9476+
MachineBasicBlock &MBB = *MBBI->getParent();
9477+
MachineFunction &MF = *MBB.getParent();
9478+
const AArch64InstrInfo *TII =
9479+
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9480+
int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9481+
DebugLoc DL = MBB.findDebugLoc(MBBI);
9482+
9483+
// All three blocks are inserted before the block that currently follows MBB,
// so the resulting layout is MBB, LoopTest, LoopBody, Exit.
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9484+
MachineBasicBlock *LoopTestMBB =
9485+
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9486+
MF.insert(MBBInsertPoint, LoopTestMBB);
9487+
MachineBasicBlock *LoopBodyMBB =
9488+
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9489+
MF.insert(MBBInsertPoint, LoopBodyMBB);
9490+
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9491+
MF.insert(MBBInsertPoint, ExitMBB);
9492+
MachineInstr::MIFlag Flags =
9493+
FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9494+
9495+
// LoopTest:
9496+
// SUB SP, SP, #ProbeSize
9497+
emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9498+
AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9499+
9500+
// CMP SP, TargetReg
// (a flag-setting subtract whose result is discarded into XZR)
9501+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9502+
AArch64::XZR)
9503+
.addReg(AArch64::SP)
9504+
.addReg(TargetReg)
9505+
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9506+
.setMIFlags(Flags);
9507+
9508+
// B.<Cond> LoopExit
// Leave the loop once the compare above satisfies condition LE; otherwise
// control continues into LoopBody.
9509+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9510+
.addImm(AArch64CC::LE)
9511+
.addMBB(ExitMBB)
9512+
.setMIFlags(Flags);
9513+
9514+
// STR XZR, [SP]
9515+
BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9516+
.addReg(AArch64::XZR)
9517+
.addReg(AArch64::SP)
9518+
.addImm(0)
9519+
.setMIFlags(Flags);
9520+
9521+
// B loop
9522+
BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9523+
.addMBB(LoopTestMBB)
9524+
.setMIFlags(Flags);
9525+
9526+
// LoopExit:
9527+
// MOV SP, TargetReg
// (encoded as ADD SP, TargetReg, #0)
9528+
BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9529+
.addReg(TargetReg)
9530+
.addImm(0)
9531+
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9532+
.setMIFlags(Flags);
9533+
9534+
// STR XZR, [SP]
9535+
BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::STRXui))
9536+
.addReg(AArch64::XZR)
9537+
.addReg(AArch64::SP)
9538+
.addImm(0)
9539+
.setMIFlags(Flags);
9540+
9541+
// Move the remainder of the original block (everything after MBBI) behind
// the final probe in Exit, and hand the original block's successors to Exit.
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9542+
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9543+
9544+
// Wire up the loop CFG. No branch is emitted at the end of MBB here, so MBB
// presumably reaches LoopTest by fall-through (it is next in layout).
LoopTestMBB->addSuccessor(ExitMBB);
9545+
LoopTestMBB->addSuccessor(LoopBodyMBB);
9546+
LoopBodyMBB->addSuccessor(LoopTestMBB);
9547+
MBB.addSuccessor(LoopTestMBB);
9548+
9549+
// Update liveins.
// Only done once the function's reserved registers have been frozen.
9550+
if (MF.getRegInfo().reservedRegsFrozen()) {
9551+
recomputeLiveIns(*LoopTestMBB);
9552+
recomputeLiveIns(*LoopBodyMBB);
9553+
recomputeLiveIns(*ExitMBB);
9554+
}
9555+
9556+
return ExitMBB->begin();
9557+
}
9558+
94709559
#define GET_INSTRINFO_HELPERS
94719560
#define GET_INSTRMAP_INFO
94729561
#include "AArch64GenInstrInfo.inc"

llvm/lib/Target/AArch64/AArch64InstrInfo.h

+7
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,13 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
383383
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
384384
unsigned Scale) const;
385385

386+
// Decrement the SP, issuing probes along the way. `TargetReg` is the new top
387+
// of the stack. `FrameSetup` is passed as true, if the allocation is a part
388+
// of constructing the activation frame of a function.
389+
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
390+
Register TargetReg,
391+
bool FrameSetup) const;
392+
386393
#define GET_INSTRINFO_HELPER_DECLS
387394
#include "AArch64GenInstrInfo.inc"
388395

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+23-2
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,8 @@ include "SMEInstrFormats.td"
936936
// Miscellaneous instructions.
937937
//===----------------------------------------------------------------------===//
938938

939-
let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
939+
let hasSideEffects = 1, isCodeGenOnly = 1 in {
940+
let Defs = [SP], Uses = [SP] in {
940941
// We set Sched to empty list because we expect these instructions to simply get
941942
// removed in most cases.
942943
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
@@ -945,7 +946,27 @@ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
945946
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
946947
[(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
947948
Sched<[]>;
948-
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
949+
950+
}
951+
952+
let Defs = [SP, NZCV], Uses = [SP] in {
953+
// Probed stack allocation of a constant size, used in function prologues when
954+
// stack-clash protection is enabled.
955+
def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
956+
(ins i64imm:$stacksize, i64imm:$fixed_offset,
957+
i64imm:$scalable_offset),
958+
[]>,
959+
Sched<[]>;
960+
961+
// Probed stack allocation of a variable size, used in function prologues when
962+
// stack-clash protection is enabled.
963+
def PROBED_STACKALLOC_VAR : Pseudo<(outs),
964+
(ins GPR64sp:$target),
965+
[]>,
966+
Sched<[]>;
967+
968+
} // Defs = [SP, NZCV], Uses = [SP] in
969+
} // hasSideEffects = 1, isCodeGenOnly = 1
949970

950971
let isReMaterializable = 1, isCodeGenOnly = 1 in {
951972
// FIXME: The following pseudo instructions are only needed because remat

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp

+37-6
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,45 @@ AArch64FunctionInfo::AArch64FunctionInfo(const Function &F,
9797
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
9898
F.getParent()->getModuleFlag("branch-target-enforcement")))
9999
BranchTargetEnforcement = BTE->getZExtValue();
100-
return;
100+
} else {
101+
const StringRef BTIEnable =
102+
F.getFnAttribute("branch-target-enforcement").getValueAsString();
103+
assert(BTIEnable.equals_insensitive("true") ||
104+
BTIEnable.equals_insensitive("false"));
105+
BranchTargetEnforcement = BTIEnable.equals_insensitive("true");
101106
}
102107

103-
const StringRef BTIEnable =
104-
F.getFnAttribute("branch-target-enforcement").getValueAsString();
105-
assert(BTIEnable.equals_insensitive("true") ||
106-
BTIEnable.equals_insensitive("false"));
107-
BranchTargetEnforcement = BTIEnable.equals_insensitive("true");
108+
// The default stack probe size is 4096 if the function has no
109+
// stack-probe-size attribute. This is a safe default because it is the
110+
// smallest possible guard page size.
111+
uint64_t ProbeSize = 4096;
112+
if (F.hasFnAttribute("stack-probe-size"))
113+
ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
114+
else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
115+
F.getParent()->getModuleFlag("stack-probe-size")))
116+
ProbeSize = PS->getZExtValue();
117+
assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
118+
119+
if (STI->isTargetWindows()) {
120+
if (!F.hasFnAttribute("no-stack-arg-probe"))
121+
StackProbeSize = ProbeSize;
122+
} else {
123+
// Round down to the stack alignment.
124+
uint64_t StackAlign =
125+
STI->getFrameLowering()->getTransientStackAlign().value();
126+
ProbeSize = std::max(StackAlign, ProbeSize & ~(StackAlign - 1U));
127+
StringRef ProbeKind;
128+
if (F.hasFnAttribute("probe-stack"))
129+
ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
130+
else if (const auto *PS = dyn_cast_or_null<MDString>(
131+
F.getParent()->getModuleFlag("probe-stack")))
132+
ProbeKind = PS->getString();
133+
if (ProbeKind.size()) {
134+
if (ProbeKind != "inline-asm")
135+
report_fatal_error("Unsupported stack probing method");
136+
StackProbeSize = ProbeSize;
137+
}
138+
}
108139
}
109140

110141
MachineFunctionInfo *AArch64FunctionInfo::clone(

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

+6
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
194194
/// True if the function need asynchronous unwind information.
195195
mutable std::optional<bool> NeedsAsyncDwarfUnwindInfo;
196196

197+
int64_t StackProbeSize = 0;
198+
197199
public:
198200
AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI);
199201

@@ -456,6 +458,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
456458
HasStreamingModeChanges = HasChanges;
457459
}
458460

461+
/// True if a non-zero stack probe size has been recorded for this function.
bool hasStackProbing() const { return StackProbeSize != 0; }
462+
463+
/// Returns the recorded stack probe size. A value of 0 is what
/// hasStackProbing() treats as "probing disabled".
int64_t getStackProbeSize() const { return StackProbeSize; }
464+
459465
private:
460466
// Hold the lists of LOHs.
461467
MILOHContainer LOHContainerSet;

0 commit comments

Comments
 (0)