Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 94f8691

Browse files
committed Nov 30, 2023
Fix for a missing probe to the SVE callee-saved registers area
In some cases a probe may be elided, leaving more than 1024 bytes of unprobed area at the top of the stack, or decrementing the SP by more than the guard area size.
1 parent 2278dd2 commit 94f8691

File tree

3 files changed

+62
-10
lines changed

3 files changed

+62
-10
lines changed
 

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+12-8
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,8 @@ static int64_t upperBound(StackOffset Size) {
699699
void AArch64FrameLowering::allocateStackSpace(
700700
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
701701
int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
702-
bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
702+
bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
703+
bool FollowupAllocs) const {
703704

704705
if (!AllocSize)
705706
return;
@@ -753,9 +754,10 @@ void AArch64FrameLowering::allocateStackSpace(
753754
.addImm(InitialOffset.getFixed())
754755
.addImm(InitialOffset.getScalable());
755756
// The fixed allocation may leave unprobed bytes at the top of the
756-
// stack. If we have variable-sized objects, we need to issue an extra
757-
// probe, so their allocations starts in a known state.
758-
if (MFI.hasVarSizedObjects()) {
757+
// stack. If we have subsequent allocations (e.g. if we have variable-sized
758+
// objects), we need to issue an extra probe, so these allocations start in
759+
// a known state.
760+
if (FollowupAllocs) {
759761
// STR XZR, [SP]
760762
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
761763
.addReg(AArch64::XZR)
@@ -789,8 +791,8 @@ void AArch64FrameLowering::allocateStackSpace(
789791
.setMIFlags(MachineInstr::FrameSetup);
790792
AFI.setStackRealigned(true);
791793
}
792-
if (MFI.hasVarSizedObjects() || upperBound(AllocSize) + RealignmentPadding >
793-
AArch64::StackProbeMaxUnprobedStack) {
794+
if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
795+
AArch64::StackProbeMaxUnprobedStack) {
794796
// STR XZR, [SP]
795797
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
796798
.addReg(AArch64::XZR)
@@ -1986,8 +1988,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19861988
// Allocate space for the callee saves (if any).
19871989
StackOffset CFAOffset =
19881990
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
1991+
StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
19891992
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
1990-
nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
1993+
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
1994+
MFI.hasVarSizedObjects() || LocalsSize);
19911995
CFAOffset += SVECalleeSavesSize;
19921996

19931997
if (EmitAsyncCFI)
@@ -2004,7 +2008,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
20042008
allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
20052009
SVELocalsSize + StackOffset::getFixed(NumBytes),
20062010
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
2007-
CFAOffset);
2011+
CFAOffset, MFI.hasVarSizedObjects());
20082012
}
20092013

20102014
// If we need a base pointer, set it up here. It's whatever the value of the

‎llvm/lib/Target/AArch64/AArch64FrameLowering.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
153153
void allocateStackSpace(MachineBasicBlock &MBB,
154154
MachineBasicBlock::iterator MBBI,
155155
int64_t RealignmentPadding, StackOffset AllocSize,
156-
bool NeedsWinCFI, bool *HasWinCFI,
157-
bool EmitCFI, StackOffset InitialOffset) const;
156+
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
157+
StackOffset InitialOffset, bool FollowupAllocs) const;
158158

159159
/// Emit target zero call-used regs.
160160
void emitZeroCallUsedRegs(BitVector RegsToZero,

‎llvm/test/CodeGen/AArch64/stack-probing-sve.ll

+48
Original file line numberDiff line numberDiff line change
@@ -671,4 +671,52 @@ entry:
671671
ret void
672672
}
673673

674+
; Test with 14 scalable bytes (so up to 14 * 16 = 224) of unprobed
675+
; area below the save location of `p9`.
676+
define void @sve_unprobed_area(<vscale x 4 x float> %a, i32 %n) #0 {
677+
; CHECK-LABEL: sve_unprobed_area:
678+
; CHECK: // %bb.0: // %entry
679+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
680+
; CHECK-NEXT: .cfi_def_cfa_offset 16
681+
; CHECK-NEXT: .cfi_offset w29, -16
682+
; CHECK-NEXT: addvl sp, sp, #-4
683+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
684+
; CHECK-NEXT: str xzr, [sp]
685+
; CHECK-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill
686+
; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill
687+
; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill
688+
; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill
689+
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
690+
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
691+
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
692+
; CHECK-NEXT: addvl sp, sp, #-4
693+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
694+
; CHECK-NEXT: //APP
695+
; CHECK-NEXT: //NO_APP
696+
; CHECK-NEXT: addvl sp, sp, #4
697+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
698+
; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload
699+
; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
700+
; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
701+
; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
702+
; CHECK-NEXT: addvl sp, sp, #4
703+
; CHECK-NEXT: .cfi_def_cfa wsp, 16
704+
; CHECK-NEXT: .cfi_restore z8
705+
; CHECK-NEXT: .cfi_restore z9
706+
; CHECK-NEXT: .cfi_restore z10
707+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
708+
; CHECK-NEXT: .cfi_def_cfa_offset 0
709+
; CHECK-NEXT: .cfi_restore w29
710+
; CHECK-NEXT: ret
711+
entry:
712+
call void asm sideeffect "", "~{z8},~{z9},~{z10},~{p9}" ()
713+
714+
%v0 = alloca <vscale x 4 x float>, align 16
715+
%v1 = alloca <vscale x 4 x float>, align 16
716+
%v2 = alloca <vscale x 4 x float>, align 16
717+
%v3 = alloca <vscale x 4 x float>, align 16
718+
719+
ret void
720+
}
721+
674722
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" "target-features"="+sve" }

0 commit comments

Comments
 (0)
Please sign in to comment.