Skip to content

Commit

Permalink
[AArch64] Async unwind - Fix MTE codegen emitting frame adjustments i…
Browse files Browse the repository at this point in the history
…n a loop

When untagging the stack, the compiler may emit a sequence like:
```
        .LBB0_1:
          st2g sp, [sp], #32
          sub x8, x8, #32
          cbnz x8, .LBB0_1
          stg sp, [sp], #16
```
These stack adjustments cannot be described by CFI instructions.

This patch disables merging of the SP update with untagging, i.e. it makes
the compiler use an additional scratch register (there should be plenty
available at this point, as we are in the epilogue) and generate:
```
            mov     x9, sp
            mov     x8, #256
            stg     x9, [x9], #16
    .LBB0_1:
            sub     x8, x8, #32
            st2g    x9, [x9], #32
            cbnz    x8, .LBB0_1
            add     sp, sp, #272
```
Merging is disabled only when we need to generate asynchronous unwind
tables.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D114548
  • Loading branch information
momchil-velikov committed Apr 15, 2022
1 parent 5865a74 commit 24c84bd
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 22 deletions.
17 changes: 11 additions & 6 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3235,7 +3235,7 @@ class TagStoreEdit {
// instructions. May skip if the replacement is not profitable. May invalidate
// the input iterator and replace it with a valid one.
void emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI, bool IsLast);
const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};

void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
Expand Down Expand Up @@ -3374,7 +3374,8 @@ void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
}

void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI, bool IsLast) {
const AArch64FrameLowering *TFI,
bool TryMergeSPUpdate) {
if (TagStores.empty())
return;
TagStoreInstr &FirstTagStore = TagStores[0];
Expand Down Expand Up @@ -3404,8 +3405,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
emitUnrolled(InsertI);
} else {
MachineInstr *UpdateInstr = nullptr;
int64_t TotalOffset;
if (IsLast) {
int64_t TotalOffset = 0;
if (TryMergeSPUpdate) {
// See if we can merge base register update into the STGloop.
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
// but STGloop is way too unusual for that, and also it only
Expand Down Expand Up @@ -3550,15 +3551,19 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
for (auto &Instr : Instrs) {
if (EndOffset && *EndOffset != Instr.Offset) {
// Found a gap.
TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
TSE.clear();
}

TSE.addInstruction(Instr);
EndOffset = Instr.Offset + Instr.Size;
}

TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
// Multiple FP/SP updates in a loop cannot be described by CFI instructions.
TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */
!MBB->getParent()
->getInfo<AArch64FunctionInfo>()
->needsAsyncDwarfUnwindInfo());

return InsertI;
}
Expand Down
32 changes: 20 additions & 12 deletions llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,23 +589,31 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,

// Create a scratch register for the frame index elimination in an instruction.
// This function has special handling of stack tagging loop pseudos, in which
// case it can also change the instruction opcode (but not the operands).
// case it can also change the instruction opcode.
static Register
createScratchRegisterForInstruction(MachineInstr &MI,
createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
const AArch64InstrInfo *TII) {
// ST*Gloop have a reserved scratch register in operand 1. Use it, and also
// replace the instruction with the writeback variant because it will now
// satisfy the operand constraints for it.
if (MI.getOpcode() == AArch64::STGloop) {
MI.setDesc(TII->get(AArch64::STGloop_wback));
return MI.getOperand(1).getReg();
} else if (MI.getOpcode() == AArch64::STZGloop) {
MI.setDesc(TII->get(AArch64::STZGloop_wback));
return MI.getOperand(1).getReg();
Register ScratchReg;
if (MI.getOpcode() == AArch64::STGloop ||
MI.getOpcode() == AArch64::STZGloop) {
assert(FIOperandNum == 3 &&
"Wrong frame index operand for STGloop/STZGloop");
unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
: AArch64::STZGloop_wback;
ScratchReg = MI.getOperand(1).getReg();
MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
MI.setDesc(TII->get(Op));
MI.tieOperands(1, 3);
} else {
return MI.getMF()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
ScratchReg =
MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
MI.getOperand(FIOperandNum)
.ChangeToRegister(ScratchReg, false, false, true);
}
return ScratchReg;
}

void AArch64RegisterInfo::getOffsetOpcodes(
Expand Down Expand Up @@ -722,9 +730,9 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
Register ScratchReg =
createScratchRegisterForInstruction(MI, FIOperandNum, TII);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}

unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
Expand Down
31 changes: 27 additions & 4 deletions llvm/test/CodeGen/AArch64/settag.ll
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,44 @@ entry:
ret void
}

define void @stg_alloca17() uwtable {
define void @stg_alloca17() nounwind {
; CHECK-LABEL: stg_alloca17:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #288
; CHECK-NEXT: .cfi_def_cfa_offset 288
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #256
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
; CHECK-NEXT: .LBB11_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st2g sp, [sp], #32
; CHECK-NEXT: sub x8, x8, #32
; CHECK-NEXT: cbnz x8, .LBB11_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: stg sp, [sp], #16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%a = alloca i8, i32 272, align 16
call void @llvm.aarch64.settag(i8* %a, i64 272)
ret void
}

define void @stg_alloca18() uwtable {
; CHECK-LABEL: stg_alloca18:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #288
; CHECK-NEXT: .cfi_def_cfa_offset 288
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov x8, #256
; CHECK-NEXT: stg x9, [x9], #16
; CHECK-NEXT: .LBB12_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub x8, x8, #32
; CHECK-NEXT: st2g x9, [x9], #32
; CHECK-NEXT: cbnz x8, .LBB12_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: add sp, sp, #272
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
Expand Down

0 comments on commit 24c84bd

Please sign in to comment.