Skip to content

Commit

Permalink
[AArch64][GlobalISel] Lower formal arguments of AAPCS & ms_abi variad…
Browse files Browse the repository at this point in the history
…ic functions.

Reimplemented SelectionDAG code for GlobalISel.

Fixes llvm/llvm-project#54079

Differential Revision: https://reviews.llvm.org/D130903
  • Loading branch information
dzhidzhoev committed Dec 12, 2022
1 parent f32f293 commit 56fd846
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 18 deletions.
23 changes: 15 additions & 8 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,17 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;

/// General-purpose registers X0-X7, listed in ascending order. File-local;
/// exposed to other translation units through llvm::AArch64::getGPRArgRegs().
static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7};
/// 128-bit FP/SIMD registers Q0-Q7, listed in ascending order. File-local;
/// exposed to other translation units through llvm::AArch64::getFPRArgRegs().
static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
AArch64::Q3, AArch64::Q4, AArch64::Q5,
AArch64::Q6, AArch64::Q7};

/// Returns a view over the static GPRArgRegs table (X0-X7). The view refers to
/// storage with static lifetime, so it may be kept by the caller.
const ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }

/// Returns a view over the static FPRArgRegs table (Q0-Q7). The view refers to
/// storage with static lifetime, so it may be kept by the caller.
const ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }

static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
Expand Down Expand Up @@ -6562,10 +6573,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,

SmallVector<SDValue, 8> MemOps;

static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
unsigned NumGPRArgRegs = std::size(GPRArgRegs);
auto GPRArgRegs = AArch64::getGPRArgRegs();
unsigned NumGPRArgRegs = GPRArgRegs.size();
if (Subtarget->isWindowsArm64EC()) {
// In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
// functions.
Expand Down Expand Up @@ -6615,10 +6624,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
FuncInfo->setVarArgsGPRSize(GPRSaveSize);

if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = std::size(FPRArgRegs);
auto FPRArgRegs = AArch64::getFPRArgRegs();
const unsigned NumFPRArgRegs = FPRArgRegs.size();
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);

unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,11 @@ enum Rounding {

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;

// Registers used to pass function arguments.
// Each accessor returns a non-owning ArrayRef view of an ordered list of
// physical registers; the lists themselves are defined out of line.
// getGPRArgRegs() covers the general-purpose argument registers and
// getFPRArgRegs() the floating-point/SIMD argument registers.
const ArrayRef<MCPhysReg> getGPRArgRegs();
const ArrayRef<MCPhysReg> getFPRArgRegs();

} // namespace AArch64

class AArch64Subtarget;
Expand Down
108 changes: 99 additions & 9 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
Expand Down Expand Up @@ -546,13 +547,98 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
return false;
}

/// Spill the argument registers that CCInfo reports as still unallocated into
/// stack save areas and record those areas in AArch64FunctionInfo.
///
/// This is the GlobalISel counterpart of
/// AArch64TargetLowering::saveVarArgRegisters.
///
/// \param MIRBuilder Builder used to emit the frame-index, constant, store and
///                   pointer-add instructions.
/// \param Handler    Incoming-value handler; its assignValueToReg() is used to
///                   bind each physical argument register to a fresh virtual
///                   register.
/// \param CCInfo     Calling-convention state. It is queried for the calling
///                   convention and for the first unallocated GPR/FPR, so it is
///                   expected to already reflect the assignment of the named
///                   arguments.
///
/// Side effects: may create one GPR and one FPR stack object in the frame
/// info, emits instructions through MIRBuilder, and sets VarArgsGPRIndex/Size
/// (always) and VarArgsFPRIndex/Size (only when the FPR branch is taken).
void AArch64CallLowering::saveVarArgRegisters(
MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
CCState &CCInfo) const {
auto GPRArgRegs = AArch64::getGPRArgRegs();
auto FPRArgRegs = AArch64::getFPRArgRegs();

MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64CC =
Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
const LLT p0 = LLT::pointer(0, 64);
const LLT s64 = LLT::scalar(64);

// Index into GPRArgRegs of the first register not claimed in CCInfo; every
// register from here to the end of the list is saved below.
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
// Only used to offset the value numbers handed to CCValAssign in the FPR loop.
// NOTE(review): the "+ 1" makes this one larger than the number of registers
// the GPR loop actually visits -- confirm this is intentional.
unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;

// 8 bytes per remaining 64-bit GPR.
unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
// Stays 0 when there is nothing to save; that value is still recorded below.
int GPRIdx = 0;
if (GPRSaveSize != 0) {
// Win64 uses a fixed object at offset -GPRSaveSize; every other convention
// handled here uses an ordinary 8-byte-aligned stack object.
if (IsWin64CC) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
-static_cast<int>(GPRSaveSize), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);

// FIN walks through the save area; Offset is the 8-byte stride between slots.
auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
auto Offset =
MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);

for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
// Bind the physical register to a fresh s64 virtual register.
// NOTE(review): the value number is derived from
// Function::getNumOperands(), not from the argument count -- presumably
// only a distinct number is needed here; confirm.
Register Val = MRI.createGenericVirtualRegister(s64);
Handler.assignValueToReg(
Val, GPRArgRegs[i],
CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
GPRArgRegs[i], MVT::i64, CCValAssign::Full));
// Pointer info: Win64 describes the slot relative to the fixed object
// (offset counted from the first saved register); otherwise a generic
// stack location with offset i * 8 (counted from register index 0) is used.
auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
MF, GPRIdx, (i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(MF, i * 8);
MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));

// Advance to the next slot. This also runs after the final store, so the
// last pointer-add result is not used by this function.
FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
FIN.getReg(0), Offset);
}
}
// Recorded unconditionally, including the (index 0, size 0) case.
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);

// The FPR save area is only built when the subtarget has FP-ARMv8 and the
// calling convention is not Win64.
if (Subtarget.hasFPARMv8() && !IsWin64CC) {
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);

// 16 bytes per remaining 128-bit Q register.
unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);

// Same walking-pointer scheme as the GPR area, with a 16-byte stride.
auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
auto Offset =
MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);

for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
// Each Q register is copied into a fresh 128-bit scalar virtual register.
Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
Handler.assignValueToReg(
Val, FPRArgRegs[i],
CCValAssign::getReg(
i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));

auto MPO = MachinePointerInfo::getStack(MF, i * 16);
MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));

// As in the GPR loop, the pointer computed after the last store is unused.
FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
FIN.getReg(0), Offset);
}
}
// Unlike the GPR fields, these are only written when this branch is taken.
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
}

bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = F.getParent()->getDataLayout();
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
// TODO: Support Arm64EC
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();

SmallVector<ArgInfo, 8> SplitArgs;
SmallVector<std::pair<Register, Register>> BoolArgs;
Expand Down Expand Up @@ -598,13 +684,14 @@ bool AArch64CallLowering::lowerFormalArguments(
MIRBuilder.setInstr(*MBB.begin());

const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64);

AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
FormalArgHandler Handler(MIRBuilder, MRI);
if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
F.getCallingConv(), F.isVarArg()))
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
return false;

if (!BoolArgs.empty()) {
Expand All @@ -622,10 +709,14 @@ bool AArch64CallLowering::lowerFormalArguments(
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
uint64_t StackOffset = Assigner.StackOffset;
if (F.isVarArg()) {
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetDarwin()) {
// FIXME: we need to reimplement saveVarArgsRegisters from
// AArch64ISelLowering.
if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all
// float arguments are passed in integer registers.
saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
} else if (Subtarget.isWindowsArm64EC()) {
return false;
}

Expand Down Expand Up @@ -657,7 +748,6 @@ bool AArch64CallLowering::lowerFormalArguments(
// in this function later.
FuncInfo->setBytesInStackArgArea(StackOffset);

auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ class AArch64CallLowering: public CallLowering {
using MemHandler =
std::function<void(MachineIRBuilder &, int, CCValAssign &)>;

void saveVarArgRegisters(MachineIRBuilder &MIRBuilder,
CallLowering::IncomingValueHandler &Handler,
CCState &CCInfo) const;

bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
SmallVectorImpl<ArgInfo> &OutArgs) const;

Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1939,10 +1939,18 @@ bool AArch64InstructionSelector::selectVaStartDarwin(

Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

int FrameIdx = FuncInfo->getVarArgsStackIndex();
if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
MF.getFunction().getCallingConv())) {
FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex();
}

auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
.addDef(ArgsAddrReg)
.addFrameIndex(FuncInfo->getVarArgsStackIndex())
.addFrameIndex(FrameIdx)
.addImm(0)
.addImm(0);

Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL

; Variadic function with one integer and one half-precision named argument.
; Both the SelectionDAG run and the GlobalISel run are expected to reserve a
; 176-byte frame and store x1-x7 and q1-q7 into it before returning; the two
; runs differ only in how the x-register stores are paired and ordered.
; (x0 and q0 are not stored -- presumably because they carry the two named
; arguments.)
define void @va(i32 %count, half %f, ...) nounwind {
; CHECK-LABEL: va:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #176
; CHECK-NEXT: stp x4, x5, [sp, #144]
; CHECK-NEXT: stp x2, x3, [sp, #128]
; CHECK-NEXT: str x1, [sp, #120]
; CHECK-NEXT: stp x6, x7, [sp, #160]
; CHECK-NEXT: stp q1, q2, [sp]
; CHECK-NEXT: stp q3, q4, [sp, #32]
; CHECK-NEXT: stp q5, q6, [sp, #64]
; CHECK-NEXT: str q7, [sp, #96]
; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
;
; GISEL-LABEL: va:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: sub sp, sp, #176
; GISEL-NEXT: stp x1, x2, [sp, #120]
; GISEL-NEXT: stp x3, x4, [sp, #136]
; GISEL-NEXT: stp x5, x6, [sp, #152]
; GISEL-NEXT: str x7, [sp, #168]
; GISEL-NEXT: stp q1, q2, [sp]
; GISEL-NEXT: stp q3, q4, [sp, #32]
; GISEL-NEXT: stp q5, q6, [sp, #64]
; GISEL-NEXT: str q7, [sp, #96]
; GISEL-NEXT: add sp, sp, #176
; GISEL-NEXT: ret
entry:
ret void
}
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc < %s --global-isel=1 -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefix=DARWIN

define win64cc void @pass_va(i32 %count, ...) nounwind {
; CHECK-LABEL: pass_va:
Expand All @@ -17,6 +18,12 @@ define win64cc void @pass_va(i32 %count, ...) nounwind {
; CHECK-NEXT: ldp x30, x18, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
;
; DARWIN: ; %bb.0: ; %entry
; DARWIN-DAG: stp x3, x4, [sp, #56]
; DARWIN-DAG: stp x1, x2, [sp, #40]
; DARWIN-DAG: stp x5, x6, [sp, #72]
; DARWIN-DAG: str x7, [sp, #88]
entry:
%ap = alloca i8*, align 8
%ap1 = bitcast i8** %ap to i8*
Expand All @@ -40,6 +47,16 @@ define win64cc i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
; CHECK-NEXT: str x8, [sp, #8]
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; DARWIN-LABEL: _f9:
; DARWIN: ; %bb.0: ; %entry
; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
; DARWIN-NEXT: add x8, sp, #8
; DARWIN-NEXT: add x9, sp, #24
; DARWIN-NEXT: str x9, [x8]
; DARWIN-NEXT: ldr x0, [sp, #8]
; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
; DARWIN-NEXT: ret
entry:
%ap = alloca i8*, align 8
%ap1 = bitcast i8** %ap to i8*
Expand All @@ -57,6 +74,16 @@ define win64cc i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
; CHECK-NEXT: str x8, [sp, #8]
; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; DARWIN-LABEL: _f8:
; DARWIN: ; %bb.0: ; %entry
; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
; DARWIN-NEXT: add x8, sp, #8
; DARWIN-NEXT: add x9, sp, #16
; DARWIN-NEXT: str x9, [x8]
; DARWIN-NEXT: ldr x0, [sp, #8]
; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
; DARWIN-NEXT: ret
entry:
%ap = alloca i8*, align 8
%ap1 = bitcast i8** %ap to i8*
Expand All @@ -75,6 +102,17 @@ define win64cc i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64
; CHECK-NEXT: str x8, [sp, #8]
; CHECK-NEXT: ldr x18, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; DARWIN-LABEL: _f7:
; DARWIN: ; %bb.0: ; %entry
; DARWIN-NEXT: str x18, [sp, #-32]! ; 8-byte Folded Spill
; DARWIN-NEXT: add x8, sp, #8
; DARWIN-NEXT: add x9, sp, #24
; DARWIN-NEXT: str x7, [sp, #24]
; DARWIN-NEXT: str x9, [x8]
; DARWIN-NEXT: ldr x0, [sp, #8]
; DARWIN-NEXT: ldr x18, [sp], #32 ; 8-byte Folded Reload
; DARWIN-NEXT: ret
entry:
%ap = alloca i8*, align 8
%ap1 = bitcast i8** %ap to i8*
Expand Down

0 comments on commit 56fd846

Please sign in to comment.