Skip to content

Commit 92985ac

Browse files
committed
[RISCV] Add 32 bit GPR sub-register for Zfinx.
This patch adds a 32-bit register class for use with Zfinx instructions. This makes them more similar to F instructions and allows us to only spill 32 bits. I've added CodeGenOnly instructions for load/store using GPRF32, as that gave better results than insert_subreg/extract_subreg. I'm using FSGNJ for GPRF32 copies with Zfinx, as that gave better results from MachineCopyPropagation. Function arguments use this new GPRF32 register class for f32 arguments with Zfinx, eliminating the need to use RISCVISD::FMV* nodes. This is similar to llvm#107446, which adds a 16-bit register class.
1 parent ae9fcd6 commit 92985ac

27 files changed

+639
-517
lines changed

llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,14 @@ struct RISCVOperand final : public MCParsedAsmOperand {
485485
RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
486486
}
487487

488+
bool isGPRF32() const {
489+
return Kind == KindTy::Register &&
490+
RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
491+
}
492+
488493
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
489494
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
495+
bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
490496
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }
491497

492498
bool isGPRPair() const {
@@ -1352,6 +1358,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
13521358
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
13531359
return Match_Success;
13541360
}
1361+
if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
1362+
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
1363+
return Match_Success;
1364+
}
13551365

13561366
// There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
13571367
// reject them at parsing thinking we should match as GPRPairAsFPR for RV32.

llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,19 @@ static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
9494
return MCDisassembler::Success;
9595
}
9696

97+
static DecodeStatus DecodeGPRF32RegisterClass(MCInst &Inst, uint32_t RegNo,
98+
uint64_t Address,
99+
const MCDisassembler *Decoder) {
100+
bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
101+
102+
if (RegNo >= 32 || (IsRVE && RegNo >= 16))
103+
return MCDisassembler::Fail;
104+
105+
MCRegister Reg = RISCV::X0_W + RegNo;
106+
Inst.addOperand(MCOperand::createReg(Reg));
107+
return MCDisassembler::Success;
108+
}
109+
97110
static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
98111
uint64_t Address,
99112
const MCDisassembler *Decoder) {

llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,23 @@ static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
156156
return ArrayRef(ArgIGPRs);
157157
}
158158

159+
static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
160+
// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
161+
// the ILP32E ABI.
162+
static const MCPhysReg ArgIGPRs[] = {RISCV::X10_W, RISCV::X11_W, RISCV::X12_W,
163+
RISCV::X13_W, RISCV::X14_W, RISCV::X15_W,
164+
RISCV::X16_W, RISCV::X17_W};
165+
// The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
166+
static const MCPhysReg ArgEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
167+
RISCV::X12_W, RISCV::X13_W,
168+
RISCV::X14_W, RISCV::X15_W};
169+
170+
if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
171+
return ArrayRef(ArgEGPRs);
172+
173+
return ArrayRef(ArgIGPRs);
174+
}
175+
159176
static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
160177
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
161178
// for save-restore libcall, so we don't use them.
@@ -194,6 +211,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
194211
return ArrayRef(FastCCIGPRs);
195212
}
196213

214+
static ArrayRef<MCPhysReg> getFastCCArgGPRF32s(const RISCVABI::ABI ABI) {
215+
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
216+
// for save-restore libcall, so we don't use them.
217+
// Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
218+
static const MCPhysReg FastCCIGPRs[] = {
219+
RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
220+
RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W,
221+
RISCV::X28_W, RISCV::X29_W, RISCV::X30_W, RISCV::X31_W};
222+
223+
// The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
224+
static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
225+
RISCV::X12_W, RISCV::X13_W,
226+
RISCV::X14_W, RISCV::X15_W};
227+
228+
if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
229+
return ArrayRef(FastCCEGPRs);
230+
231+
return ArrayRef(FastCCIGPRs);
232+
}
233+
197234
// Pass a 2*XLEN argument that has been split into two XLEN values through
198235
// registers or the stack as necessary.
199236
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -364,11 +401,17 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
364401
}
365402
}
366403

404+
if ((ValVT == MVT::f32 && Subtarget.hasStdExtZfinx())) {
405+
if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
406+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
407+
return false;
408+
}
409+
}
410+
367411
ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
368412

369-
// Zfinx/Zdinx use GPR without a bitcast when possible.
370-
if ((LocVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
371-
(LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
413+
// Zdinx uses GPR without a bitcast when possible.
414+
if (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx()) {
372415
if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
373416
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
374417
return false;
@@ -609,10 +652,24 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
609652
}
610653
}
611654

655+
// Check if there is an available GPRF16 before hitting the stack.
656+
if (LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) {
657+
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF16s(ABI))) {
658+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
659+
return false;
660+
}
661+
}
662+
663+
// Check if there is an available GPRF32 before hitting the stack.
664+
if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
665+
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF32s(ABI))) {
666+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
667+
return false;
668+
}
669+
}
670+
612671
// Check if there is an available GPR before hitting the stack.
613-
if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
614-
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
615-
Subtarget.hasStdExtZdinx())) {
672+
if (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx()) {
616673
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
617674
if (LocVT.getSizeInBits() != Subtarget.getXLen()) {
618675
LocVT = Subtarget.getXLenVT();
@@ -625,14 +682,6 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
625682
}
626683
}
627684

628-
// Check if there is an available GPRF16 before hitting the stack.
629-
if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) {
630-
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF16s(ABI))) {
631-
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
632-
return false;
633-
}
634-
}
635-
636685
if (LocVT == MVT::f16 || LocVT == MVT::bf16) {
637686
int64_t Offset2 = State.AllocateStack(2, Align(2));
638687
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
@@ -739,6 +788,17 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
739788
}
740789
}
741790

791+
if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
792+
static const MCPhysReg GPR32List[] = {
793+
RISCV::X9_W, RISCV::X18_W, RISCV::X19_W, RISCV::X20_W,
794+
RISCV::X21_W, RISCV::X22_W, RISCV::X23_W, RISCV::X24_W,
795+
RISCV::X25_W, RISCV::X26_W, RISCV::X27_W};
796+
if (MCRegister Reg = State.AllocateReg(GPR32List)) {
797+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
798+
return false;
799+
}
800+
}
801+
742802
if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
743803
(LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
744804
Subtarget.is64Bit())) {

llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
9797
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
9898
if (RC && RC->contains(RISCV::X0)) {
9999
X0Reg = RISCV::X0;
100+
} else if (RC && RC->contains(RISCV::X0_W)) {
101+
X0Reg = RISCV::X0_W;
100102
} else if (RC && RC->contains(RISCV::X0_H)) {
101103
X0Reg = RISCV::X0_H;
102104
} else {

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
962962
if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
963963
Res =
964964
CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
965+
} else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
966+
Res =
967+
CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
965968
} else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
966969
Res = CurDAG->getMachineNode(
967970
Opc, DL, VT, Imm,

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,14 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
475475
return;
476476
}
477477

478+
if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
479+
assert(STI.hasStdExtZfinx());
480+
BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S_INX), DstReg)
481+
.addReg(SrcReg, getKillRegState(KillSrc))
482+
.addReg(SrcReg, getKillRegState(KillSrc));
483+
return;
484+
}
485+
478486
if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
479487
// Emit an ADDI for both parts of GPRPair.
480488
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
@@ -599,6 +607,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
599607
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
600608
Opcode = RISCV::SH_INX;
601609
IsScalableVector = false;
610+
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
611+
Opcode = RISCV::SW_INX;
612+
IsScalableVector = false;
602613
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
603614
Opcode = RISCV::PseudoRV32ZdinxSD;
604615
IsScalableVector = false;
@@ -685,6 +696,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
685696
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
686697
Opcode = RISCV::LH_INX;
687698
IsScalableVector = false;
699+
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
700+
Opcode = RISCV::LW_INX;
701+
IsScalableVector = false;
688702
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
689703
Opcode = RISCV::PseudoRV32ZdinxLD;
690704
IsScalableVector = false;

llvm/lib/Target/RISCV/RISCVInstrInfoF.td

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,14 @@ def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
8383

8484
// Zfinx
8585

86-
def GPRAsFPR : AsmOperandClass {
87-
let Name = "GPRAsFPR";
86+
def GPRAsFPR32 : AsmOperandClass {
87+
let Name = "GPRAsFPR32";
8888
let ParserMethod = "parseGPRAsFPR";
8989
let RenderMethod = "addRegOperands";
9090
}
9191

9292
def FPR32INX : RegisterOperand<GPRF32> {
93-
let ParserMatchClass = GPRAsFPR;
94-
let DecoderMethod = "DecodeGPRRegisterClass";
93+
let ParserMatchClass = GPRAsFPR32;
9594
}
9695

9796
// Describes a combination of predicates from F/D/Zfh/Zfhmin or
@@ -306,6 +305,12 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
306305
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
307306
} // Predicates = [HasStdExtF]
308307

308+
let Predicates = [HasStdExtZfinx], isCodeGenOnly = 1 in {
309+
def LW_INX : Load_ri<0b010, "lw", GPRF32>, Sched<[WriteLDW, ReadMemBase]>;
310+
def SW_INX : Store_rri<0b010, "sw", GPRF32>,
311+
Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
312+
}
313+
309314
foreach Ext = FExts in {
310315
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
311316
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
@@ -682,12 +687,10 @@ defm Select_FPR32INX : SelectCC_GPR_rrirr<FPR32INX, f32>;
682687
def PseudoFROUND_S_INX : PseudoFROUND<FPR32INX, f32>;
683688

684689
/// Loads
685-
def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
686-
(COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
690+
def : LdPat<load, LW_INX, f32>;
687691

688692
/// Stores
689-
def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
690-
(SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
693+
def : StPat<store, SW_INX, GPRF32, f32>;
691694
} // Predicates = [HasStdExtZfinx]
692695

693696
let Predicates = [HasStdExtF] in {
@@ -698,8 +701,8 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
698701

699702
let Predicates = [HasStdExtZfinx] in {
700703
// Moves (no conversion)
701-
def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
702-
def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
704+
def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (EXTRACT_SUBREG GPR:$rs1, sub_32)>;
705+
def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$rs1, sub_32)>;
703706
} // Predicates = [HasStdExtZfinx]
704707

705708
let Predicates = [HasStdExtF] in {
@@ -778,8 +781,8 @@ def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, FRM_DYN)>;
778781

779782
let Predicates = [HasStdExtZfinx, IsRV64] in {
780783
// Moves (no conversion)
781-
def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (COPY_TO_REGCLASS GPR:$src, GPRF32)>;
782-
def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (COPY_TO_REGCLASS GPRF32:$src, GPR)>;
784+
def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (EXTRACT_SUBREG GPR:$src, sub_32)>;
785+
def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$src, sub_32)>;
783786

784787
// Use target specific isd nodes to help us remember the result is sign
785788
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be

0 commit comments

Comments
 (0)