diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 5e29a92f0bacd..fbad7d5d02db6 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -480,7 +480,13 @@ struct RISCVOperand final : public MCParsedAsmOperand {
            RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
   }
 
+  bool isGPRF16() const {
+    return Kind == KindTy::Register &&
+           RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
+  }
+
   bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
+  bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
   bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }
 
   bool isGPRPair() const {
@@ -1342,6 +1348,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
     Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
     return Match_Success;
   }
+  if (Kind == MCK_GPRAsFPR16 && Op.isGPRAsFPR()) {
+    Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
+    return Match_Success;
+  }
 
   // There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
   // reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index b869458a25614..c2659a51b0209 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -81,6 +81,19 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
+                                              uint64_t Address,
+                                              const MCDisassembler *Decoder) {
+  bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
+
+  if (RegNo >= 32 || (IsRVE && RegNo >= 16))
+    return MCDisassembler::Fail;
+
+  MCRegister Reg = RISCV::X0_H + RegNo;
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
                                                uint64_t Address,
                                                const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 30a565c8b19db..d610f0b956027 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -139,6 +139,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
   return ArrayRef(ArgIGPRs);
 }
 
+static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
+  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+  // the ILP32E ABI.
+  static const MCPhysReg ArgIGPRs[] = {RISCV::X10_H, RISCV::X11_H, RISCV::X12_H,
+                                       RISCV::X13_H, RISCV::X14_H, RISCV::X15_H,
+                                       RISCV::X16_H, RISCV::X17_H};
+  // The GPRs used for passing arguments in the ILP32E/LP64E ABI.
+  static const MCPhysReg ArgEGPRs[] = {RISCV::X10_H, RISCV::X11_H,
+                                       RISCV::X12_H, RISCV::X13_H,
+                                       RISCV::X14_H, RISCV::X15_H};
+
+  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+    return ArrayRef(ArgEGPRs);
+
+  return ArrayRef(ArgIGPRs);
+}
+
 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
   // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
   // for save-restore libcall, so we don't use them.
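[Editor's note, not part of the patch: getArgGPR16s() hands out the 16-bit X*_H sub-registers of the usual a0-a7 argument GPRs, so an f16 argument lives directly in the low half of its register instead of being bitcast through an XLen GPR. Because CCState::AllocateReg also marks every alias of an allocated register as used, giving X10_H to an f16 argument consumes the whole a0 slot for later arguments. A hedged illustration with an invented signature:

    // _Float16 f(_Float16 a, int b) under ilp32/lp64 with Zhinxmin:
    //   a -> X10_H (low 16 bits of a0); X10/a0 is then marked allocated
    //   b -> X11 (a1), because X10_H aliases X10
    _Float16 f(_Float16 a, int b);
]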
@@ -157,6 +174,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
   return ArrayRef(FastCCIGPRs);
 }
 
+static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
+  // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
+  // for save-restore libcall, so we don't use them.
+  // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
+  static const MCPhysReg FastCCIGPRs[] = {
+      RISCV::X10_H, RISCV::X11_H, RISCV::X12_H, RISCV::X13_H,
+      RISCV::X14_H, RISCV::X15_H, RISCV::X16_H, RISCV::X17_H,
+      RISCV::X28_H, RISCV::X29_H, RISCV::X30_H, RISCV::X31_H};
+
+  // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+  static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_H, RISCV::X11_H,
+                                          RISCV::X12_H, RISCV::X13_H,
+                                          RISCV::X14_H, RISCV::X15_H};
+
+  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+    return ArrayRef(FastCCEGPRs);
+
+  return ArrayRef(FastCCIGPRs);
+}
+
 // Pass a 2*XLEN argument that has been split into two XLEN values through
 // registers or the stack as necessary.
 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -320,6 +357,13 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
+  if ((ValVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) {
+    if (MCRegister Reg = State.AllocateReg(getArgGPR16s(ABI))) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
 
   // Zfinx/Zdinx use GPR without a bitcast when possible.
@@ -564,9 +608,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
 
   MVT XLenVT = Subtarget.getXLenVT();
 
+  // Check if there is an available GPRF16 before hitting the stack.
+  if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) {
+    if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF16s(ABI))) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   // Check if there is an available GPR before hitting the stack.
-  if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) ||
-      (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
+  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
       (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) {
     if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index cce0ffe16e5fe..713c7a0661def 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -93,14 +93,19 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
         continue;
       LLVM_DEBUG(dbgs() << "    Dead def operand #" << I << " in:\n      ";
                  MI.print(dbgs()));
+      Register X0Reg;
      const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
-      if (!(RC && RC->contains(RISCV::X0))) {
+      if (RC && RC->contains(RISCV::X0)) {
+        X0Reg = RISCV::X0;
+      } else if (RC && RC->contains(RISCV::X0_H)) {
+        X0Reg = RISCV::X0_H;
+      } else {
        LLVM_DEBUG(dbgs() << "    Ignoring, register is not a GPR.\n");
        continue;
      }
      assert(LIS.hasInterval(Reg));
      LIS.removeInterval(Reg);
-      MO.setReg(RISCV::X0);
+      MO.setReg(X0Reg);
      LLVM_DEBUG(dbgs() << "    Replacing with zero register.
New:\n "; MI.print(dbgs())); ++NumDeadDefsReplaced; diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 72f96965ae985..2501256ca6adf 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -48,6 +48,8 @@ class RISCVExpandPseudo : public MachineFunctionPass { MachineBasicBlock::iterator &NextMBBI); bool expandVMSET_VMCLR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opcode); + bool expandMV_FPR16INX(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool expandRV32ZdinxStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandRV32ZdinxLoad(MachineBasicBlock &MBB, @@ -104,6 +106,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. switch (MBBI->getOpcode()) { + case RISCV::PseudoMV_FPR16INX: + return expandMV_FPR16INX(MBB, MBBI); case RISCV::PseudoRV32ZdinxSD: return expandRV32ZdinxStore(MBB, MBBI); case RISCV::PseudoRV32ZdinxLD: @@ -266,6 +270,23 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB, return true; } +bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + const TargetRegisterInfo *TRI = STI->getRegisterInfo(); + Register DstReg = TRI->getMatchingSuperReg( + MBBI->getOperand(0).getReg(), RISCV::sub_16, &RISCV::GPRRegClass); + Register SrcReg = TRI->getMatchingSuperReg( + MBBI->getOperand(1).getReg(), RISCV::sub_16, &RISCV::GPRRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg) + .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill())) + .addImm(0); + + MBBI->eraseFromParent(); // The pseudo instruction is gone now. + return true; +} + // This function expands the PseudoRV32ZdinxSD for storing a double-precision // floating-point value into memory by generating an equivalent instruction // sequence for RV32. 
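[Editor's note, not part of the patch: GPRF16 has no real 16-bit move instruction, so copyPhysReg emits PseudoMV_FPR16INX and this pass expands it late. The expansion above maps each X*_H operand back to its full X super-register with getMatchingSuperReg and emits the canonical GPR move ADDI rd, rs, 0; moving the whole XLEN register trivially preserves the f16 value in its low 16 bits. A hedged example with invented operands:

    // Before expansion (MIR-level, abstract GPRF16 copy):
    //   $x10_h = PseudoMV_FPR16INX killed $x11_h
    // After RISCVExpandPseudo, as RISC-V assembly:
    //   addi a0, a1, 0    # i.e. "mv a0, a1"; compressible to c.mv
]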
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 05ba18bf8ebd8..23479c2edf1d9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -928,7 +928,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } SDNode *Res; - if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) + if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) { + Res = + CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode(); + } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) Res = CurDAG->getMachineNode( Opc, DL, VT, Imm, CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 8dafd824963c0..7cbe2829d9b34 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -104,6 +104,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, MemBytes = 1; break; case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::FLH: MemBytes = 2; @@ -144,6 +145,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, MemBytes = 1; break; case RISCV::SH: + case RISCV::SH_INX: case RISCV::FSH: MemBytes = 2; break; @@ -462,6 +464,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg) + .addReg(SrcReg, + getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc)); + return; + } + if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { // Emit an ADDI for both parts of GPRPair. BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), @@ -583,6 +592,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; IsScalableVector = false; + } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::SH_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; IsScalableVector = false; @@ -666,6 +678,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW : RISCV::LD; IsScalableVector = false; + } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::LH_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; IsScalableVector = false; @@ -1520,6 +1535,9 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { } switch (Opcode) { + case RISCV::PseudoMV_FPR16INX: + // MV is always compressible to either c.mv or c.li rd, 0. + return STI.hasStdExtCOrZca() ? 
2 : 4;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    return StackMapOpers(&MI).getNumPatchBytes();
@@ -2575,6 +2593,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
   case RISCV::LB:
   case RISCV::LBU:
   case RISCV::LH:
+  case RISCV::LH_INX:
   case RISCV::LHU:
   case RISCV::LW:
   case RISCV::LWU:
@@ -2584,6 +2603,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
   case RISCV::FLD:
   case RISCV::SB:
   case RISCV::SH:
+  case RISCV::SH_INX:
   case RISCV::SW:
   case RISCV::SD:
   case RISCV::FSH:
@@ -2647,9 +2667,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
   case RISCV::LBU:
   case RISCV::SB:
   case RISCV::LH:
+  case RISCV::LH_INX:
   case RISCV::LHU:
   case RISCV::FLH:
   case RISCV::SH:
+  case RISCV::SH_INX:
   case RISCV::FSH:
   case RISCV::LW:
   case RISCV::LWU:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index fe5623e2920e2..ac3cb9dc091e1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -514,8 +514,8 @@ class BranchCC_rri<bits<3> funct3, string opcodestr>
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-class Load_ri<bits<3> funct3, string opcodestr>
-  : RVInstI<funct3, OPC_LOAD, (outs GPR:$rd), (ins GPRMem:$rs1, simm12:$imm12),
+class Load_ri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
+  : RVInstI<funct3, OPC_LOAD, (outs rty:$rd), (ins GPRMem:$rs1, simm12:$imm12),
             opcodestr, "$rd, ${imm12}(${rs1})">;
 
 class HLoad_r<bits<7> funct7, bits<5> funct5, string opcodestr>
@@ -529,9 +529,9 @@ class HLoad_r<bits<7> funct7, bits<5> funct5, string opcodestr>
 // reflecting the order these fields are specified in the instruction
 // encoding.
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-class Store_rri<bits<3> funct3, string opcodestr>
+class Store_rri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
   : RVInstS<funct3, OPC_STORE, (outs),
-            (ins GPR:$rs2, GPRMem:$rs1, simm12:$imm12),
+            (ins rty:$rs2, GPRMem:$rs1, simm12:$imm12),
             opcodestr, "$rs2, ${imm12}(${rs1})">;
 
 class HStore_rr<bits<7> funct7, string opcodestr>
@@ -543,8 +543,8 @@ class HStore_rr<bits<7> funct7, string opcodestr>
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class ALU_ri<bits<3> funct3, string opcodestr>
-  : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
+class ALU_ri<bits<3> funct3, string opcodestr, DAGOperand rty = GPR>
+  : RVInstI<funct3, OPC_OP_IMM, (outs rty:$rd), (ins rty:$rs1, simm12:$imm12),
             opcodestr, "$rd, $rs1, $imm12">,
     Sched<[WriteIALU, ReadIALU]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index 11c2695a59854..bff740a33c1c1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -112,8 +112,9 @@ class CLoadB_ri<bits<6> funct6, string OpcodeStr>
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-class CLoadH_ri<bits<6> funct6, bit funct1, string OpcodeStr>
-  : RVInst16CLH<funct6, funct1, 0b00, (outs GPRC:$rd),
+class CLoadH_ri<bits<6> funct6, bit funct1, string OpcodeStr,
+                DAGOperand rty = GPRC>
+  : RVInst16CLH<funct6, funct1, 0b00, (outs rty:$rd),
                 (ins GPRCMem:$rs1, uimm2_lsb0:$imm),
                 OpcodeStr, "$rd, ${imm}(${rs1})"> {
   bits<2> imm;
@@ -132,9 +133,10 @@ class CStoreB_rri<bits<6> funct6, string OpcodeStr>
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-class CStoreH_rri<bits<6> funct6, bit funct1, string OpcodeStr>
+class CStoreH_rri<bits<6> funct6, bit funct1, string OpcodeStr,
+                  DAGOperand rty = GPRC>
   : RVInst16CSH<funct6, funct1, 0b00,
-                (ins GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
+                (ins rty:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
                 OpcodeStr, "$rs2, ${imm}(${rs1})"> {
   bits<2> imm;
@@ -202,7 +204,15 @@ def C_SB : CStoreB_rri<0b100010, "c.sb">,
            Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
 def C_SH : CStoreH_rri<0b100011, 0b0, "c.sh">,
            Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+
+// Compressed versions of Zhinx load/store.
+let isCodeGenOnly = 1 in {
+def C_LH_INX : CLoadH_ri<0b100001, 0b1, "c.lh", GPRF16C>,
+               Sched<[WriteLDH, ReadMemBase]>;
+def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>,
+               Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
 }
+} // Predicates = [HasStdExtZcb]
 
 // Zcmp
 let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp],
@@ -318,6 +328,13 @@ def : CompressPat<(SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm),
                   (C_SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm)>;
 def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
                   (C_SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+
+let isCompressOnly = true in {
+def : CompressPat<(LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm),
+                  (C_LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+def : CompressPat<(SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
+                  (C_SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+}
 }// Predicates = [HasStdExtZcb]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 792cb7fa6dbc2..51123180d47c6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -33,9 +33,14 @@ def riscv_fmv_x_signexth
 
 // Zhinxmin and Zhinx
 
+def GPRAsFPR16 : AsmOperandClass {
+  let Name = "GPRAsFPR16";
+  let ParserMethod = "parseGPRAsFPR";
+  let RenderMethod = "addRegOperands";
+}
+
 def FPR16INX : RegisterOperand<GPRF16> {
-  let ParserMatchClass = GPRAsFPR;
-  let DecoderMethod = "DecodeGPRRegisterClass";
+  let ParserMatchClass = GPRAsFPR16;
 }
 
 def ZfhExt : ExtInfo<"", "", [HasStdExtZfh],
@@ -84,6 +89,19 @@ def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
 def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
 } // Predicates = [HasHalfFPLoadStoreMove]
 
+let Predicates = [HasStdExtZhinxmin], isCodeGenOnly = 1 in {
+def LH_INX : Load_ri<0b001, "lh", GPRF16>, Sched<[WriteLDH, ReadMemBase]>;
+def SH_INX : Store_rri<0b001, "sh", GPRF16>,
+             Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+
+// ADDI with GPRF16 register class to use for copy. This should not be used as
+// general ADDI, so the immediate should always be zero.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1,
+    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def PseudoMV_FPR16INX : Pseudo<(outs GPRF16:$rd), (ins GPRF16:$rs), []>,
+                        Sched<[WriteIALU, ReadIALU]>;
+}
+
 foreach Ext = ZfhExts in {
   let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in {
     defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>;
   }
@@ -426,13 +444,10 @@ let Predicates = [HasStdExtZhinxmin] in {
 defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
 
 /// Loads
-def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
-          (COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>;
+def : LdPat<load, LH_INX, f16>;
 
 /// Stores
-def : Pat<(store (f16 FPR16INX:$rs2),
-                 (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
-          (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
+def : StPat<store, SH_INX, FPR16INX, f16>;
 } // Predicates = [HasStdExtZhinxmin]
 
 let Predicates = [HasStdExtZfhmin] in {
@@ -458,8 +473,8 @@ def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>;
 def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>;
 
 // Moves (no conversion)
-def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
-def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
+def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (EXTRACT_SUBREG GPR:$src, sub_16)>;
+def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR16INX:$src, sub_16)>;
 
 def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxmin]
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 3f423450618df..5973e5bf2e525 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -103,8 +103,10 @@ static unsigned log2LdstWidth(unsigned Opcode) {
   case RISCV::SB:
     return 0;
   case RISCV::LH:
+  case RISCV::LH_INX:
   case RISCV::LHU:
   case RISCV::SH:
+  case RISCV::SH_INX:
     return 1;
   case RISCV::LW:
   case RISCV::SW:
@@ -128,8 +130,10 @@ static unsigned offsetMask(unsigned Opcode) {
   case RISCV::SB:
     return maskTrailingOnes<unsigned>(2U);
   case RISCV::LH:
+  case RISCV::LH_INX:
   case RISCV::LHU:
   case RISCV::SH:
+  case RISCV::SH_INX:
     return maskTrailingOnes<unsigned>(1U);
   case RISCV::LW:
   case RISCV::SW:
@@ -173,6 +177,7 @@ static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
 // Return true if Reg is in a compressed register class.
 static bool isCompressedReg(Register Reg) {
   return RISCV::GPRCRegClass.contains(Reg) ||
+         RISCV::GPRF16CRegClass.contains(Reg) ||
          RISCV::FPR32CRegClass.contains(Reg) ||
          RISCV::FPR64CRegClass.contains(Reg);
 }
@@ -186,6 +191,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
     return false;
   case RISCV::LBU:
   case RISCV::LH:
+  case RISCV::LH_INX:
   case RISCV::LHU:
     return STI.hasStdExtZcb();
   case RISCV::LW:
@@ -207,6 +213,7 @@ static bool isCompressibleStore(const MachineInstr &MI) {
     return false;
   case RISCV::SB:
   case RISCV::SH:
+  case RISCV::SH_INX:
     return STI.hasStdExtZcb();
   case RISCV::SW:
   case RISCV::SD:
@@ -320,6 +327,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
 
   // Work out the compressed register class from which to scavenge.
if (RISCV::GPRRegClass.contains(RegImm.Reg)) RCToScavenge = &RISCV::GPRCRegClass; + else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) + RCToScavenge = &RISCV::GPRF16CRegClass; else if (RISCV::FPR32RegClass.contains(RegImm.Reg)) RCToScavenge = &RISCV::FPR32CRegClass; else if (RISCV::FPR64RegClass.contains(RegImm.Reg)) @@ -410,6 +419,11 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) { BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg) .addReg(RegImm.Reg) .addImm(RegImm.Imm); + } else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) { + assert(RegImm.Imm == 0); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX), + NewReg) + .addReg(RegImm.Reg); } else { // If we are looking at replacing an FPR register we don't expect to // have any offset. The only compressible FP instructions with an offset diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index b6ac3384e7d3e..b3a2877edde4e 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -385,6 +385,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, return false; case RISCV::LB: case RISCV::LH: + case RISCV::LH_INX: case RISCV::LW: case RISCV::LBU: case RISCV::LHU: @@ -395,6 +396,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, case RISCV::FLD: case RISCV::SB: case RISCV::SH: + case RISCV::SH_INX: case RISCV::SW: case RISCV::SD: case RISCV::FSH: diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 91d539a355ac2..a8b6be4fe277a 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -115,11 +115,11 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } // Use markSuperRegs to ensure any register aliases are also reserved - markSuperRegs(Reserved, RISCV::X2); // sp - markSuperRegs(Reserved, RISCV::X3); // gp - markSuperRegs(Reserved, RISCV::X4); // tp + markSuperRegs(Reserved, RISCV::X2_H); // sp + markSuperRegs(Reserved, RISCV::X3_H); // gp + markSuperRegs(Reserved, RISCV::X4_H); // tp if (TFI->hasFP(MF)) - markSuperRegs(Reserved, RISCV::X8); // fp + markSuperRegs(Reserved, RISCV::X8_H); // fp // Reserve the base register if we need to realign the stack and allocate // variable-sized objects at runtime. if (TFI->hasBP(MF)) @@ -131,7 +131,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // There are only 16 GPRs for RVE. if (Subtarget.hasStdExtE()) - for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + for (MCPhysReg Reg = RISCV::X16_H; Reg <= RISCV::X31_H; Reg++) markSuperRegs(Reserved, Reg); // V registers for code generation. We handle them manually. @@ -150,8 +150,8 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { if (Subtarget.hasStdExtE()) report_fatal_error("Graal reserved registers do not exist in RVE"); - markSuperRegs(Reserved, RISCV::X23); - markSuperRegs(Reserved, RISCV::X27); + markSuperRegs(Reserved, RISCV::X23_H); + markSuperRegs(Reserved, RISCV::X27_H); } // Shadow stack pointer. 
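[Editor's note, not part of the patch: the reservation code above switches from X2/X3/X4/... to their X2_H/X3_H/X4_H sub-registers because markSuperRegs sets the given register plus every super-register. Starting from the smallest sub-register therefore reserves both halves in one call; a hedged sketch of the direction this works in:

    markSuperRegs(Reserved, RISCV::X2_H); // reserves X2_H and its super-reg X2 (sp)
    markSuperRegs(Reserved, RISCV::X2);   // would leave the new X2_H unreserved
]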
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 5725d8eda88ce..9cb589f2441a2 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -82,42 +82,84 @@ def sub_gpr_odd : SubRegIndex<32, 32> { // instructions. let RegAltNameIndices = [ABIRegAltName] in { + // 16-bit sub-registers for use by Zhinx. Having a 16-bit sub-register reduces + // the spill size for these operations. let isConstant = true in - def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; + def X0_H : RISCVReg<0, "x0", ["zero"]>; let CostPerUse = [0, 1] in { - def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; - def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; - def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; - def X4 : RISCVReg<4, "x4", ["tp"]>, DwarfRegNum<[4]>; - def X5 : RISCVReg<5, "x5", ["t0"]>, DwarfRegNum<[5]>; - def X6 : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>; - def X7 : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>; + def X1_H : RISCVReg<1, "x1", ["ra"]>; + def X2_H : RISCVReg<2, "x2", ["sp"]>; + def X3_H : RISCVReg<3, "x3", ["gp"]>; + def X4_H : RISCVReg<4, "x4", ["tp"]>; + def X5_H : RISCVReg<5, "x5", ["t0"]>; + def X6_H : RISCVReg<6, "x6", ["t1"]>; + def X7_H : RISCVReg<7, "x7", ["t2"]>; } - def X8 : RISCVReg<8, "x8", ["s0", "fp"]>, DwarfRegNum<[8]>; - def X9 : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>; - def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>; - def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>; - def X12 : RISCVReg<12,"x12", ["a2"]>, DwarfRegNum<[12]>; - def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; - def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; - def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; + def X8_H : RISCVReg<8, "x8", ["s0", "fp"]>; + def X9_H : RISCVReg<9, "x9", ["s1"]>; + def X10_H : RISCVReg<10,"x10", ["a0"]>; + def X11_H : RISCVReg<11,"x11", ["a1"]>; + def X12_H : RISCVReg<12,"x12", ["a2"]>; + def X13_H : RISCVReg<13,"x13", ["a3"]>; + def X14_H : RISCVReg<14,"x14", ["a4"]>; + def X15_H : RISCVReg<15,"x15", ["a5"]>; let CostPerUse = [0, 1] in { - def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; - def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; - def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; - def X19 : RISCVReg<19,"x19", ["s3"]>, DwarfRegNum<[19]>; - def X20 : RISCVReg<20,"x20", ["s4"]>, DwarfRegNum<[20]>; - def X21 : RISCVReg<21,"x21", ["s5"]>, DwarfRegNum<[21]>; - def X22 : RISCVReg<22,"x22", ["s6"]>, DwarfRegNum<[22]>; - def X23 : RISCVReg<23,"x23", ["s7"]>, DwarfRegNum<[23]>; - def X24 : RISCVReg<24,"x24", ["s8"]>, DwarfRegNum<[24]>; - def X25 : RISCVReg<25,"x25", ["s9"]>, DwarfRegNum<[25]>; - def X26 : RISCVReg<26,"x26", ["s10"]>, DwarfRegNum<[26]>; - def X27 : RISCVReg<27,"x27", ["s11"]>, DwarfRegNum<[27]>; - def X28 : RISCVReg<28,"x28", ["t3"]>, DwarfRegNum<[28]>; - def X29 : RISCVReg<29,"x29", ["t4"]>, DwarfRegNum<[29]>; - def X30 : RISCVReg<30,"x30", ["t5"]>, DwarfRegNum<[30]>; - def X31 : RISCVReg<31,"x31", ["t6"]>, DwarfRegNum<[31]>; + def X16_H : RISCVReg<16,"x16", ["a6"]>; + def X17_H : RISCVReg<17,"x17", ["a7"]>; + def X18_H : RISCVReg<18,"x18", ["s2"]>; + def X19_H : RISCVReg<19,"x19", ["s3"]>; + def X20_H : RISCVReg<20,"x20", ["s4"]>; + def X21_H : RISCVReg<21,"x21", ["s5"]>; + def X22_H : RISCVReg<22,"x22", ["s6"]>; + def X23_H : RISCVReg<23,"x23", ["s7"]>; + def X24_H : RISCVReg<24,"x24", ["s8"]>; + def X25_H : RISCVReg<25,"x25", ["s9"]>; + def X26_H : RISCVReg<26,"x26", 
["s10"]>; + def X27_H : RISCVReg<27,"x27", ["s11"]>; + def X28_H : RISCVReg<28,"x28", ["t3"]>; + def X29_H : RISCVReg<29,"x29", ["t4"]>; + def X30_H : RISCVReg<30,"x30", ["t5"]>; + def X31_H : RISCVReg<31,"x31", ["t6"]>; + } + + let SubRegIndices = [sub_16] in { + let isConstant = true in + def X0 : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>, DwarfRegNum<[0]>; + let CostPerUse = [0, 1] in { + def X1 : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>, DwarfRegNum<[1]>; + def X2 : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>, DwarfRegNum<[2]>; + def X3 : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>, DwarfRegNum<[3]>; + def X4 : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>, DwarfRegNum<[4]>; + def X5 : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>, DwarfRegNum<[5]>; + def X6 : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>, DwarfRegNum<[6]>; + def X7 : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>, DwarfRegNum<[7]>; + } + def X8 : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>, DwarfRegNum<[8]>; + def X9 : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>, DwarfRegNum<[9]>; + def X10 : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>, DwarfRegNum<[10]>; + def X11 : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>, DwarfRegNum<[11]>; + def X12 : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>, DwarfRegNum<[12]>; + def X13 : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>, DwarfRegNum<[13]>; + def X14 : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>, DwarfRegNum<[14]>; + def X15 : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>, DwarfRegNum<[15]>; + let CostPerUse = [0, 1] in { + def X16 : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>, DwarfRegNum<[16]>; + def X17 : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>, DwarfRegNum<[17]>; + def X18 : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>, DwarfRegNum<[18]>; + def X19 : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>, DwarfRegNum<[19]>; + def X20 : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>, DwarfRegNum<[20]>; + def X21 : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>, DwarfRegNum<[21]>; + def X22 : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>, DwarfRegNum<[22]>; + def X23 : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>, DwarfRegNum<[23]>; + def X24 : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>, DwarfRegNum<[24]>; + def X25 : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>, DwarfRegNum<[25]>; + def X26 : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>, DwarfRegNum<[26]>; + def X27 : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>, DwarfRegNum<[27]>; + def X28 : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>, DwarfRegNum<[28]>; + def X29 : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>, DwarfRegNum<[29]>; + def X30 : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>, DwarfRegNum<[30]>; + def X31 : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>, DwarfRegNum<[31]>; + } } } @@ -565,8 +607,17 @@ def VRM8NoV0 : VReg; def VMV0 : VReg; +// 16-bit GPR sub-register class used by Zhinx instructions. 
+def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17), + (sequence "X%u_H", 5, 7), + (sequence "X%u_H", 28, 31), + (sequence "X%u_H", 8, 9), + (sequence "X%u_H", 18, 27), + (sequence "X%u_H", 0, 4))>; +def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15), + (sequence "X%u_H", 8, 9))>; + let RegInfos = XLenRI in { -def GPRF16 : RISCVRegisterClass<[f16], 16, (add GPR)>; def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>; } // RegInfos = XLenRI diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll index ad81db75f7bc9..4831f0b24c7fe 100644 --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -1,14 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32f -code-model=small -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I-SMALL -; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32f -code-model=medium -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I-MEDIUM -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=small -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-SMALL -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=medium -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-MEDIUM -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=large -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-LARGE +; RUN: llc -mtriple=riscv32 -mattr=+f,+zfh -target-abi=ilp32f -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-SMALL,RV32F-SMALL +; RUN: llc -mtriple=riscv32 -mattr=+f,+zfh -target-abi=ilp32f -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-MEDIUM,RV32F-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-SMALL,RV64F-SMALL +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-MEDIUM,RV64F-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=large -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-LARGE,RV64F-LARGE +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zhinx -target-abi=ilp32 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-SMALL,RV32FINX-SMALL +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zhinx -target-abi=ilp32 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-MEDIUM,RV32FINX-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-SMALL,RV64FINX-SMALL +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-MEDIUM,RV64FINX-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=large -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-LARGE,RV64FINX-LARGE ; Check lowering of globals @G = global i32 0 @@ -238,43 +248,78 @@ indirectgoto: ; Check lowering of constantpools define float @lower_constantpool(float %a) 
nounwind { -; RV32I-SMALL-LABEL: lower_constantpool: -; RV32I-SMALL: # %bb.0: -; RV32I-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32I-SMALL-NEXT: fadd.s fa0, fa0, fa5 -; RV32I-SMALL-NEXT: ret +; RV32F-SMALL-LABEL: lower_constantpool: +; RV32F-SMALL: # %bb.0: +; RV32F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) +; RV32F-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; RV32F-SMALL-NEXT: fadd.s fa0, fa0, fa5 +; RV32F-SMALL-NEXT: ret ; -; RV32I-MEDIUM-LABEL: lower_constantpool: -; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Lpcrel_hi3: -; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV32I-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV32I-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 -; RV32I-MEDIUM-NEXT: ret +; RV32F-MEDIUM-LABEL: lower_constantpool: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi3: +; RV32F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV32F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 +; RV32F-MEDIUM-NEXT: ret ; -; RV64I-SMALL-LABEL: lower_constantpool: -; RV64I-SMALL: # %bb.0: -; RV64I-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV64I-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64I-SMALL-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-SMALL-NEXT: ret +; RV64F-SMALL-LABEL: lower_constantpool: +; RV64F-SMALL: # %bb.0: +; RV64F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) +; RV64F-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; RV64F-SMALL-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-SMALL-NEXT: ret ; -; RV64I-MEDIUM-LABEL: lower_constantpool: -; RV64I-MEDIUM: # %bb.0: -; RV64I-MEDIUM-NEXT: .Lpcrel_hi3: -; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64I-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64I-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-MEDIUM-NEXT: ret +; RV64F-MEDIUM-LABEL: lower_constantpool: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi3: +; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV64F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-MEDIUM-NEXT: ret ; -; RV64I-LARGE-LABEL: lower_constantpool: -; RV64I-LARGE: # %bb.0: -; RV64I-LARGE-NEXT: .Lpcrel_hi3: -; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64I-LARGE-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64I-LARGE-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-LARGE-NEXT: ret +; RV64F-LARGE-LABEL: lower_constantpool: +; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi3: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV64F-LARGE-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-LARGE-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-SMALL-LABEL: lower_constantpool: +; RV32FINX-SMALL: # %bb.0: +; RV32FINX-SMALL-NEXT: lui a1, 260097 +; RV32FINX-SMALL-NEXT: addi a1, a1, -2048 +; RV32FINX-SMALL-NEXT: fadd.s a0, a0, a1 +; RV32FINX-SMALL-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_constantpool: +; RV32FINX-MEDIUM: # %bb.0: +; RV32FINX-MEDIUM-NEXT: lui a1, 260097 +; RV32FINX-MEDIUM-NEXT: addi a1, a1, -2048 +; RV32FINX-MEDIUM-NEXT: fadd.s a0, a0, a1 +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-SMALL-LABEL: lower_constantpool: +; RV64FINX-SMALL: # %bb.0: +; RV64FINX-SMALL-NEXT: lui a1, 260097 +; RV64FINX-SMALL-NEXT: addiw a1, a1, -2048 +; RV64FINX-SMALL-NEXT: fadd.s a0, a0, a1 +; RV64FINX-SMALL-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_constantpool: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: lui a1, 260097 +; RV64FINX-MEDIUM-NEXT: addiw a1, a1, -2048 +; RV64FINX-MEDIUM-NEXT: fadd.s a0, a0, a1 +; RV64FINX-MEDIUM-NEXT: ret +; +; 
RV64FINX-LARGE-LABEL: lower_constantpool: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: lui a1, 260097 +; RV64FINX-LARGE-NEXT: addiw a1, a1, -2048 +; RV64FINX-LARGE-NEXT: fadd.s a0, a0, a1 +; RV64FINX-LARGE-NEXT: ret %1 = fadd float %a, 1.000244140625 ret float %1 } @@ -289,13 +334,13 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV32I-SMALL-NEXT: lw a0, %lo(W)(a0) ; RV32I-SMALL-NEXT: ret ; -; RV32I-MEDIUM-LABEL: lower_extern_weak: -; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Lpcrel_hi4: -; RV32I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV32I-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV32I-MEDIUM-NEXT: lw a0, 0(a0) -; RV32I-MEDIUM-NEXT: ret +; RV32F-MEDIUM-LABEL: lower_extern_weak: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi4: +; RV32F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV32F-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32F-MEDIUM-NEXT: lw a0, 0(a0) +; RV32F-MEDIUM-NEXT: ret ; ; RV64I-SMALL-LABEL: lower_extern_weak: ; RV64I-SMALL: # %bb.0: @@ -303,21 +348,130 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV64I-SMALL-NEXT: lw a0, %lo(W)(a0) ; RV64I-SMALL-NEXT: ret ; -; RV64I-MEDIUM-LABEL: lower_extern_weak: -; RV64I-MEDIUM: # %bb.0: -; RV64I-MEDIUM-NEXT: .Lpcrel_hi4: -; RV64I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV64I-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-MEDIUM-NEXT: lw a0, 0(a0) -; RV64I-MEDIUM-NEXT: ret +; RV64F-MEDIUM-LABEL: lower_extern_weak: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi4: +; RV64F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV64F-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64F-MEDIUM-NEXT: lw a0, 0(a0) +; RV64F-MEDIUM-NEXT: ret ; -; RV64I-LARGE-LABEL: lower_extern_weak: -; RV64I-LARGE: # %bb.0: -; RV64I-LARGE-NEXT: .Lpcrel_hi4: -; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) -; RV64I-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-NEXT: lw a0, 0(a0) -; RV64I-LARGE-NEXT: ret +; RV64F-LARGE-LABEL: lower_extern_weak: +; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi4: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) +; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64F-LARGE-NEXT: lw a0, 0(a0) +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_extern_weak: +; RV32FINX-MEDIUM: # %bb.0: +; RV32FINX-MEDIUM-NEXT: .Lpcrel_hi3: +; RV32FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV32FINX-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32FINX-MEDIUM-NEXT: lw a0, 0(a0) +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_extern_weak: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: .Lpcrel_hi3: +; RV64FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV64FINX-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64FINX-MEDIUM-NEXT: lw a0, 0(a0) +; RV64FINX-MEDIUM-NEXT: ret +; +; RV64FINX-LARGE-LABEL: lower_extern_weak: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: .Lpcrel_hi3: +; RV64FINX-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) +; RV64FINX-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64FINX-LARGE-NEXT: lw a0, 0(a0) +; RV64FINX-LARGE-NEXT: ret %1 = load volatile i32, ptr @W ret i32 %1 } + +@X = global half 1.5 + +define half @lower_global_half(half %a) nounwind { +; RV32F-SMALL-LABEL: lower_global_half: +; RV32F-SMALL: # %bb.0: +; RV32F-SMALL-NEXT: lui a0, %hi(X) +; RV32F-SMALL-NEXT: flh fa5, %lo(X)(a0) +; RV32F-SMALL-NEXT: fadd.h fa0, fa0, fa5 +; RV32F-SMALL-NEXT: ret +; +; RV32F-MEDIUM-LABEL: lower_global_half: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV32F-MEDIUM-NEXT: 
auipc a0, %pcrel_hi(X) +; RV32F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV32F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 +; RV32F-MEDIUM-NEXT: ret +; +; RV64F-SMALL-LABEL: lower_global_half: +; RV64F-SMALL: # %bb.0: +; RV64F-SMALL-NEXT: lui a0, %hi(X) +; RV64F-SMALL-NEXT: flh fa5, %lo(X)(a0) +; RV64F-SMALL-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-SMALL-NEXT: ret +; +; RV64F-MEDIUM-LABEL: lower_global_half: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(X) +; RV64F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-MEDIUM-NEXT: ret +; +; RV64F-LARGE-LABEL: lower_global_half: +; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi5: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI5_0) +; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-LARGE-NEXT: flh fa5, 0(a0) +; RV64F-LARGE-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-SMALL-LABEL: lower_global_half: +; RV32FINX-SMALL: # %bb.0: +; RV32FINX-SMALL-NEXT: lui a1, %hi(X) +; RV32FINX-SMALL-NEXT: lh a1, %lo(X)(a1) +; RV32FINX-SMALL-NEXT: fadd.h a0, a0, a1 +; RV32FINX-SMALL-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_global_half: +; RV32FINX-MEDIUM: # %bb.0: +; RV32FINX-MEDIUM-NEXT: .Lpcrel_hi4: +; RV32FINX-MEDIUM-NEXT: auipc a1, %pcrel_hi(X) +; RV32FINX-MEDIUM-NEXT: lh a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV32FINX-MEDIUM-NEXT: fadd.h a0, a0, a1 +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-SMALL-LABEL: lower_global_half: +; RV64FINX-SMALL: # %bb.0: +; RV64FINX-SMALL-NEXT: lui a1, %hi(X) +; RV64FINX-SMALL-NEXT: lh a1, %lo(X)(a1) +; RV64FINX-SMALL-NEXT: fadd.h a0, a0, a1 +; RV64FINX-SMALL-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_global_half: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: .Lpcrel_hi4: +; RV64FINX-MEDIUM-NEXT: auipc a1, %pcrel_hi(X) +; RV64FINX-MEDIUM-NEXT: lh a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV64FINX-MEDIUM-NEXT: fadd.h a0, a0, a1 +; RV64FINX-MEDIUM-NEXT: ret +; +; RV64FINX-LARGE-LABEL: lower_global_half: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: .Lpcrel_hi4: +; RV64FINX-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI5_0) +; RV64FINX-LARGE-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV64FINX-LARGE-NEXT: lh a1, 0(a1) +; RV64FINX-LARGE-NEXT: fadd.h a0, a0, a1 +; RV64FINX-LARGE-NEXT: ret + %b = load half, ptr @X + %1 = fadd half %a, %b + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index ca40ba0399973..de5bb8a30db16 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,28 +246,32 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -96 -; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte 
Folded Spill +; ZHINX32-NEXT: addi sp, sp, -112 +; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: lh t1, 116(sp) -; ZHINX32-NEXT: lh t2, 120(sp) -; ZHINX32-NEXT: lh s0, 124(sp) -; ZHINX32-NEXT: lh t3, 128(sp) -; ZHINX32-NEXT: lh t4, 132(sp) -; ZHINX32-NEXT: lh t5, 136(sp) -; ZHINX32-NEXT: lh t6, 140(sp) +; ZHINX32-NEXT: sh t0, 58(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 116(sp) +; ZHINX32-NEXT: sh t0, 56(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 120(sp) +; ZHINX32-NEXT: sh t0, 54(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 124(sp) +; ZHINX32-NEXT: sh t0, 52(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t6, 128(sp) +; ZHINX32-NEXT: lh t5, 132(sp) +; ZHINX32-NEXT: lh t4, 136(sp) +; ZHINX32-NEXT: lh s0, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -280,122 +284,134 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: sh ra, 38(sp) -; ZHINX32-NEXT: sh s11, 36(sp) -; ZHINX32-NEXT: sh s10, 34(sp) -; ZHINX32-NEXT: sh s9, 32(sp) -; ZHINX32-NEXT: sh s8, 30(sp) -; ZHINX32-NEXT: sh s7, 28(sp) -; ZHINX32-NEXT: sh s6, 26(sp) -; ZHINX32-NEXT: sh s5, 24(sp) -; ZHINX32-NEXT: sh s4, 22(sp) -; ZHINX32-NEXT: sh s3, 20(sp) -; ZHINX32-NEXT: sh s2, 18(sp) -; ZHINX32-NEXT: sh s1, 16(sp) -; ZHINX32-NEXT: sh t6, 14(sp) -; ZHINX32-NEXT: sh t5, 12(sp) -; ZHINX32-NEXT: sh t4, 10(sp) -; ZHINX32-NEXT: sh t3, 8(sp) -; ZHINX32-NEXT: lh t3, 96(sp) -; ZHINX32-NEXT: lh t4, 100(sp) -; ZHINX32-NEXT: lh t5, 104(sp) -; ZHINX32-NEXT: lh t6, 108(sp) +; ZHINX32-NEXT: lh t3, 192(sp) +; ZHINX32-NEXT: lh t2, 196(sp) +; ZHINX32-NEXT: lh t1, 200(sp) +; ZHINX32-NEXT: lh t0, 204(sp) +; ZHINX32-NEXT: sh t0, 38(sp) +; ZHINX32-NEXT: sh t1, 36(sp) +; ZHINX32-NEXT: sh t2, 34(sp) +; ZHINX32-NEXT: sh t3, 32(sp) +; ZHINX32-NEXT: sh ra, 30(sp) +; ZHINX32-NEXT: sh s11, 28(sp) +; ZHINX32-NEXT: sh s10, 26(sp) +; ZHINX32-NEXT: sh s9, 24(sp) +; ZHINX32-NEXT: sh s8, 22(sp) +; ZHINX32-NEXT: sh s7, 20(sp) +; ZHINX32-NEXT: sh s6, 18(sp) +; ZHINX32-NEXT: sh s5, 16(sp) +; ZHINX32-NEXT: sh s4, 14(sp) +; ZHINX32-NEXT: sh s3, 12(sp) +; ZHINX32-NEXT: sh s2, 10(sp) +; ZHINX32-NEXT: sh s1, 8(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t2, 4(sp) -; ZHINX32-NEXT: sh t1, 2(sp) -; ZHINX32-NEXT: sh t0, 0(sp) +; ZHINX32-NEXT: sh t4, 4(sp) +; ZHINX32-NEXT: sh t5, 2(sp) +; ZHINX32-NEXT: sh t6, 0(sp) +; ZHINX32-NEXT: lh t3, 58(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t4, 56(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t5, 54(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t6, 52(sp) # 2-byte Folded Reload ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 84(sp) # 
4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 96 +; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 112 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -144 -; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -160 +; ZHINX64-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: lh t0, 160(sp) +; ZHINX64-NEXT: sh t0, 54(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t0, 168(sp) +; ZHINX64-NEXT: sh t0, 52(sp) # 2-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; ZHINX64-NEXT: lh t1, 184(sp) -; ZHINX64-NEXT: lh t2, 192(sp) -; ZHINX64-NEXT: lh s0, 200(sp) -; ZHINX64-NEXT: lh t3, 208(sp) -; ZHINX64-NEXT: lh t4, 216(sp) -; ZHINX64-NEXT: lh t5, 224(sp) -; ZHINX64-NEXT: lh t6, 232(sp) -; ZHINX64-NEXT: lh s1, 240(sp) -; ZHINX64-NEXT: lh s2, 248(sp) -; ZHINX64-NEXT: lh s3, 256(sp) -; ZHINX64-NEXT: lh s4, 264(sp) -; ZHINX64-NEXT: lh s5, 272(sp) -; ZHINX64-NEXT: lh s6, 280(sp) -; ZHINX64-NEXT: lh s7, 288(sp) -; ZHINX64-NEXT: lh s8, 
296(sp) -; ZHINX64-NEXT: lh s9, 304(sp) -; ZHINX64-NEXT: lh s10, 312(sp) -; ZHINX64-NEXT: lh s11, 320(sp) -; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: sh ra, 38(sp) -; ZHINX64-NEXT: sh s11, 36(sp) -; ZHINX64-NEXT: sh s10, 34(sp) -; ZHINX64-NEXT: sh s9, 32(sp) -; ZHINX64-NEXT: sh s8, 30(sp) -; ZHINX64-NEXT: sh s7, 28(sp) -; ZHINX64-NEXT: sh s6, 26(sp) -; ZHINX64-NEXT: sh s5, 24(sp) -; ZHINX64-NEXT: sh s4, 22(sp) -; ZHINX64-NEXT: sh s3, 20(sp) -; ZHINX64-NEXT: sh s2, 18(sp) -; ZHINX64-NEXT: sh s1, 16(sp) -; ZHINX64-NEXT: sh t6, 14(sp) -; ZHINX64-NEXT: sh t5, 12(sp) -; ZHINX64-NEXT: sh t4, 10(sp) -; ZHINX64-NEXT: sh t3, 8(sp) -; ZHINX64-NEXT: lh t3, 144(sp) -; ZHINX64-NEXT: lh t4, 152(sp) -; ZHINX64-NEXT: lh t5, 160(sp) -; ZHINX64-NEXT: lh t6, 168(sp) +; ZHINX64-NEXT: sh t0, 50(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t0, 184(sp) +; ZHINX64-NEXT: sh t0, 48(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t6, 192(sp) +; ZHINX64-NEXT: lh t5, 200(sp) +; ZHINX64-NEXT: lh t4, 208(sp) +; ZHINX64-NEXT: lh s0, 216(sp) +; ZHINX64-NEXT: lh s1, 224(sp) +; ZHINX64-NEXT: lh s2, 232(sp) +; ZHINX64-NEXT: lh s3, 240(sp) +; ZHINX64-NEXT: lh s4, 248(sp) +; ZHINX64-NEXT: lh s5, 256(sp) +; ZHINX64-NEXT: lh s6, 264(sp) +; ZHINX64-NEXT: lh s7, 272(sp) +; ZHINX64-NEXT: lh s8, 280(sp) +; ZHINX64-NEXT: lh s9, 288(sp) +; ZHINX64-NEXT: lh s10, 296(sp) +; ZHINX64-NEXT: lh s11, 304(sp) +; ZHINX64-NEXT: lh ra, 312(sp) +; ZHINX64-NEXT: lh t3, 320(sp) +; ZHINX64-NEXT: lh t2, 328(sp) +; ZHINX64-NEXT: lh t1, 336(sp) +; ZHINX64-NEXT: lh t0, 344(sp) +; ZHINX64-NEXT: sh t0, 38(sp) +; ZHINX64-NEXT: sh t1, 36(sp) +; ZHINX64-NEXT: sh t2, 34(sp) +; ZHINX64-NEXT: sh t3, 32(sp) +; ZHINX64-NEXT: sh ra, 30(sp) +; ZHINX64-NEXT: sh s11, 28(sp) +; ZHINX64-NEXT: sh s10, 26(sp) +; ZHINX64-NEXT: sh s9, 24(sp) +; ZHINX64-NEXT: sh s8, 22(sp) +; ZHINX64-NEXT: sh s7, 20(sp) +; ZHINX64-NEXT: sh s6, 18(sp) +; ZHINX64-NEXT: sh s5, 16(sp) +; ZHINX64-NEXT: sh s4, 14(sp) +; ZHINX64-NEXT: sh s3, 12(sp) +; ZHINX64-NEXT: sh s2, 10(sp) +; ZHINX64-NEXT: sh s1, 8(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t2, 4(sp) -; ZHINX64-NEXT: sh t1, 2(sp) -; ZHINX64-NEXT: sh t0, 0(sp) +; ZHINX64-NEXT: sh t4, 4(sp) +; ZHINX64-NEXT: sh t5, 2(sp) +; ZHINX64-NEXT: sh t6, 0(sp) +; ZHINX64-NEXT: lh t3, 54(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t4, 52(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t5, 50(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t6, 48(sp) # 2-byte Folded Reload ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 144 +; ZHINX64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 112(sp) # 
8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 160 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index 27829f2b65759..4c2deafdc7e66 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -466,20 +466,26 @@ define half @fsgnj_h(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: fsgnj_h: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fsgnj_h: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 @@ -725,6 +731,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: fsgnjn_h: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1 ; RV32IZHINXMIN-NEXT: fcvt.s.h a2, a0 ; RV32IZHINXMIN-NEXT: fadd.s a1, a2, a1 @@ -735,10 +742,12 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fsgnjn_h: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1 ; RV64IZHINXMIN-NEXT: fcvt.s.h a2, a0 ; RV64IZHINXMIN-NEXT: fadd.s a1, a2, a1 @@ -749,6 +758,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = fadd half %a, %b %2 = fneg half %1 @@ -1702,8 +1712,7 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind { ; CHECKIZHINX-LABEL: fnmadd_h_3: ; CHECKIZHINX: # %bb.0: ; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2 -; CHECKIZHINX-NEXT: lui a1, 1048568 -; CHECKIZHINX-NEXT: xor a0, a0, a1 +; CHECKIZHINX-NEXT: fneg.h a0, a0 ; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_h_3: @@ -1798,6 +1807,7 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind { ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1 +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 
; CHECKIZHINXMIN-NEXT: ret %1 = call half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg half %1 @@ -1823,9 +1833,7 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind { ; ; CHECKIZHINX-LABEL: fnmadd_nsz: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2 -; CHECKIZHINX-NEXT: lui a1, 1048568 -; CHECKIZHINX-NEXT: xor a0, a0, a1 +; CHECKIZHINX-NEXT: fnmadd.h a0, a0, a1, a2 ; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_nsz: @@ -1920,6 +1928,7 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind { ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1 +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZHINXMIN-NEXT: ret %1 = call nsz half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg nsz half %1 @@ -2910,6 +2919,7 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; ; CHECKIZHINXMIN-LABEL: fsgnjx_f16: ; CHECKIZHINXMIN: # %bb.0: +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZHINXMIN-NEXT: lui a2, 1048568 ; CHECKIZHINXMIN-NEXT: and a0, a0, a2 ; CHECKIZHINXMIN-NEXT: li a2, 15 diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll index 506b7027a8b35..e0c47bfac6fec 100644 --- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll @@ -55,14 +55,12 @@ define half @fneg(half %a) nounwind { ; ; RV32IZHINX-LABEL: fneg: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: lui a1, 1048568 -; RV32IZHINX-NEXT: xor a0, a0, a1 +; RV32IZHINX-NEXT: fneg.h a0, a0 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: fneg: ; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: lui a1, 1048568 -; RV64IZHINX-NEXT: xor a0, a0, a1 +; RV64IZHINX-NEXT: fneg.h a0, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZFHMIN-LABEL: fneg: @@ -79,8 +77,10 @@ define half @fneg(half %a) nounwind { ; ; RVIZHINXMIN-LABEL: fneg: ; RVIZHINXMIN: # %bb.0: +; RVIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RVIZHINXMIN-NEXT: lui a1, 1048568 ; RVIZHINXMIN-NEXT: xor a0, a0, a1 +; RVIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RVIZHINXMIN-NEXT: ret %1 = fneg half %a ret half %1 @@ -115,14 +115,12 @@ define half @fabs(half %a) nounwind { ; ; RV32IZHINX-LABEL: fabs: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: slli a0, a0, 17 -; RV32IZHINX-NEXT: srli a0, a0, 17 +; RV32IZHINX-NEXT: fabs.h a0, a0 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: fabs: ; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: slli a0, a0, 49 -; RV64IZHINX-NEXT: srli a0, a0, 49 +; RV64IZHINX-NEXT: fabs.h a0, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZFHMIN-LABEL: fabs: @@ -139,14 +137,18 @@ define half @fabs(half %a) nounwind { ; ; RV32IZHINXMIN-LABEL: fabs: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fabs: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 @@ -227,22 +229,28 @@ define half @fcopysign_fneg(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: fcopysign_fneg: ; RV32IZHINXMIN: # %bb.0: +; 
RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: not a1, a1 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fcopysign_fneg: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: not a1, a1 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = fneg half %b %2 = call half @llvm.copysign.f16(half %a, half %1) diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index e5585661ce79a..0c84a08f1fd45 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -5536,10 +5536,12 @@ define half @bitcast_h_i16(i16 %a) nounwind { ; ; CHECKIZHINX-LABEL: bitcast_h_i16: ; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZDINXZHINX-LABEL: bitcast_h_i16: ; CHECKIZDINXZHINX: # %bb.0: +; CHECKIZDINXZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: bitcast_h_i16: @@ -5588,18 +5590,22 @@ define half @bitcast_h_i16(i16 %a) nounwind { ; ; CHECK32-IZHINXMIN-LABEL: bitcast_h_i16: ; CHECK32-IZHINXMIN: # %bb.0: +; CHECK32-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK32-IZHINXMIN-NEXT: ret ; ; CHECK64-IZHINXMIN-LABEL: bitcast_h_i16: ; CHECK64-IZHINXMIN: # %bb.0: +; CHECK64-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: bitcast_h_i16: ; CHECK32-IZDINXZHINXMIN: # %bb.0: +; CHECK32-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK32-IZDINXZHINXMIN-NEXT: ret ; ; CHECK64-IZDINXZHINXMIN-LABEL: bitcast_h_i16: ; CHECK64-IZDINXZHINXMIN: # %bb.0: +; CHECK64-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK64-IZDINXZHINXMIN-NEXT: ret %1 = bitcast i16 %a to half ret half %1 @@ -5623,10 +5629,12 @@ define i16 @bitcast_i16_h(half %a) nounwind { ; ; CHECKIZHINX-LABEL: bitcast_i16_h: ; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZDINXZHINX-LABEL: bitcast_i16_h: ; CHECKIZDINXZHINX: # %bb.0: +; CHECKIZDINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: bitcast_i16_h: @@ -5667,18 +5675,22 @@ define i16 @bitcast_i16_h(half %a) nounwind { ; ; CHECK32-IZHINXMIN-LABEL: bitcast_i16_h: ; CHECK32-IZHINXMIN: # %bb.0: +; CHECK32-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK32-IZHINXMIN-NEXT: ret ; ; CHECK64-IZHINXMIN-LABEL: bitcast_i16_h: ; CHECK64-IZHINXMIN: # %bb.0: +; CHECK64-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: bitcast_i16_h: ; CHECK32-IZDINXZHINXMIN: # %bb.0: +; CHECK32-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; 
CHECK32-IZDINXZHINXMIN-NEXT: ret ; ; CHECK64-IZDINXZHINXMIN-LABEL: bitcast_i16_h: ; CHECK64-IZDINXZHINXMIN: # %bb.0: +; CHECK64-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK64-IZDINXZHINXMIN-NEXT: ret %1 = bitcast half %a to i16 ret i16 %1 diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll index 2ebc28c2ebd44..1045df1c3e766 100644 --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -32,12 +32,14 @@ define half @half_imm() nounwind { ; RV32IZHINX: # %bb.0: ; RV32IZHINX-NEXT: lui a0, 4 ; RV32IZHINX-NEXT: addi a0, a0, 512 +; RV32IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: half_imm: ; RV64IZHINX: # %bb.0: ; RV64IZHINX-NEXT: lui a0, 4 ; RV64IZHINX-NEXT: addiw a0, a0, 512 +; RV64IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_imm: @@ -50,12 +52,14 @@ define half @half_imm() nounwind { ; RV32IZHINXMIN: # %bb.0: ; RV32IZHINXMIN-NEXT: lui a0, 4 ; RV32IZHINXMIN-NEXT: addi a0, a0, 512 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: half_imm: ; RV64IZHINXMIN: # %bb.0: ; RV64IZHINXMIN-NEXT: lui a0, 4 ; RV64IZHINXMIN-NEXT: addiw a0, a0, 512 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret ret half 3.0 } diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 3e0f838270aa5..81e29329e7181 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -1797,17 +1797,10 @@ define half @fabs_f16(half %a) nounwind { ; CHECKIZFH-NEXT: fabs.h fa0, fa0 ; CHECKIZFH-NEXT: ret ; -; RV32IZHINX-LABEL: fabs_f16: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: slli a0, a0, 17 -; RV32IZHINX-NEXT: srli a0, a0, 17 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: fabs_f16: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: slli a0, a0, 49 -; RV64IZHINX-NEXT: srli a0, a0, 49 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: fabs_f16: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: fabs.h a0, a0 +; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fabs_f16: ; RV32I: # %bb.0: @@ -1839,14 +1832,18 @@ define half @fabs_f16(half %a) nounwind { ; ; RV32IZHINXMIN-LABEL: fabs_f16: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fabs_f16: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 @@ -2094,20 +2091,26 @@ define half @copysign_f16(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: copysign_f16: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; 
RV64IZHINXMIN-LABEL: copysign_f16: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 @@ -2835,6 +2838,7 @@ define i1 @isnan_d_fpclass(half %x) { ; ; RV32IZHINXMIN-LABEL: isnan_d_fpclass: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: li a1, 31 @@ -2844,6 +2848,7 @@ define i1 @isnan_d_fpclass(half %x) { ; ; RV64IZHINXMIN-LABEL: isnan_d_fpclass: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: li a1, 31 diff --git a/llvm/test/CodeGen/RISCV/kcfi-mir.ll b/llvm/test/CodeGen/RISCV/kcfi-mir.ll index 9d8475e2171ea..e478930d59abc 100644 --- a/llvm/test/CodeGen/RISCV/kcfi-mir.ll +++ b/llvm/test/CodeGen/RISCV/kcfi-mir.ll @@ -10,7 +10,7 @@ define void @f1(ptr noundef %x) !kcfi_type !1 { ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 ; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.0) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31, implicit-def dead $x1, implicit-def $x2, implicit killed $x10 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_h, implicit killed $x10 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoCALLIndirect killed $x10, csr_ilp32_lp64, implicit-def dead $x1, implicit-def $x2 ; CHECK-NEXT: } @@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) #0 { ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31, implicit killed $x10, implicit $x2 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit killed $x10, implicit $x2 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoTAILIndirect killed $x10, implicit $x2 ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir b/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir new file mode 100644 index 0000000000000..45fcc792d2fca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir @@ -0,0 +1,249 @@ +# NOTE: Assertions have been 
autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+zcb,+zhinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+zcb,+zhinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s + +--- | + define void @store_common_value_half(ptr %a, ptr %b, ptr %c) #0 { + entry: + store half 0.0, ptr %a, align 2 + store half 0.0, ptr %b, align 2 + store half 0.0, ptr %c, align 2 + ret void + } + + define void @store_common_ptr_half(ptr %p) #0 { + entry: + store volatile half 2.0, ptr %p, align 2 + store volatile half 32.0, ptr %p, align 2 + store volatile half 512.0, ptr %p, align 2 + ret void + } + + define void @load_common_ptr_half(ptr %p) #0 { + entry: + %0 = load volatile half, ptr %p, align 2 + %1 = load volatile half, ptr %p, align 2 + %2 = load volatile half, ptr %p, align 2 + ret void + } + + define void @store_large_offset_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + store volatile half 2.0, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 101 + store volatile half 32.0, ptr %1, align 2 + %2 = getelementptr inbounds half, ptr %p, i32 102 + store volatile half 512.0, ptr %2, align 2 + %3 = getelementptr inbounds half, ptr %p, i32 103 + store volatile half 16384.0, ptr %3, align 2 + ret void + } + + define void @load_large_offset_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + %a = load volatile half, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 100 + %b = load volatile half, ptr %1, align 2 + %2 = getelementptr inbounds half, ptr %p, i32 101 + %c = load volatile half, ptr %2, align 2 + %3 = getelementptr inbounds half, ptr %p, i32 101 + %d = load volatile half, ptr %3, align 2 + ret void + } + + define void @store_large_offset_no_opt_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds i8, ptr %p, i8 100 + store volatile half 2.0, ptr %0, align 2 + %1 = getelementptr inbounds i8, ptr %p, i8 101 + store volatile half 32.0, ptr %1, align 2 + %2 = getelementptr inbounds i8, ptr %p, i8 104 + store volatile half 512.0, ptr %2, align 2 + ret void + } + + define void @load_large_offset_no_opt_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + %a = load volatile half, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 101 + %c = load volatile half, ptr %1, align 2 + %2 = getelementptr inbounds half, ptr %p, i32 102 + %d = load volatile half, ptr %2, align 2 + ret void + } + + attributes #0 = { minsize } + +... +--- +name: store_common_value_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: store_common_value_half + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13_h = PseudoMV_FPR16INX $x0_h + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x10, 0 :: (store (s16) into %ir.a) + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x11, 0 :: (store (s16) into %ir.b) + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x12, 0 :: (store (s16) into %ir.c) + ; CHECK-NEXT: PseudoRET + SH_INX $x0_h, killed renamable $x10, 0 :: (store (s16) into %ir.a) + SH_INX $x0_h, killed renamable $x11, 0 :: (store (s16) into %ir.b) + SH_INX $x0_h, killed renamable $x12, 0 :: (store (s16) into %ir.c) + PseudoRET + +... 
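+# c.sh cannot encode $x0 as its data register (only x8-x15 are reachable from
+# the compressed 3-bit register fields), so the pass is expected to copy the
+# zero into a compressible register ($x13_h above) and rewrite all three
+# stores to use it. The next two tests keep the base pointer in $x16, which
+# is also outside the x8-x15 range; the expected rewrite copies it into $x11
+# first.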
+---
+name: store_common_ptr_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x16
+
+    ; CHECK-LABEL: name: store_common_ptr_half
+    ; CHECK: liveins: $x16
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x10 = LUI 4
+    ; CHECK-NEXT: $x11 = ADDI $x16, 0
+    ; CHECK-NEXT: SH_INX killed renamable $x10_h, $x11, 0 :: (volatile store (s16) into %ir.p)
+    ; CHECK-NEXT: renamable $x10 = LUI 5
+    ; CHECK-NEXT: SH_INX killed renamable $x10_h, $x11, 0 :: (volatile store (s16) into %ir.p)
+    ; CHECK-NEXT: renamable $x10 = LUI 6
+    ; CHECK-NEXT: SH_INX killed renamable $x10_h, killed $x11, 0 :: (volatile store (s16) into %ir.p)
+    ; CHECK-NEXT: PseudoRET
+    renamable $x10 = LUI 4
+    SH_INX killed renamable $x10_h, renamable $x16, 0 :: (volatile store (s16) into %ir.p)
+    renamable $x10 = LUI 5
+    SH_INX killed renamable $x10_h, renamable $x16, 0 :: (volatile store (s16) into %ir.p)
+    renamable $x10 = LUI 6
+    SH_INX killed renamable $x10_h, killed renamable $x16, 0 :: (volatile store (s16) into %ir.p)
+    PseudoRET
+
+...
+---
+name: load_common_ptr_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x16
+
+    ; CHECK-LABEL: name: load_common_ptr_half
+    ; CHECK: liveins: $x16
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $x11 = ADDI $x16, 0
+    ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.p)
+    ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.p)
+    ; CHECK-NEXT: dead $x10_h = LH_INX killed $x11, 0 :: (volatile load (s16) from %ir.p)
+    ; CHECK-NEXT: PseudoRET
+    dead $x10_h = LH_INX renamable $x16, 0 :: (volatile load (s16) from %ir.p)
+    dead $x10_h = LH_INX renamable $x16, 0 :: (volatile load (s16) from %ir.p)
+    dead $x10_h = LH_INX killed renamable $x16, 0 :: (volatile load (s16) from %ir.p)
+    PseudoRET
+
+...
+---
+name: store_large_offset_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x10
+    ; CHECK-LABEL: name: store_large_offset_half
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x11 = LUI 4
+    ; CHECK-NEXT: $x12 = ADDI $x10, 200
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 0 :: (volatile store (s16) into %ir.0)
+    ; CHECK-NEXT: renamable $x11 = LUI 5
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 0 :: (volatile store (s16) into %ir.1)
+    ; CHECK-NEXT: renamable $x11 = LUI 6
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 2 :: (volatile store (s16) into %ir.2)
+    ; CHECK-NEXT: renamable $x11 = LUI 7
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, killed $x12, 2 :: (volatile store (s16) into %ir.3)
+    ; CHECK-NEXT: PseudoRET
+    renamable $x11 = LUI 4
+    SH_INX killed renamable $x11_h, renamable $x10, 200 :: (volatile store (s16) into %ir.0)
+    renamable $x11 = LUI 5
+    SH_INX killed renamable $x11_h, renamable $x10, 200 :: (volatile store (s16) into %ir.1)
+    renamable $x11 = LUI 6
+    SH_INX killed renamable $x11_h, renamable $x10, 202 :: (volatile store (s16) into %ir.2)
+    renamable $x11 = LUI 7
+    SH_INX killed renamable $x11_h, killed renamable $x10, 202 :: (volatile store (s16) into %ir.3)
+    PseudoRET
+
+...
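+# As with store_large_offset_half above, the loads below use offsets that
+# c.lh/c.sh cannot encode (the compressed forms only reach 0 or 2), so the
+# pass folds the common +100 into the base with an ADDI and leaves the
+# compressible 0/2 offsets behind. The *_no_opt variants that follow use
+# three distinct offsets, so no single base adjustment makes every access
+# compressible and the code is expected to be left unchanged.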
+---
+name: load_large_offset_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x16
+
+    ; CHECK-LABEL: name: load_large_offset_half
+    ; CHECK: liveins: $x16
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $x11 = ADDI $x16, 100
+    ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.0)
+    ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.1)
+    ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 2 :: (volatile load (s16) from %ir.2)
+    ; CHECK-NEXT: dead $x10_h = LH_INX killed $x11, 2 :: (volatile load (s16) from %ir.3)
+    ; CHECK-NEXT: PseudoRET
+    dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.0)
+    dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.1)
+    dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s16) from %ir.2)
+    dead $x10_h = LH_INX killed renamable $x16, 102 :: (volatile load (s16) from %ir.3)
+    PseudoRET
+
+...
+---
+name: store_large_offset_no_opt_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x16
+
+    ; CHECK-LABEL: name: store_large_offset_no_opt_half
+    ; CHECK: liveins: $x16
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x11 = LUI 4
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 200 :: (volatile store (s16) into %ir.0)
+    ; CHECK-NEXT: renamable $x11 = LUI 5
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 202 :: (volatile store (s16) into %ir.1)
+    ; CHECK-NEXT: renamable $x11 = LUI 6
+    ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 204 :: (volatile store (s16) into %ir.2)
+    ; CHECK-NEXT: PseudoRET
+    renamable $x11 = LUI 4
+    SH_INX killed renamable $x11_h, renamable $x16, 200 :: (volatile store (s16) into %ir.0)
+    renamable $x11 = LUI 5
+    SH_INX killed renamable $x11_h, renamable $x16, 202 :: (volatile store (s16) into %ir.1)
+    renamable $x11 = LUI 6
+    SH_INX killed renamable $x11_h, renamable $x16, 204 :: (volatile store (s16) into %ir.2)
+    PseudoRET
+
+...
+---
+name: load_large_offset_no_opt_half
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x16
+
+    ; CHECK-LABEL: name: load_large_offset_no_opt_half
+    ; CHECK: liveins: $x16
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.0)
+    ; CHECK-NEXT: dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s16) from %ir.1)
+    ; CHECK-NEXT: dead $x10_h = LH_INX killed renamable $x16, 104 :: (volatile load (s16) from %ir.2)
+    ; CHECK-NEXT: PseudoRET
+    dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.0)
+    dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s16) from %ir.1)
+    dead $x10_h = LH_INX killed renamable $x16, 104 :: (volatile load (s16) from %ir.2)
+    PseudoRET
+
+...
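
A quick way to spot-check the new Zhinx lowering outside the FileCheck tests is a one-function reproducer. This is an illustrative sketch, not part of the patch: the file name is made up, and the expected output is read off the half-bitmanip-dagcombines.ll diff above.

; fneg-zhinx.ll -- minimal reproducer for the fneg lowering change
define half @fneg(half %a) nounwind {
  %1 = fneg half %a
  ret half %1
}

; llc -mtriple=riscv64 -mattr=+zhinx -o - fneg-zhinx.ll
;   now emits "fneg.h a0, a0" in place of the old lui/xor pair.
; llc -mtriple=riscv64 -mattr=+zhinxmin -o - fneg-zhinx.ll
;   still emits lui/xor, now bracketed by the "# kill" subregister
;   annotations that come with the GPRF16 register class.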